fxos/include/fxos/disassembly.h

211 lines
6.2 KiB
C++

//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/disassembly: Disassembler infrastructure
//
//
//
// TODO: Instead of defining every field for every argument of every
// disassembled instruction, set up a system of external annotations.
//---
#ifndef FXOS_DISASSEMBLY_H
#define FXOS_DISASSEMBLY_H
#include <fxos/lang.h>
#include <fxos/semantics.h>
#include <fxos/util/Buffer.h>
#include <fxos/util/Queue.h>
#include <cstdint>
#include <map>
#include <queue>
#include <set>
#include <vector>
namespace FxOS {
/* Forward declaration: this header only refers to VirtualSpace by reference
   (see Disassembly), so its full definition is not required here. */
class VirtualSpace;
/* Register an instruction. This is called by loader functions from the asm
   table lexer. [inst] must have its opcode field set.
   NOTE(review): where the registered instruction is stored, and whether
   [inst] is copied or must stay alive afterwards, is decided by the
   out-of-line definition; confirm there before relying on either. */
void register_instruction(AsmInstruction const &inst);
/* Lex and register an assembly instruction table. [file] is the buffer that
   gets lexed.
   NOTE(review): the meaning of the returned int (status code? number of
   instructions registered?) is not stated in this header; confirm against
   the implementation. */
int load_instructions(Buffer const &file);
//---
// Dynamic information on instructions
//---
/* An argument for a disassembled instruction. Each Instruction carries two of
   these (Instruction::args). The fields are filled in by different passes,
   as indicated on each group below. */
struct Argument
{
    /* Default constructor, defined out of line.
       NOTE(review): the initial values of the fields are chosen there and
       are not visible from this header. */
    Argument();

    // Data set by the <pcrel> pass and abstract interpreter

    /* Location in CPU or memory, if it makes sense and can be determined */
    Location location;
    /* Manipulated value. If no information can be obtained, this object
       evaluates to false when converted to bool. */
    RelConst value;

    // Data set by the <syscall> pass

    /* If the value is a syscall address, the syscall's id.
       NOTE(review): the value held when [value] is not a syscall address is
       not documented here (presumably a sentinel set by the constructor);
       confirm before testing this field. */
    int syscall_id;
};
/* A loaded and annotated instruction. Groups the static description of the
   instruction (or its raw opcode when decoding failed), per-argument data,
   and the control-flow attributes computed by the cfg pass. */
struct Instruction
{
    /* Build from instruction, cannot be nullptr. */
    Instruction(AsmInstruction const *inst);
    /* Build from opcode, if instruction could not be decoded.
       Note: a literal 0 converts equally well to uint16_t and to a null
       pointer, so Instruction(0) is ambiguous between the two constructors;
       pass a value that already has type uint16_t. */
    Instruction(uint16_t opcode);

    /* What instruction this is. Note that this does not determine all the
       properties below. Placement and delay slots greatly alter them.
       This pointer is nullptr if the instruction could not be decoded. */
    AsmInstruction const *inst;
    /* Argument information (contains data set by several passes) */
    Argument args[2];
    /* Opcode, valid only if inst==nullptr */
    uint16_t opcode;

    // Data set by the cfg pass

    /* Whether this instruction is a leader. This is always set by another
       instruction jumping into this one. */
    bool leader;
    /* Whether this instruction is in a delay slot. This is always set by
       the preceding delayed instruction. */
    bool delayslot;

    /* Whether this instruction is:
       -> Terminal, ie. has no successors and is the end of the function.
       -> An unconditional jump of target [jmptarget]. This is the case for eg.
          bt, but not bt.s; rather the successor of bt.s is the jump.
       -> A conditional jump that can hit [jmptarget] and pc+2.
       If delayslot==false, these attributes are set when analyzing this
       instruction. If delayslot==true, they are set when the preceding
       delayed instruction is analyzed.
       NOTE(review): bt/bt.s read like conditional branches, so the example
       above seems to illustrate which instruction carries the flags (the
       branch itself, or its delay slot for the .s form) rather than
       unconditional jumps specifically; confirm and reword if so. */
    bool terminal;
    bool jump;
    bool condjump;

    /* The jump target, used if jump==true or condjump==true. */
    uint32_t jmptarget;
};
//---
// Dynamic information on functions
//---
/* A function known to the disassembly, identified by its address.
   [address] carries a default initializer so that a default-constructed
   Function never holds an indeterminate address. The struct remains an
   aggregate (C++14 and later), so brace initialization keeps working. */
struct Function
{
    /* Address of the function; 0 until set by whoever creates the entry */
    uint32_t address = 0;
    /* List of subfunctions called. TODO: Not yet populated by anyone */
    std::vector<uint32_t> callTargets;
};
/* Disassembly interface that automatically loads code from a target. Holds
   the annotated instructions and the documented functions in two maps keyed
   by uint32_t (presumably the address), plus a reference to the target. */
struct Disassembly
{
    /* Build a disassembly over [space]. The object keeps a VirtualSpace
       reference (the [space] member), so the VirtualSpace passed here is
       expected to outlive the Disassembly. */
    Disassembly(VirtualSpace &space);

    /* Check whether an instruction has been visited so far */
    bool hasins(uint32_t pc);

    /* Get the minimum and maximum loaded instruction addresses.
       NOTE(review): the result when no instruction has been loaded yet is
       not specified in this header; check the implementation before calling
       these on an empty disassembly. */
    uint32_t minpc();
    uint32_t maxpc();

    /* Get the storage to any concrete instruction. The instruction will be
       loaded and initialized if it had not been read before. */
    Instruction &readins(uint32_t pc);

    /* Find a function by address; returns nullptr if not yet defined */
    Function *getFunctionAt(uint32_t pc);

    /* For other access patterns */
    std::map<uint32_t, Instruction> instructions;
    /* List of functions being documented */
    std::map<uint32_t, Function> functions;
    /* Underlying target. Being a reference member, it makes Disassembly
       objects non-assignable. */
    VirtualSpace &space;
};
//---
// Disassembler passes
//---
/* Common base of the disassembler passes: it only records which disassembly
   the pass operates on. */
class DisassemblyPass
{
public:
    /* Build a pass operating on [disasm]. Only a reference is kept (see
       m_disasm), so [disasm] is expected to outlive the pass. */
    DisassemblyPass(Disassembly &disasm);

    /* This class is the root of a polymorphic hierarchy (FunctionPass and
       InstructionPass declare pure virtual methods), so the destructor is
       virtual: destroying a concrete pass through a pointer to one of its
       bases is then well-defined instead of undefined behavior. */
    virtual ~DisassemblyPass() = default;

    /* Underlying disassembly */
    Disassembly &m_disasm;
};
/* A disassembly pass that observes each function independently. Concrete
   passes implement analyzeFunction(Function &); the remaining methods are
   declared here and defined out of line.
   NOTE(review): the meaning of the bool results (presumably success) is not
   stated in this header; confirm against the implementation. */
class FunctionPass: public DisassemblyPass
{
public:
    FunctionPass(Disassembly &disasm);

    /* Analyze the whole disassembly */
    bool analyzeAllFunctions();

    /* Analyze a function. Pure virtual: this is the hook that each concrete
       pass must provide. */
    virtual bool analyzeFunction(Function &func) = 0;
    /* Overload taking an address instead of a Function (presumably the
       function located at [pc]; confirm).
       Note: a subclass that overrides analyzeFunction(Function &) hides this
       overload unless it also writes `using FunctionPass::analyzeFunction;`. */
    bool analyzeFunction(uint32_t pc);

    /* Analyze a function and its subfunctions, recursively */
    bool analyzeFunctionRecursively(Function &func);
    bool analyzeFunctionRecursively(uint32_t pc);

    /* Enqueue subfunctions of function (this is done automatically by
       analyzeFunctionRecursively()) */
    void enqueueSubfunctions(Function &func);
    /* Same, even for functions that were already seen */
    void updateSubfunctions(Function &func);

private:
    /* Work queue of uint32_t values; presumably the addresses of functions
       still to be analyzed, fed by the two methods above. TODO confirm. */
    Queue<uint32_t> m_queue;
};
/* A disassembly pass that observes each instruction independently. Concrete
   passes implement analyzeInstruction(); the remaining methods are declared
   here and defined out of line.
   NOTE(review): the meaning of the bool results (presumably success) is not
   stated in this header; confirm against the implementation. */
class InstructionPass: public DisassemblyPass
{
public:
    InstructionPass(Disassembly &disasm);

    /* Analyze the whole disassembly in increasing address order */
    bool analyzeAllInstructions();

    /* Analyze a single instruction. Pure virtual: this is the hook that each
       concrete pass must provide. It receives the instruction [ins] together
       with a uint32_t [pc] (presumably the address of [ins]; confirm). */
    virtual bool analyzeInstruction(uint32_t pc, Instruction &ins) = 0;

    /* Analyze a function by following its CFG */
    bool analyzeFunction(uint32_t pc);

    /* Enqueue successors (analyzeFunction() does this automatically) */
    void enqueueSuccessors(uint32_t pc, Instruction &ins);
    /* Same, even for instructions that were already seen */
    void updateSuccessors(uint32_t pc, Instruction &ins);

private:
    /* Work queue of uint32_t values; presumably the addresses of
       instructions still to be analyzed, fed by the two methods above.
       TODO confirm. */
    Queue<uint32_t> m_queue;
};
} /* namespace FxOS */
#endif /* FXOS_DISASSEMBLY_H */