fxos/include/fxos/disassembly.h

303 lines
8.8 KiB
C++

//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/disassembly: Disassembler infrastructure
//
//
//
// TODO: Instead of defining every field for every argument of every
// disassembled instruction, set up a system of external annotations.
//---
#ifndef FXOS_DISASSEMBLY_H
#define FXOS_DISASSEMBLY_H
#include <fxos/lang.h>
#include <fxos/semantics.h>
#include <fxos/util/Buffer.h>
#include <fxos/util/Queue.h>
#include <set>
#include <map>
#include <queue>
namespace FxOS {
class VirtualSpace;
/* Register an instruction so that it becomes known to the disassembler. This
   is called by loader functions from the asm table lexer.
   [inst] must have its opcode field set.
   NOTE(review): where registered instructions are stored, and what happens
   when the same opcode is registered twice, is decided by the definition,
   which is not visible from this header. */
void register_instruction(AsmInstruction const &inst);
/* Lex and register an assembly instruction table. [file] presumably holds
   the raw contents of the table.
   NOTE(review): the meaning of the int return value (status code, number of
   instructions loaded, ...) is not documented here -- confirm against the
   definition before relying on it. */
int load_instructions(Buffer const &file);
//---
// Dynamic information on instructions
//---
/* Annotations attached to one argument of a disassembled instruction. The
   fields are filled in by the analysis passes named below. */
struct Argument
{
    Argument();

    //--- Set by the <pcrel> pass and the abstract interpreter ---

    /* Where the argument lives (CPU or memory), when that notion makes sense
       and could be determined */
    Location location;
    /* The value being manipulated. Converts to false (as a bool) when no
       information could be obtained about it. */
    RelConst value;

    //--- Set by the <syscall> pass ---

    /* Id of the syscall, when [value] is a syscall address */
    int syscall_id;
};
/* An instruction loaded into the disassembly, along with the annotations
   that the analysis passes attach to it. */
struct Instruction
{
    /* Wrap a decoded instruction; [inst] cannot be nullptr. */
    Instruction(AsmInstruction const *inst);
    /* Wrap a raw opcode, for when the instruction could not be decoded. */
    Instruction(uint16_t opcode);

    /* Which instruction this is, or nullptr if decoding failed. Knowing the
       instruction is not enough to determine the properties below; placement
       and delay slots greatly alter them. */
    AsmInstruction const *inst;
    /* Per-argument annotations (data comes from several passes) */
    Argument args[2];
    /* Raw opcode; only valid when inst==nullptr */
    uint16_t opcode;

    //--- Set by the cfg pass ---

    /* True if this instruction is a leader. Always set on behalf of another
       instruction that jumps into this one. */
    bool leader;
    /* True if this instruction sits in a delay slot. Always set by the
       delayed instruction that precedes it. */
    bool delayslot;

    /* Control-flow classification. The instruction may be:
       -> Terminal, ie. it has no successors and ends the function.
       -> An unconditional jump to [jmptarget]. This is the case for eg. bt,
          but not bt.s; rather the successor of bt.s is the jump.
       -> A conditional jump that can reach [jmptarget] and pc+2.
       When delayslot==false these flags are set while analyzing this very
       instruction; when delayslot==true they are set while analyzing the
       delayed instruction that precedes it.
       NOTE(review): bt is a conditional branch on SuperH, so the bt/bt.s
       example may belong with the conditional case -- confirm. */
    bool terminal;
    bool jump;
    bool condjump;
    /* Target of the jump; used when jump==true or condjump==true. */
    uint32_t jmptarget;
};
//---
// Dynamic information on functions
//---
/* What is known about one function of the disassembled program. */
struct Function
{
    /* Build a bare function record that carries no detailed information yet.
       [pc] is presumably recorded as the entry point; the constructor is
       defined outside this header. */
    Function(uint32_t pc);

    /* Entry point of the function */
    uint32_t address;
    /* Subfunctions called by this function.
       TODO: Not yet populated by anyone */
    std::vector<uint32_t> callTargets;
};
//---
// Dynamic claims
//
// Claims are small bits of information associated with sections of the virtual
// space, indicating what the data is used for. Typically it's either owned by
// a function, auxiliary function data, or some actual storage.
//---
/* A claim over a section of the virtual space: [size] bytes starting at
   [address], tagged with what the data is used for. */
struct Claim
{
    /* Kinds of claims; presumably the values stored in [type] */
    enum {
        Function          = 1,
        FunctionAuxiliary = 2,
        Data              = 3,
        Zero              = 4,
        Special           = 5,
    };

    /* First address covered, within the virtual space */
    uint32_t address;
    /* Number of bytes covered */
    uint16_t size;
    /* Kind of claim */
    int16_t type;
    /* Address of the owning function, when relevant (eg. function) */
    uint32_t owner;

    /* Whether this claim and [other] intersect */
    bool intersects(Claim const &other) const;
    /* Raw equality of claims */
    bool operator==(Claim const &other) const;
    /* Human-readable representation */
    std::string str() const;
};

/* Strict ordering of claims by start address alone; size, type and owner do
   not take part in the comparison, so two claims that start at the same
   address are equivalent under this ordering. */
constexpr bool operator<(Claim const &c1, Claim const &c2)
{
    return c2.address > c1.address;
}
//---
// Storage for disassembled data
//---
/* Storage for disassembled data: the instructions, functions and claims
   known for a virtual space. Code is loaded automatically from the target. */
struct Disassembly
{
    Disassembly(VirtualSpace &vspace);

    /* Virtual space that this disassembly refers to */
    VirtualSpace &vspace;

    //--- Instruction information ---

    /* Loaded instructions, presumably keyed by their address */
    std::map<uint32_t, Instruction> instructions;

    /* Tell whether an instruction is loaded at PC */
    bool hasInstructionAt(uint32_t pc);
    /* Look up the instruction at [pc]. An instruction that is not loaded
       yields nullptr, except when [allowDiscovery] is set, in which case it
       is loaded normally. */
    Instruction *getInstructionAt(uint32_t pc, bool allowDiscovery = false);

    //--- Function information ---

    /* Defined functions, presumably keyed by their entry point */
    std::map<uint32_t, Function> functions;

    /* Tell whether a function is defined at PC */
    bool hasFunctionAt(uint32_t pc);
    /* Look up the function at [pc]; nullptr if it is not defined yet */
    Function *getFunctionAt(uint32_t pc);
    /* Look up the function at [pc], creating it empty if it is not defined
       yet */
    Function *getOrCreateFunctionAt(uint32_t pc);

    //--- Claim information ---

    /* Registered claims, ordered by the operator< defined for Claim */
    std::set<Claim> claims;

    /* Access the claim that owns [address], if there is one */
    Claim const *getClaimAt(uint32_t address);
    /* Find the first claim that overlaps this region, if any */
    Claim const *findClaimConflict(uint32_t address, int size);
    /* Find all (or up to max >= 0) claims that overlap this region */
    std::vector<Claim const *> findClaimConflicts(
        uint32_t address, int size, int max = -1);
    /* Add a new exclusive claim. This fails if [c] intersects any previous
       claim that does not compare equal to it. */
    bool addExclusiveClaim(Claim const &c);
    /* Get all claims owned by a certain address. */
    std::vector<Claim const *> findClaimsOwnedBy(uint32_t address);

    // TODO: Add non-exclusive claims/handle collisions

    // TODO: We don't want to deal with instructions loaded without a minimum
    // amount of analysis; can we tie instruction loading to some analysis?
};
//---
// Disassembler passes
//---
/* Common base of all disassembler passes; holds the disassembly that the
   pass operates on. */
class DisassemblyPass
{
public:
    DisassemblyPass(Disassembly &disasm);

    /* Passes form a polymorphic hierarchy (derived passes declare virtual
       analysis functions), so a pass destroyed through a pointer to one of
       its bases must run the most-derived destructor. Without a virtual
       destructor here that deletion would be undefined behavior. */
    virtual ~DisassemblyPass() = default;

    /* Underlying disassembly */
    Disassembly &m_disasm;
};
/* A disassembly pass that looks at each function independently. Concrete
   passes supply analyzeFunction(Function &).
   NOTE(review): the bool results below presumably report success -- confirm
   against the definitions. */
class FunctionPass: public DisassemblyPass
{
public:
    FunctionPass(Disassembly &disasm);

    /* Run the analysis over the whole disassembly */
    bool analyzeAllFunctions();

    /* Run the analysis on a single function, given either directly or by
       address */
    virtual bool analyzeFunction(Function &func) = 0;
    bool analyzeFunction(uint32_t pc);

    /* Run the analysis on a function and, recursively, on its subfunctions */
    bool analyzeFunctionRecursively(Function &func);
    bool analyzeFunctionRecursively(uint32_t pc);

    /* Helpers for custom analysis functions: enqueue the subfunctions of
       [func]. The update variant enqueues them even if they were already
       seen. */
    void enqueueSubfunctions(Function &func);
    void updateSubfunctions(Function &func);

private:
    /* Queue of addresses; presumably the functions awaiting analysis */
    Queue<uint32_t> m_queue;
};
/* A disassembly pass that looks at each instruction independently. Concrete
   passes supply analyzeInstruction().
   NOTE(review): the bool results below presumably report success -- confirm
   against the definitions. */
class InstructionPass: public FunctionPass
{
public:
    InstructionPass(Disassembly &disasm);

    /* When enabled, the pass loads instructions from the disassembly
       automatically, which is useful for passes that explore new functions.
       By default, queueing new instructions is not allowed, to avoid bugs. */
    void setAllowDiscovery(bool allowDiscovery);

    /* Run the analysis over the whole disassembly, in increasing address
       order */
    bool analyzeAllInstructions();

    /* Run the analysis on a single instruction */
    virtual bool analyzeInstruction(uint32_t pc, Instruction &ins) = 0;

    /* Run the analysis on a function by following its CFG. The using
       declaration keeps the by-address overload from FunctionPass visible. */
    using FunctionPass::analyzeFunction;
    bool analyzeFunction(Function &func) override;

    /* Run the analysis on an anonymous function; just assume that one starts
       at PC */
    bool analyzeAnonymousFunction(uint32_t pc);

    /* Helpers for custom analysis functions: enqueue the successors of the
       instruction. The update variant enqueues them even if they were
       already seen. */
    void enqueueSuccessors(uint32_t pc, Instruction &ins);
    void updateSuccessors(uint32_t pc, Instruction &ins);

private:
    /* Queue of addresses; presumably the instructions awaiting analysis.
       This is a separate member from the private queue of FunctionPass. */
    Queue<uint32_t> m_queue;
    /* Flag controlled by setAllowDiscovery() */
    bool m_allowDiscovery;
};
} /* namespace FxOS */
#endif /* FXOS_DISASSEMBLY_H */