// fxos/include/fxos/disassembly.h
//
// 176 lines
// 5.5 KiB
// C++

//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/disassembly: Disassembler infrastructure
//
//
//
// TODO: Instead of defining every field for every argument of every
// disassembled instruction, set up a system of external annotations.
//---
#ifndef FXOS_DISASSEMBLY_H
#define FXOS_DISASSEMBLY_H
#include <fxos/lang.h>
#include <fxos/vspace.h>
#include <fxos/semantics.h>
#include <fxos/util/Buffer.h>
#include <cstdint>
#include <map>
#include <queue>
#include <set>
#include <string>
namespace FxOS {
/* Register an instruction. This is called by loader functions from the asm
table lexer.
[inst]: The instruction to register; its opcode field must already be set. */
void register_instruction(AsmInstruction const &inst);
/* Lex and register an assembly instruction table.
[file]: Buffer containing the table to lex.
NOTE(review): the meaning of the returned int is not visible from this
header (presumably a status code or a count) -- confirm against the
definition. */
int load_instructions(Buffer const &file);
/* Annotations attached to one argument of a disassembled instruction. */
struct Argument
{
    Argument();

    //---
    // Filled in by the <pcrel> pass and the abstract interpreter
    //---

    /* Where the argument lives, in CPU or memory, provided that the notion
       makes sense for this argument and that it can be determined. */
    Location location;

    /* The value being manipulated. When no information could be obtained,
       converting this object to bool yields false. */
    RelConst value;

    //---
    // Filled in by the <syscall> pass
    //---

    /* The id of the syscall, when the value is a syscall address. */
    int syscall_id;
};
/* One instruction that has been loaded, together with the annotations that
   the various passes attach to it. */
struct Instruction
{
    /* Construct from a decoded instruction; [inst] cannot be nullptr. */
    Instruction(AsmInstruction const *inst);

    /* Construct from a raw opcode, for when the instruction could not be
       decoded. */
    Instruction(uint16_t opcode);

    /* Which instruction this is, or nullptr if it could not be decoded.
       Knowing the instruction is not enough to determine every property
       below: placement and delay slots greatly alter them. */
    AsmInstruction const *inst;

    /* Per-argument information; holds data set by several passes. */
    Argument args[2];

    /* Raw opcode; only valid when inst==nullptr. */
    uint16_t opcode;

    //---
    // Filled in by the cfg pass
    //---

    /* Set when this instruction is a leader. It is always another
       instruction, one that jumps into this one, that sets the flag. */
    bool leader;

    /* Set when this instruction sits in a delay slot. It is always the
       preceding delayed instruction that sets the flag. */
    bool delayslot;

    /* Control-flow classification (terminal / jump / condjump).
       When delayslot==false, these attributes are set while analyzing this
       very instruction. When delayslot==true, they are set while the
       preceding delayed instruction is analyzed. */

    /* Terminal: the instruction has no successors and is the end of the
       function. */
    bool terminal;

    /* Unconditional jump of target [jmptarget]. This is the case for eg. bt,
       but not bt.s; rather, the successor of bt.s is the jump.
       NOTE(review): bt is given as the example of an unconditional jump
       while the next flag covers conditional jumps -- confirm the example. */
    bool jump;

    /* Conditional jump that can hit either [jmptarget] or pc+2. */
    bool condjump;

    /* Target of the jump; used if jump==true or condjump==true. */
    uint32_t jmptarget;
};
/* Disassembly front-end: instructions are loaded automatically from the
   target as they are requested. */
struct Disassembly
{
    Disassembly(VirtualSpace &space);

    /* Tell whether the instruction at [pc] has been visited so far. */
    bool hasins(uint32_t pc);

    /* Lowest address among the loaded instructions. */
    uint32_t minpc();

    /* Highest address among the loaded instructions. */
    uint32_t maxpc();

    /* Access the storage of any concrete instruction. If the instruction at
       [pc] had not been read before, it is loaded and initialized. */
    Instruction &readins(uint32_t pc);

    /* Direct access to the loaded instructions, for other access patterns. */
    std::map<uint32_t, Instruction> instructions;

    /* The underlying target. */
    VirtualSpace &space;
};
//---
// Disassembler passes
//---
/* Base class for disassembler passes. A pass analyzes instructions one at a
   time through analyze(), which concrete passes must implement, and keeps
   track of which instructions to visit next. */
class DisassemblyPass
{
public:
    /* [disasm]: Disassembly that the pass works on.
       [name]: Name of the pass (empty by default). */
    DisassemblyPass(Disassembly &disasm, std::string name="");

    /* Virtual destructor: this class has virtual functions and is meant to
       be derived from, so destroying a derived pass through a pointer to
       DisassemblyPass must be well-defined. */
    virtual ~DisassemblyPass() = default;

    /* Analyze a single instruction, probably updating the annotations and
       the state of the pass itself. */
    virtual bool analyze(uint32_t pc, Instruction &inst) = 0;

    /* Run the pass from the given entry point */
    virtual bool run(uint32_t entry_pc);

protected:
    /* Add an instruction to the queue to analyze next */
    void enqueue(uint32_t pc);

    /* Add the next loaded instruction in address space */
    void enqueue_next(uint32_t pc);

    /* Enqueue the unseen successors of this instruction */
    void enqueue_unseen_successors(uint32_t pc, Instruction &inst);

    /* Enqueue all the successors of this instruction */
    void enqueue_all_successors(uint32_t pc, Instruction &inst);

    /* Underlying disassembly */
    Disassembly &m_disasm;

private:
    /* Blocks to visit next, ordered for uniqueness */
    std::set<uint32_t> m_next;
    std::priority_queue<uint32_t> m_queue;

    /* Visited blocks */
    std::set<uint32_t> m_seen;

    /* Name of pass */
    std::string m_name;
};
/* A disassembly pass in which every instruction is observed independently
   of the others. */
class InstructionDisassemblyPass: public DisassemblyPass
{
public:
    InstructionDisassemblyPass(Disassembly &disasm, std::string name="");

    /* Run the pass starting at the first instruction currently loaded and
       going all the way down to the bottom, as if enqueue_next() were
       always used.
       NOTE(review): declaring run() here hides DisassemblyPass::run(uint32_t)
       for calls made through this type -- confirm that this is intended. */
    virtual bool run();
};
} /* namespace FxOS */
#endif /* FXOS_DISASSEMBLY_H */