fxos/include/fxos/disassembly.h

188 lines
5.1 KiB
C++

//---
// fxos.disassembly: Disassembler
//---
#ifndef LIBFXOS_DISASSEMBLY_H
#define LIBFXOS_DISASSEMBLY_H
#include <fxos/lang.h>
#include <fxos/target.h>
#include <fxos/semantics.h>
#include <cstdint>
#include <map>
#include <optional>
#include <queue>
#include <set>
#include <string>
#include <vector>
namespace FxOS {
/* Register an instruction.
@ins Instruction with [opcode] set to the binary pattern
Typically this is called by loader functions from data tables describing
instructions with parameters, not manually. See <fxos/load.h>.
The instruction is received by value. */
void register_instruction(Instruction ins);
/* An argument for a concrete instruction. Groups the per-argument
annotations filled in by the passes named in the section headers below. */
struct ConcreteInstructionArg
{
/* Defined out of line; the initial values given to the fields below are
not visible from this header. */
ConcreteInstructionArg();
//---
// Data set by the <pcrel> pass and abstract interpreter
//---
/* Location in CPU or memory, if that can be determined */
Location location;
/* Pointed value. If the exact value can't be determined, this object
evaluates to false. Sometimes, the type can be determined anyway,
and in this case its [type] attribute is not null even though the
object evaluates to false.
NOTE(review): no [type] field is declared in this struct, so [type] is
presumably a member of DataValue - confirm against its definition. */
DataValue value;
//---
// Data set by the <syscall> pass
//---
/* If the value is a syscall address, the syscall's id.
NOTE(review): the value stored when the argument is not a syscall address
is chosen by the constructor and cannot be told from here - confirm. */
int syscall_id;
};
/* A loaded and annotated instruction. Either refers to a decoded
Instruction through [inst], or keeps the raw [opcode] when decoding failed.
The remaining fields are annotations written by analysis passes.
Note: neither constructor is declared explicit, so an Instruction const *
or a uint16_t converts implicitly to a ConcreteInstruction. */
struct ConcreteInstruction
{
/* Build from instruction, cannot be nullptr. */
ConcreteInstruction(Instruction const *inst);
/* Build from opcode, if instruction could not be decoded. */
ConcreteInstruction(uint16_t opcode);
/* What instruction this is. Note that this does not determine all the
properties below. Placement and delay slots greatly alter them.
This pointer is nullptr if the instruction could not be decoded. */
Instruction const *inst;
/* Argument information (contains data set by several passes).
Two slots are reserved, one per argument. */
ConcreteInstructionArg args[2];
/* Opcode, valid only if inst==nullptr */
uint16_t opcode;
//---
// Data set by the cfg pass
//---
/* Whether this instruction is a leader. This is always set by another
instruction jumping into this one. */
bool leader;
/* Whether this instruction is in a delay slot. This is always set by
the preceding delayed instruction. */
bool delayslot;
/* Whether this instruction is:
-> Terminal, ie. has no successors and is the end of the function.
-> An unconditional jump of target [jmptarget].
-> A conditional jump that can hit [jmptarget] and pc+2.
If delayslot==false, these attributes are set when analyzing this
instruction. If delayslot==true, they are set when the preceding
delayed instruction is analyzed. */
bool terminal;
bool jump;
bool condjump;
/* The jump target, used if jump==true or condjump==true. */
uint32_t jmptarget;
};
/* Disassembly interface that automatically loads code from a target.
Keeps a reference to the target together with a map of the instructions
loaded so far, keyed by address. */
class Disassembly
{
public:
/* Build a disassembly over [target]. The class stores a Target reference
(m_target), so the target presumably has to outlive this object. */
Disassembly(Target &target);
/* Check whether an instruction has been visited so far.
Note: this and the two queries below are non-const member functions, so
they cannot be called through a const Disassembly. */
bool hasins(uint32_t pc);
/* Get the minimum and maximum loaded instruction addresses.
NOTE(review): the result when no instruction has been loaded yet cannot
be told from this header - check the implementation. */
uint32_t minpc();
uint32_t maxpc();
/* Get the storage to any concrete instruction. The instruction will be
loaded and initialized if it had not been read before. */
ConcreteInstruction &readins(uint32_t pc);
/* For other access patterns (careful with write accesses!)
Returns a mutable reference to the internal address -> instruction map. */
std::map<uint32_t, ConcreteInstruction> &instructions() noexcept {
return m_instructions;
}
/* Access to memory: returns the underlying target by reference. */
Target &target() noexcept {
return m_target;
}
/* List of passes that have run so far (public, stored as pass names;
who inserts into it is not visible from this header). */
std::set<std::string> passes;
private:
/* Underlying target (non-owning reference) */
Target &m_target;
/* Loaded instructions by address */
std::map<uint32_t, ConcreteInstruction> m_instructions;
};
//---
// Disassembler passes
//---
/* Base class for disassembler passes. A pass is bound to a Disassembly and
   is run from an entry point; concrete passes implement analyze(), which is
   given each instruction to annotate. The class is abstract (analyze() is
   pure virtual) and is meant to be derived from. */
class DisassemblyPass
{
public:
    /* Bind the pass to [disasm]; [name] is optional and defaults to "". */
    DisassemblyPass(Disassembly &disasm, std::string name="");

    /* Virtual destructor: this is a polymorphic base class, so a derived
       pass may be destroyed through a DisassemblyPass pointer. Without a
       virtual destructor that would be undefined behavior. */
    virtual ~DisassemblyPass() = default;

    /* Analyze a single instruction, probably updating the annotations and
       the state of the pass itself.
       @pc    Address of the instruction
       @inst  Storage of the instruction, passed by mutable reference */
    virtual void analyze(uint32_t pc, ConcreteInstruction &inst) = 0;

    /* Run the pass from the given entry point */
    virtual void run(uint32_t entry_pc);

protected:
    /* Add an instruction to the queue to analyze next */
    void enqueue(uint32_t pc);

    /* Add the next loaded instruction in address space */
    void enqueue_next(uint32_t pc);

    /* Enqueue the unseen successors of this instruction */
    void enqueue_unseen_successors(uint32_t pc, ConcreteInstruction &inst);

    /* Enqueue all the successors of this instruction */
    void enqueue_all_successors(uint32_t pc, ConcreteInstruction &inst);

    /* Underlying disassembly (non-owning reference, visible to subclasses) */
    Disassembly &m_disasm;

private:
    /* Blocks to visit next, ordered for uniqueness.
       NOTE(review): the set and the queue presumably track the same pending
       addresses (set for membership, queue for ordering) - confirm in the
       implementation. With the default comparator, the priority queue
       yields the largest address first. */
    std::set<uint32_t> m_next;
    std::priority_queue<uint32_t> m_queue;

    /* Visited blocks */
    std::set<uint32_t> m_seen;

    /* Name of pass */
    std::string m_name;
};
/* A disassembly pass that observes each instruction independently.
This class does not declare analyze(), so it is still abstract; concrete
passes deriving from it must implement analyze(). */
class InstructionDisassemblyPass: public DisassemblyPass
{
public:
/* Bind the pass to [disasm]; [name] is optional and defaults to "". */
InstructionDisassemblyPass(Disassembly &disasm, std::string name="");
/* Runs the pass from the first instruction currently loaded, all the
way down to the bottom, as if always using enqueue_next().
Note: this function takes no argument, so it does not override
DisassemblyPass::run(uint32_t); it is a separate virtual function.
Declaring it here also hides the base overload for name lookup on this
class unless the call is qualified as DisassemblyPass::run(). */
virtual void run();
};
} /* namespace FxOS */
#endif /* LIBFXOS_DISASSEMBLY_H */