188 lines
5.1 KiB
C++
188 lines
5.1 KiB
C++
//---
|
|
// fxos.disassembly: Disassembler
|
|
//---
|
|
|
|
#ifndef LIBFXOS_DISASSEMBLY_H
|
|
#define LIBFXOS_DISASSEMBLY_H
|
|
|
|
#include <fxos/lang.h>
|
|
#include <fxos/target.h>
|
|
#include <fxos/semantics.h>
|
|
|
|
#include <set>
|
|
#include <map>
|
|
#include <queue>
|
|
#include <vector>
|
|
#include <optional>
|
|
|
|
namespace FxOS {
|
|
|
|
/* Register an instruction.
|
|
@inst Instruction with [opcode] set to the binary pattern
|
|
|
|
Typically this is called by loader functions from data tables describing
|
|
instructions with parameters, not manually. See <fxos/load.h>. */
|
|
void register_instruction(Instruction ins);
|
|
|
|
/* An argument for a concrete instruction. */
|
|
struct ConcreteInstructionArg
|
|
{
|
|
ConcreteInstructionArg();
|
|
|
|
//---
|
|
// Data set by the <pcrel> pass and abstract interpreter
|
|
//---
|
|
|
|
/* Location in CPU or memory, if that can be determined */
|
|
Location location;
|
|
/* Pointed value. If the exact value can't be determined, this object
|
|
evaluates to false. Sometimes, the type can be determined anyway,
|
|
and in this case its [type] attribute below is not null even though
|
|
the object evaluates to false. */
|
|
DataValue value;
|
|
|
|
//---
|
|
// Data set by the <syscall> pass
|
|
//---
|
|
|
|
/* If the value is a syscall address, the syscall's id */
|
|
int syscall_id;
|
|
};
|
|
|
|
/* A loaded and annotated instruction. */
|
|
struct ConcreteInstruction
|
|
{
|
|
/* Build from instruction, cannot be nullptr. */
|
|
ConcreteInstruction(Instruction const *inst);
|
|
/* Build from opcode, if instruction could not be decoded. */
|
|
ConcreteInstruction(uint16_t opcode);
|
|
|
|
/* What instruction this is. Note that this does not determine all the
|
|
properties below. Placement and delay slots greatly alter them.
|
|
This pointer is nullptr if the instruction could not be decoded. */
|
|
Instruction const *inst;
|
|
|
|
/* Argument information (contains data set by several passes) */
|
|
ConcreteInstructionArg args[2];
|
|
|
|
/* Opcode, valid only if inst==nullptr */
|
|
uint16_t opcode;
|
|
|
|
//---
|
|
// Data set by the cfg pass
|
|
//---
|
|
|
|
/* Whether this instruction is a leader. This is always set by another
|
|
instruction jumping into this one. */
|
|
bool leader;
|
|
/* Whether this instruction is in a delay slot. This is always set by
|
|
the preceding delayed instruction. */
|
|
bool delayslot;
|
|
|
|
/* Whether this instruction is:
|
|
-> Terminal, ie. has no successors and is the end of the function.
|
|
-> An unconditional jump of target [jmptarget].
|
|
-> A conditional jump that can hit [jmptarget] and pc+2.
|
|
If delayslot==false, these attributes are set when analyzing this
|
|
instruction. If delayslot==true, they are set when the preceding
|
|
delayed instruction is analyzed. */
|
|
bool terminal;
|
|
bool jump;
|
|
bool condjump;
|
|
|
|
/* The jump target, used if jump==true or condjump==true. */
|
|
uint32_t jmptarget;
|
|
};
|
|
|
|
/* Disassembly interface that automatically loads code from a target */
|
|
class Disassembly
|
|
{
|
|
public:
|
|
Disassembly(Target &target);
|
|
|
|
/* Check whether an instruction has been visited so far */
|
|
bool hasins(uint32_t pc);
|
|
/* Get the minimum and maximum loaded instruction addresses */
|
|
uint32_t minpc();
|
|
uint32_t maxpc();
|
|
|
|
/* Get the storage to any concrete instruction. The instruction will be
|
|
loaded and initialized if it had not been read before. */
|
|
ConcreteInstruction &readins(uint32_t pc);
|
|
|
|
/* For other access patterns (careful with write accesses!) */
|
|
std::map<uint32_t, ConcreteInstruction> &instructions() noexcept {
|
|
return m_instructions;
|
|
}
|
|
|
|
/* Access to memory */
|
|
Target &target() noexcept {
|
|
return m_target;
|
|
}
|
|
|
|
/* List of passes that have run so far */
|
|
std::set<std::string> passes;
|
|
|
|
private:
|
|
/* Underlying target */
|
|
Target &m_target;
|
|
/* Loaded instructions by address */
|
|
std::map<uint32_t, ConcreteInstruction> m_instructions;
|
|
};
|
|
|
|
//---
|
|
// Disassembler passes
|
|
//---
|
|
|
|
class DisassemblyPass
|
|
{
|
|
public:
|
|
DisassemblyPass(Disassembly &disasm, std::string name="");
|
|
|
|
/* Analyze a single instruction, probably updating the annotations and
|
|
the state of the pass itself. */
|
|
virtual void analyze(uint32_t pc, ConcreteInstruction &inst) = 0;
|
|
|
|
/* Run the pass from the given entry point */
|
|
virtual void run(uint32_t entry_pc);
|
|
|
|
protected:
|
|
/* Add an instruction to the queue to analyze next */
|
|
void enqueue(uint32_t pc);
|
|
/* Add the next loaded instruction in address space */
|
|
void enqueue_next(uint32_t pc);
|
|
/* Enqueue the unseen successors of this instruction */
|
|
void enqueue_unseen_successors(uint32_t pc, ConcreteInstruction &inst);
|
|
/* Enqueue all the success of this instruction */
|
|
void enqueue_all_successors(uint32_t pc, ConcreteInstruction &inst);
|
|
|
|
/* Underlying disassembly */
|
|
Disassembly &m_disasm;
|
|
|
|
private:
|
|
/* Blocks to visit next, ordered for uniqueness */
|
|
std::set<uint32_t> m_next;
|
|
std::priority_queue<uint32_t> m_queue;
|
|
|
|
/* Visited blocks */
|
|
std::set<uint32_t> m_seen;
|
|
|
|
/* Name of pass */
|
|
std::string m_name;
|
|
};
|
|
|
|
/* A disassembly pass that observes each instruction independently */
|
|
class InstructionDisassemblyPass: public DisassemblyPass
|
|
{
|
|
public:
|
|
InstructionDisassemblyPass(Disassembly &disasm, std::string name="");
|
|
|
|
/* Runs the pass from the first instruction currently loaded, all the
|
|
way down to the bottom, as if always using enqueue_next(). */
|
|
virtual void run();
|
|
};
|
|
|
|
} /* namespace FxOS */
|
|
|
|
#endif /* LIBFXOS_DISASSEMBLY_H */
|