diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h
index 84171c8..d94c6ad 100644
--- a/include/fxos/disassembly.h
+++ b/include/fxos/disassembly.h
@@ -6,10 +6,12 @@
 #define LIBFXOS_DISASSEMBLY_H
 
 #include
+#include
 
 #include
 #include
 #include
+#include
 
 namespace FxOS {
 
@@ -20,15 +22,46 @@ namespace FxOS {
    instructions with parameters, not manually. See . */
 void register_instruction(Instruction ins);
 
-/* A loaded instruction with all relevant information, and more. */
-class LoadedInstruction
+/* An argument for a concrete instruction. */
+struct ConcreteInstructionArg
 {
-private:
-    /* What instruction it is */
-    Instruction &m_inst;
+    ConcreteInstructionArg();
 
-    /* Operands for arguments, if they have been determined */
-//  std::vector> args;
+    //---
+    // Data set by the abstract interpretation passes
+    //---
+
+    /* Location in CPU or memory, if that can be determined */
+    std::optional loc;
+    /* Alternatively, data type, which can sometimes be determined uniquely
+       even if the location is not constant */
+    std::optional type;
+
+    //---
+    // Data set by the syscall and regs passes
+    //---
+
+    /* If the value is a syscall address, the syscall's id */
+    int syscall_id;
+    /* If the value is a peripheral register, its address */
+    uint32_t reg_address;
+};
+
+/* A loaded and annotated instruction. */
+struct ConcreteInstruction
+{
+    ConcreteInstruction(Instruction &inst);
+    ConcreteInstruction(ConcreteInstruction const &other) = default;
+
+    /* What instruction it is */
+    Instruction &inst;
+
+    /* Argument information (contains data set by several passes) */
+    ConcreteInstructionArg args[2];
+
+    //---
+    // Data set by the pcrel pass
+    //---
 
     /* Jump targets, used for jump instructions only. The first jmp is for
        unconditional jumps; jmpt and jmpf are for conditional jumps. In
@@ -36,6 +69,36 @@ private:
        preceding branch due to the delay slot mechanism. */
     union { uint32_t jmp, jmpt; };
     uint32_t jmpf;
+
+    //---
+    // Data set by the cfg pass
+    //---
+
+    /* Whether this instruction is a basic block leader */
+    bool leader;
+};
+
+/* Short aliases */
+using CI = ConcreteInstruction;
+using CIArg = ConcreteInstructionArg;
+
+/* Disassembly interface that automatically loads code from a target */
+class Disassembly
+{
+public:
+    Disassembly(Target &target);
+
+    /* Get the storage to any concrete instruction. The instruction will be
+       loaded and initialized if it had not been read before. Throws
+       std::runtime_error if pc is odd or if no instruction is registered
+       for the opcode read at pc. */
+    ConcreteInstruction &readins(uint32_t pc);
+
+private:
+    /* Underlying target */
+    Target &m_target;
+    /* Loaded instructions by address */
+    std::map m_instructions;
 };
 
 } /* namespace FxOS */
diff --git a/include/fxos/semantics.h b/include/fxos/semantics.h
index fe58ef7..855cece 100644
--- a/include/fxos/semantics.h
+++ b/include/fxos/semantics.h
@@ -24,6 +24,13 @@ namespace FxOS {
 class DataType
 {
 public:
+    /* Copy constructor */
+    DataType(DataType const &other);
+    DataType & operator = (DataType other);
+
+    /* Destructor that takes into account the non-trivial union */
+    ~DataType();
+
     enum DataKind {
         /* Base types */
         Integral,
@@ -44,7 +51,6 @@ public:
     /* Type alignment, can only be 1, 2 or 4 */
     uint16_t align;
 
-private:
     /* Type of fields in bit fields */
     using Field = std::pair;
 
@@ -52,14 +58,18 @@ private:
         /* For integer types of size 1, whether to display as char (might be
            extended to more attributes later) */
         bool ischar;
-        /* For bit field types */
-        std::vector fields;
-        /* For struct types */
-        std::vector attributes;
         /* For array types, number of elements (0 if unknown or variable-size
            NUL-terminated strings) */
         int elements;
     };
+
+    /* The following members are not in the union because they have non-
+       trivial destructors/copy and I don't want to care. */
+
+    /* For bit field types */
+    std::vector fields;
+    /* For struct types */
+    std::vector attributes;
 };
 
 //---
diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp
index c411ac2..4a7f997 100644
--- a/lib/disassembly.cpp
+++ b/lib/disassembly.cpp
@@ -21,4 +21,51 @@ void register_instruction(Instruction ins)
     insmap[opcode] = ins;
 }
 
+//---
+// Concrete (instantiated) arguments and instructions
+//---
+
+ConcreteInstructionArg::ConcreteInstructionArg():
+    loc {}, type {}, syscall_id {-1}
+{
+    /* Assigned here rather than brace-initialized: reg_address {-1} would be
+       rejected as a narrowing conversion to uint32_t */
+    reg_address = -1;
+}
+
+ConcreteInstruction::ConcreteInstruction(Instruction &inst):
+    inst {inst}, jmpt {}, jmpf {}, leader {false}
+{
+}
+
+//---
+// Disassembler interface
+//---
+
+Disassembly::Disassembly(Target &target):
+    m_target {target}, m_instructions {}
+{
+}
+
+ConcreteInstruction &Disassembly::readins(uint32_t pc)
+{
+    if(pc & 1) throw std::runtime_error("Disassembly::readins at odd PC");
+
+    /* Instructions that were already loaded are served from the map */
+    auto it = m_instructions.find(pc);
+    if(it != m_instructions.end()) return it->second;
+
+    uint16_t opcode = m_target.read_u16(pc);
+    if(!insmap[opcode])
+    {
+        throw std::runtime_error("No instruction for opcode");
+    }
+
+    /* Store the new instruction before returning it; the reference handed
+       out must designate the copy owned by the map */
+    Instruction &inst = *insmap[opcode];
+    auto inserted = m_instructions.emplace(pc, ConcreteInstruction(inst));
+    return inserted.first->second;
+}
+
 } /* namespace FxOS */
diff --git a/lib/semantics.cpp b/lib/semantics.cpp
new file mode 100644
index 0000000..9579456
--- /dev/null
+++ b/lib/semantics.cpp
@@ -0,0 +1,5 @@
+#include
+
+namespace FxOS {
+
+} /* namespace FxOS */