From 8812886e5852755ed26c5fd4e2686fa8426bdb15 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Mon, 16 Dec 2019 22:14:02 +0100 Subject: [PATCH] basic stuff on the disassembler/interpreter, nothing serious yet --- data/base-types.txt | 24 +++++++++ fxos/main.cpp | 13 ++--- include/fxos/disassembly.h | 25 ++++++++- include/fxos/errors.h | 2 +- include/fxos/memory.h | 10 ++-- include/fxos/operands.h | 103 ++++++++++++++++++------------------- include/fxos/util.h | 2 +- 7 files changed, 110 insertions(+), 69 deletions(-) create mode 100644 data/base-types.txt diff --git a/data/base-types.txt b/data/base-types.txt new file mode 100644 index 0000000..9fe73b5 --- /dev/null +++ b/data/base-types.txt @@ -0,0 +1,24 @@ +type: types +name: base +--- + +u32 StatusRegister { + u _ :1; + u MD :1; + u RB :1; + u BL :1; + u _ :20; + u IMASK :4; + u _ :3; + u T :1; +} + +struct MountTableEntry { + u32 _; + u32 _; + u32 _; + char[20] path2; + char[18] path1; + u8 mounted; + u8 _; +} diff --git a/fxos/main.cpp b/fxos/main.cpp index 90cac9d..46aa914 100644 --- a/fxos/main.cpp +++ b/fxos/main.cpp @@ -37,14 +37,17 @@ Disassembly options: -l Length of region --passes= Execute the specified comma-separated list of passes -The default list of passes is pcrel,cfg,cstprop,syscall,regs. The available -passes are the following: +Available passes: pcrel Resolve PC-relative references as their target address cfg Build the control flow graph (uses pcrel) cstprop Propagate constants by abstract interpretation (uses cfg) syscall Annotate code with reverse syscalls regs Annotate code with peripheral register addresses +The default sequence of passes is pcrel,cfg,cstprop,syscall,regs. When +disassembling a function (ie. no size specified on the command-line), the pcrel +and cfg passes are always executed to explore the function. + Analysis modes: -f, --full Run all analysis passes on (same as -sar) -s, --syscall Run syscall ID analysis @@ -106,13 +109,11 @@ int main_disassembly(int argc, char **argv) { std::cerr << "doing main_disasm, which is incomplete x_x\n"; - try - { + try { FxOS::load("data/sh3.txt"); FxOS::load("data/sh4.txt"); } - catch(FxOS::SyntaxError &e) - { + catch(FxOS::SyntaxError &e) { std::cerr << e.file() << ":" << e.line() << ": " << e.what() << "\n" << std::flush; return 1; diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index ca1770d..dab18be 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -6,17 +6,38 @@ #define LIBFXOS_DISASSEMBLY_H #include -#include +#include + +#include +#include namespace FxOS { /* Register an instruction. - @inst Instruction with [opcode] set to the binary pattern + @inst Instruction with [opcode] set to the binary pattern Typically this is called by loader functions from data tables describing instructions with parameters, not manually. See . */ void register_instruction(Instruction ins); +/* A loaded instruction with all relevant information, and more. */ +class LoadedInstruction +{ +private: + /* What instruction it is */ + Instruction &m_inst; + + /* Operands for arguments, if they have been determined */ + std::vector> args; + + /* Jump targets, used for jump instructions only. The first jmp is for + unconditional jumps; jmpt and jmpf are for conditional jumps. In + many situations the jump is forced on a general instruction by a + preceding branch due to the delay slot mechanism. */ + union { uint32_t jmp, jmpt; }; + uint32_t jmpf; +}; + } /* namespace FxOS */ #endif /* LIBFXOS_DISASSEMBLY_H */ diff --git a/include/fxos/errors.h b/include/fxos/errors.h index a6ce84f..8d35480 100644 --- a/include/fxos/errors.h +++ b/include/fxos/errors.h @@ -18,7 +18,7 @@ public: SyntaxError(char const *file, int line, char const *what): m_file(file), m_line(line), m_what(what) {} - /* Provides access to these free objets */ + /* Provides access to these free objects */ char const *file() const noexcept { return m_file; } diff --git a/include/fxos/memory.h b/include/fxos/memory.h index 33ba3d3..63496ae 100644 --- a/include/fxos/memory.h +++ b/include/fxos/memory.h @@ -32,13 +32,13 @@ public: uint32_t size() const noexcept; /* Conversion to MemoryAreaName for switch */ - constexpr operator MemoryAreaName() noexcept { return m_name; } + constexpr operator MemoryAreaName () noexcept { return m_name; } /* Comparison operators */ - constexpr bool operator==(MemoryArea a) const { + constexpr bool operator == (MemoryArea a) const { return m_name == a.m_name; } - constexpr bool operator!=(MemoryArea a) const { + constexpr bool operator != (MemoryArea a) const { return m_name != a.m_name; } @@ -71,8 +71,8 @@ struct MemoryRegion std::string name {}; /* Start address and end address. Generally the end address has one - additionnaly byte. This is okay since no region is supposed to - extend to the very end of the memory. */ + additional byte. This is okay since no region is supposed to extend + to the very end of the memory. */ uint32_t start, end; /* The region is writable under normal conditions */ diff --git a/include/fxos/operands.h b/include/fxos/operands.h index e2f576a..291a84d 100644 --- a/include/fxos/operands.h +++ b/include/fxos/operands.h @@ -1,66 +1,61 @@ #ifndef LIBFXOS_OPERANDS_H #define LIBFXOS_OPERANDS_H +#include + namespace FxOS { -/* Data types: - Registers longwords - Memory aligned u8, u16, u32 - User-defined structs, ... */ - -/* Status register does not contain too much useful stuff: - (30)MD (29)RB -> Disassembler needs not understand them - (9)M (8)Q -> Idem, divisions are very rare - (7-4)IMASK (28)BL -> Interrupt system rarely used, even less explicitly - (1)S -> ? - (0)T -> Now THAT is important - Disassembler should name them but needs not understand their role, except T. - The T bit might just be stored outside. */ - -enum class DataKind { - /* Base types */ - Integral, - /* Bit fields over integers */ - BitField, - /* Structures (can only reside in memory) */ - Struct, - /* Arrays (can only reside in memory) */ - Array, -}; +//--- +// Data type representation +// +// The abstract interpreter supports the following data types when analyzing +// data movement and access: +// Integers i8 u8 i16 u16 i32 u32 (regs, mem) +// Bit fields over ints T { } (mem) +//--- class DataType { public: - virtual DataKind kind() const noexcept = 0; -}; + enum DataKind { + /* Base types */ + Integral, + /* Bit fields over integers */ + BitField, + /* Structures (can only reside in memory) */ + Struct, + /* Arrays (can only reside in memory) */ + Array, + }; -class IntegralType: public DataType -{ -public: - IntegralType(int bitsize); - - DataKind kind() const noexcept override { - return DataKind::Integral; - } + /* Type kind */ + DataKind kind; + /* Type size in bytes, as would be returned by sizeof(). Must be 1, 2 + or 4 for integral types and bit fields. Might be 0 for arrays if the + size of the array is unknown */ + uint16_t size; + /* Type alignment, can only be 1, 2 or 4 */ + uint16_t align; private: - int m_size; -}; - -class BitFieldType: public DataType -{ + /* Type of fields in bit fields */ using Field = std::pair; -public: - BitFieldType(std::vector fields); - - DataKind kind() const noexcept override { - return DataKind::BitField; - } - -private: - int m_size; - std::vector m_fields; + union { + /* For integer types of size 1, whether to display as char + (might be extended to more attributes later) */ + bool ischar; + /* For bit field types */ + std::vector fields; + /* For struct types */ + std::vector attributes; + /* For array types, number of elements (0 if unknown or + variable-size NUL-terminated strings) */ + int elements; + }; }; enum class OperandKind { @@ -77,30 +72,30 @@ class Operand public: /* Returns the operand kind (which is also the subclass identity) */ virtual OperandKind type() const noexcept = 0; - /* Sring representation */ + /* String representation */ virtual std::string str() const noexcept = 0; }; class RegisterOperand: public Operand { public: - RegisterOperand(std::string name): m_name(name) {} + RegisterOperand(CpuRegister name): m_name(name) {} OperandKind type() const noexcept override { return OperandKind::CpuRegister; } - std::string name() const noexcept { + CpuRegister name() const noexcept { return m_name; } std::string str() const noexcept override { - return this->name(); + return m_name.str(); } private: /* Register name for assembler listings */ - std::string m_name; + CpuRegister m_name; }; } /* namespace FxOS */ diff --git a/include/fxos/util.h b/include/fxos/util.h index 53b67c6..acdd1c0 100644 --- a/include/fxos/util.h +++ b/include/fxos/util.h @@ -25,7 +25,7 @@ std::string format(std::string const &format, Args ... args) return std::string(buf.get(), buf.get() + size - 1); } -/* An object extracted from a targets, which has a virtual address */ +/* An object extracted from a target, which has a virtual address */ template struct Addressable {