From da69725697208e5de3955b7f6ccea1f9edc6be7c Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Sun, 27 Mar 2022 16:10:13 +0100 Subject: [PATCH] more refactoring and minor style updates --- CMakeLists.txt | 2 +- include/fxos/ai/AbstractDomain.h | 76 ++++++++ include/fxos/ai/RelConst.h | 126 ++++++++++++ include/fxos/domains.h | 159 --------------- include/fxos/lang.h | 217 +++++++++++---------- include/fxos/memory.h | 159 +++++++-------- include/fxos/semantics.h | 2 +- lib/ai/RelConst.cpp | 309 +++++++++++++++++++++++++++++ lib/domains/relconst.cpp | 282 --------------------------- lib/lang.cpp | 320 +++++++++++++++---------------- lib/memory.cpp | 174 ++++++++--------- lib/passes/pcrel.cpp | 4 +- 12 files changed, 947 insertions(+), 883 deletions(-) create mode 100644 include/fxos/ai/AbstractDomain.h create mode 100644 include/fxos/ai/RelConst.h delete mode 100644 include/fxos/domains.h create mode 100644 lib/ai/RelConst.cpp delete mode 100644 lib/domains/relconst.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 06b3a17..3001af6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,6 @@ flex_target(LoadAsm lib/load-asm.l set(fxos_core_SOURCES lib/disassembly.cpp - lib/domains/relconst.cpp lib/lang.cpp lib/memory.cpp lib/os.cpp @@ -27,6 +26,7 @@ set(fxos_core_SOURCES lib/symbols.cpp lib/vspace.cpp + lib/ai/RelConst.cpp lib/util/Buffer.cpp lib/util/log.cpp lib/util/Timer.cpp) diff --git a/include/fxos/ai/AbstractDomain.h b/include/fxos/ai/AbstractDomain.h new file mode 100644 index 0000000..2a99829 --- /dev/null +++ b/include/fxos/ai/AbstractDomain.h @@ -0,0 +1,76 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/ai/AbstractDomain: Lattice-based domains for abstract 
interpretation +// +// No, this whole folder has nothing to do with Artificial Intelligence. This is +// Abstract Interpretation. You may have been bamboozled. +// +// This header defines the AbstractDomain typeclass which specifies the +// interface for lattices for abstract-interpretation-based passes. This is +// generic, but there are no plans for any other domain than RelConst. +// +// The interface assumes 32-bit values and assembler-like semantics. +//--- + +#ifndef FXOS_AI_ABSTRACTDOMAIN_H +#define FXOS_AI_ABSTRACTDOMAIN_H + +#include + +namespace FxOS { + +/* An abstract domain over a lattice T modeling CPU operands. */ +template +class AbstractDomain +{ +public: + /* Bottom and Top constants */ + virtual T bottom() const noexcept = 0; + virtual T top() const noexcept = 0; + + /* Construct abstract value from integer constant */ + virtual T constant(uint32_t value) const noexcept = 0; + /* Check if value is constant */ + virtual bool is_constant(T) const noexcept = 0; + /* Unpack a constant. May return anything if is_constant() is false */ + virtual uint32_t constant_value(T) const noexcept = 0; + + /* Basic arithmetic. Division and modulo are both non-trivial + instruction sequences usually isolated in easily-identifiable + subroutines, so we don't care about them. */ + virtual T minus(T) const noexcept = 0; + virtual T add(T, T) const noexcept = 0; + virtual T sub(T, T) const noexcept = 0; + virtual T smul(T, T) const noexcept = 0; + virtual T umul(T, T) const noexcept = 0; + + /* Sign extensions */ + virtual T extub(T) const noexcept = 0; + virtual T extsb(T) const noexcept = 0; + virtual T extuw(T) const noexcept = 0; + virtual T extsw(T) const noexcept = 0; + + /* Logical operations */ + virtual T lnot(T) const noexcept = 0; + virtual T land(T, T) const noexcept = 0; + virtual T lor(T, T) const noexcept = 0; + virtual T lxor(T, T) const noexcept = 0; + + /* Comparisons. 
This operation proceeds in two steps: + * First call cmp(x, y) to check if the values are comparable. If + this returns false, the test result should be Top. + * If the values are comparable, call cmpu(x, y) or cmps(x, y), which + returns a negative number if x < y, 0 if x == y, and a positive + number if x > y. */ + virtual bool cmp(T, T) const noexcept = 0; + virtual int cmpu(T, T) const noexcept = 0; + virtual int cmps(T, T) const noexcept = 0; +}; + +} /* namespace FxOS */ + +#endif /* FXOS_AI_ABSTRACTDOMAIN_H */ diff --git a/include/fxos/ai/RelConst.h b/include/fxos/ai/RelConst.h new file mode 100644 index 0000000..7f26f13 --- /dev/null +++ b/include/fxos/ai/RelConst.h @@ -0,0 +1,126 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/ai/RelConst: Abstract domain of relative constants +// +// This abstract domain represents values of the form (base + offset) where +// the base is a register, a symbolic function argument, or the initial value +// of a callee-saved register in a function, and the offset is a constant. +// +// I designed this domain for this particular application, although there are +// surely similar takes in decompilation literature. +// +// For readers not familiar with abstract interpretation, the idea is that one +// can run a program by calculating not the exact values being manipulated, +// but instead approximations ("abstractions") of these. The extra freedom +// provided by the approximation makes it possible to define methods of +// execution that always terminate, thus analyzing any program in finite time. +// This particular domain does little in the way of approximation, but has +// benefits because of its ability to track some symbolic values. 
+//--- + +#ifndef FXOS_AI_RELCONST_H +#define FXOS_AI_RELCONST_H + +#include +#include + +namespace FxOS { + +/* The lattice of relative constants (base + offset) */ +struct RelConst +{ + enum { Bottom=1, Top=2 }; + + /* The following fields concurrently indicate the base. The order of + resolution is as follows (non-trivial types in parentheses): + * If [spe] is equal to Bottom or Top, this is the value. + * If [arg] is non-zero, the value of the arg-th argument is used. + * If [org] is non-zero, the original value of the associated + callee-saved register is used. (CpuRegister) + * If [reg] is non-zero, the base is that register. (CpuRegister) + + For efficiency, checking [base==0] will tell apart plain old + constants from values with bases (and specials but these are usually + handled first). */ + union { + struct { + uint8_t spe; + uint8_t arg; + uint8_t org; + uint8_t reg; + }; + uint32_t base; + }; + + /* The constant value, or offset. The signedness of this value depends + on the context where it is used: + + * For special values, the members are unused. + * For [arg] and [org] and [reg] bases with additive offset + semantics, the signedness has no effect. Operations with + non-trivial effect on signs such as multiplication are not + supported with bases. + * For zero bases, the interpretation is instruction-dependent. */ + union { + int32_t ival; + uint32_t uval; + }; + + //--- + // RelConst methods + //--- + + /* Default constructors gives zero */ + RelConst() = default; + + /* Evaluates to true if the location is non-trivial, ie. if it is + neither Top nor Bottom. 
*/ + operator bool () const noexcept; + + /* String representation */ + std::string str() const noexcept; +}; + +class RelConstDomain: public AbstractDomain +{ +public: + /* Trivial instances */ + RelConstDomain() = default; + + /* Implementation of the AbstractDomain specification */ + + RelConst bottom() const noexcept override; + RelConst top() const noexcept override; + + RelConst constant(uint32_t value) const noexcept override; + bool is_constant(RelConst) const noexcept override; + uint32_t constant_value(RelConst) const noexcept override; + + RelConst minus(RelConst) const noexcept override; + RelConst add(RelConst, RelConst) const noexcept override; + RelConst sub(RelConst, RelConst) const noexcept override; + RelConst smul(RelConst, RelConst) const noexcept override; + RelConst umul(RelConst, RelConst) const noexcept override; + + RelConst extub(RelConst) const noexcept override; + RelConst extsb(RelConst) const noexcept override; + RelConst extuw(RelConst) const noexcept override; + RelConst extsw(RelConst) const noexcept override; + + RelConst lnot(RelConst) const noexcept override; + RelConst land(RelConst, RelConst) const noexcept override; + RelConst lor(RelConst, RelConst) const noexcept override; + RelConst lxor(RelConst, RelConst) const noexcept override; + + bool cmp(RelConst, RelConst) const noexcept override; + int cmpu(RelConst, RelConst) const noexcept override; + int cmps(RelConst, RelConst) const noexcept override; +}; + +} /* namespace FxOS */ + +#endif /* FXOS_AI_RELCONST_H */ diff --git a/include/fxos/domains.h b/include/fxos/domains.h deleted file mode 100644 index f658167..0000000 --- a/include/fxos/domains.h +++ /dev/null @@ -1,159 +0,0 @@ -//--- -// fxos.domains: Abstract interpretation domains -//--- - -#ifndef FXOS_DOMAINS_H -#define FXOS_DOMAINS_H - -#include -#include - -namespace FxOS { - -/* An abstract domain over a user-defined lattice. 
*/ -template -class AbstractDomain -{ -public: - /* Bottom and Top constants */ - virtual T bottom() const noexcept = 0; - virtual T top() const noexcept = 0; - - /* Construct abstract value from integer constant */ - virtual T constant(uint32_t value) const noexcept = 0; - /* Check if value is constant */ - virtual bool is_constant(T) const noexcept = 0; - /* Unpack a constant */ - virtual uint32_t constant_value(T) const = 0; - - /* Basic arithmetic. Division and modulo are both non-trivial - instruction sequences usually isolated in easily-identifiable - subroutines, so we don't care about them. */ - virtual T minus(T) const noexcept = 0; - virtual T add(T, T) const noexcept = 0; - virtual T sub(T, T) const noexcept = 0; - virtual T smul(T, T) const noexcept = 0; - virtual T umul(T, T) const noexcept = 0; - - /* Sign extensions */ - virtual T extub(T) const noexcept = 0; - virtual T extsb(T) const noexcept = 0; - virtual T extuw(T) const noexcept = 0; - virtual T extsw(T) const noexcept = 0; - - /* Logical operations */ - virtual T lnot(T) const noexcept = 0; - virtual T land(T, T) const noexcept = 0; - virtual T lor(T, T) const noexcept = 0; - virtual T lxor(T, T) const noexcept = 0; - - /* Comparisons. This operation proceeds in two steps: - * First call cmp(x, y) to check if the values are comparable. If - this returns false, the test result should be Top. - * If the values are comparable, call cmpu(x, y) or cmps(x, y), which - returns a negative number if x < y, 0 if x == y, and a positive - number if x > y. */ - virtual bool cmp(T, T) const noexcept = 0; - virtual int cmpu(T, T) const noexcept = 0; - virtual int cmps(T, T) const noexcept = 0; -}; - -//--- -// Domain of relative constants -//--- - -/* The lattice of relative constants (base + offset) */ -struct RelConst -{ - enum { Bottom=1, Top=2 }; - - /* The following fields concurrently indicate the base. 
The order of - resolution is as follows (non-trivial types in parentheses): - * If [spe] is equal to Bottom or Top, this is the value. - * If [arg] is non-zero, the value of the arg-th argument is used. - * If [org] is non-zero, the original value of the associated - callee-saved register is used. (CpuRegister) - * If [reg] is non-zero, the base is that register. (CpuRegister) - - For efficiency, checking [base==0] will tell apart plain old - constants from values with bases (and specials but these are usually - handled first). */ - union { - struct { - uint8_t spe; - uint8_t arg; - uint8_t org; - uint8_t reg; - }; - - uint32_t base; - }; - - /* The constant value, or offset. The signedness of this value depends - on the context where it is used: - - * For special values, the members are unused. - * For [arg] and [org] and [reg] bases with additive offset - semantics, the signedness has no effect. Operations with - non-trivial effect on signs such as multiplication are not - supported with bases. - * For zero bases, the interpretation is instruction-dependent. */ - union { - int32_t ival; - uint32_t uval; - }; - - //--- - // RelConst methods - //--- - - /* Default constructors gives zero */ - RelConst() = default; - - /* Evaluates to true if the location is non-trivial, ie. if it is - neither Top nor Bottom. 
*/ - operator bool () const noexcept; - - /* String representation */ - std::string str() const noexcept; -}; - -class RelConstDomain: public AbstractDomain -{ -public: - /* Trivial instances */ - RelConstDomain() = default; - - /* Implementation of the AbstractDomain specification */ - - RelConst bottom() const noexcept override; - RelConst top() const noexcept override; - - RelConst constant(uint32_t value) const noexcept override; - bool is_constant(RelConst) const noexcept override; - uint32_t constant_value(RelConst) const override; - - RelConst minus(RelConst) const noexcept override; - RelConst add(RelConst, RelConst) const noexcept override; - RelConst sub(RelConst, RelConst) const noexcept override; - RelConst smul(RelConst, RelConst) const noexcept override; - RelConst umul(RelConst, RelConst) const noexcept override; - - RelConst extub(RelConst) const noexcept override; - RelConst extsb(RelConst) const noexcept override; - RelConst extuw(RelConst) const noexcept override; - RelConst extsw(RelConst) const noexcept override; - - RelConst lnot(RelConst) const noexcept override; - RelConst land(RelConst, RelConst) const noexcept override; - RelConst lor(RelConst, RelConst) const noexcept override; - RelConst lxor(RelConst, RelConst) const noexcept override; - - bool cmp(RelConst, RelConst) const noexcept override; - int cmpu(RelConst, RelConst) const noexcept override; - int cmps(RelConst, RelConst) const noexcept override; -}; - -} /* namespace FxOS */ - -#endif /* FXOS_DOMAINS_H */ diff --git a/include/fxos/lang.h b/include/fxos/lang.h index 983de36..7822fe4 100644 --- a/include/fxos/lang.h +++ b/include/fxos/lang.h @@ -1,12 +1,35 @@ -//--- -// fxos.lang: Assembler language specification +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // 
+//---------------------------------------------------------------------------// +// fxos/lang: Assembler language syntax +// +// This file defines the syntactic tools needed to read and manipulate +// assembler instructions. +// +// The CpuRegister class is a glorified type-safe enumeration. Registers can be +// named, fi. CpuRegister::R0; they can be constructed from their lowercase +// name as a string, fi. CpuRegister("r0"); and they can be printed with the +// .str() method. +// +// The Argument struct represents an argument to an instruction. This is +// syntactic only; for instance Deref (@rn) does not mean that memory is +// accessed, since [jmp @rn] or [ocbwb @rn] do not actually access @rn. +// Constructor functions such as Argument_Deref() are provided. +// +// Finally, the Instruction struct represents an abstract instruction out of +// context. Each Instruction object only models one particular instance of one +// particular instruction, for instance [mov #14, r2] and not [mov #imm, rn]. +// The rationale for this is disassembly speed and a number of simplifications +// for passes; and there are less than 65'000 non-DSP instructions anyway. //--- -#ifndef LIBFXOS_LANG_H -#define LIBFXOS_LANG_H +#ifndef FXOS_LANG_H +#define FXOS_LANG_H #include -#include #include namespace FxOS { @@ -15,85 +38,85 @@ namespace FxOS { class CpuRegister { public: - enum CpuRegisterName: int8_t { - /* Value 0 is reserved for special purposes such as "no register" */ - UNDEFINED = 0, - /* Caller-saved general-purpose registers */ - R0, R1, R2, R3, R4, R5, R6, R7, - /* Banked general-purpose registers. fxos does not account for - banking identities, these are just for naming and output. 
*/ - R0B, R1B, R2B, R3B, R4B, R5B, R6B, R7B, - /* Callee-saved general-purpose registers */ - R8, R9, R10, R11, R12, R13, R14, R15, - /* System registers */ - MACH, MACL, PR, PC, - /* Control registers */ - SR, SSR, SPC, GBR, VBR, DBR, SGR, - }; + enum CpuRegisterName: int8_t { + /* Value 0 is reserved for special purposes such as "no register" */ + UNDEFINED = 0, + /* Caller-saved general-purpose registers */ + R0, R1, R2, R3, R4, R5, R6, R7, + /* Banked general-purpose registers. fxos does not account for + banking identities, these are just for naming and output. */ + R0B, R1B, R2B, R3B, R4B, R5B, R6B, R7B, + /* Callee-saved general-purpose registers */ + R8, R9, R10, R11, R12, R13, R14, R15, + /* System registers */ + MACH, MACL, PR, PC, + /* Control registers */ + SR, SSR, SPC, GBR, VBR, DBR, SGR, + }; - CpuRegister() = default; + CpuRegister() = default; - /* Construction from CpuRegisterName */ - constexpr CpuRegister(CpuRegisterName name): m_name(name) {} + /* Construction from CpuRegisterName */ + constexpr CpuRegister(CpuRegisterName name): m_name(name) {} - /* Construction from string */ - CpuRegister(std::string register_name); + /* Construction from string */ + CpuRegister(std::string register_name); - /* Conversion to string */ - std::string str() const noexcept; + /* Conversion to string */ + std::string str() const noexcept; - /* Conversion to CpuRegisterName for switch statements */ - constexpr operator CpuRegisterName() noexcept { return m_name; } + /* Conversion to CpuRegisterName for switch statements */ + constexpr operator CpuRegisterName() noexcept { return m_name; } - /* Comparison operators */ - constexpr bool operator==(CpuRegister r) const { - return m_name == r.m_name; - } - constexpr bool operator!=(CpuRegister r) const { - return m_name != r.m_name; - } + /* Comparison operators */ + constexpr bool operator==(CpuRegister r) const { + return m_name == r.m_name; + } + constexpr bool operator!=(CpuRegister r) const { + return m_name != 
r.m_name; + } private: - CpuRegisterName m_name; + CpuRegisterName m_name; }; /* Addressing modes for arguments */ struct Argument { - /* Various addressing modes in the language */ - enum Kind: int8_t { - Reg, /* rn */ - Deref, /* @rn */ - PostInc, /* @rn+ */ - PreDec, /* @-rn */ - StructDeref, /* @(disp,rn) or @(disp,gbr) */ - ArrayDeref, /* @(r0,rn) or @(r0,gbr) */ - PcRel, /* @(disp,pc) with 4-alignment correction */ - PcJump, /* pc+disp */ - PcAddr, /* pc+disp with special delayed slot semantics */ - Imm, /* #imm */ - }; + /* Various addressing modes in the language */ + enum Kind: int8_t { + Reg, /* rn */ + Deref, /* @rn */ + PostInc, /* @rn+ */ + PreDec, /* @-rn */ + StructDeref, /* @(disp,rn) or @(disp,gbr) */ + ArrayDeref, /* @(r0,rn) or @(r0,gbr) */ + PcRel, /* @(disp,pc) with 4-alignment correction */ + PcJump, /* pc+disp */ + PcAddr, /* pc+disp with special delayed slot semantics */ + Imm, /* #imm */ + }; - Argument() = default; + Argument() = default; - /* String representation */ - std::string str() const; + /* String representation */ + std::string str() const; - /* Addressing mode */ - Kind kind; - /* Base register. Valid for all modes except Imm */ - CpuRegister base; - /* Index register. Valid for ArrayDeref */ - CpuRegister index; - /* Operation size (0, 1, 2 or 4). Generally a multiplier for disp */ - int8_t opsize; + /* Addressing mode */ + Kind kind; + /* Base register. Valid for all modes except Imm */ + CpuRegister base; + /* Index register. Valid for ArrayDeref */ + CpuRegister index; + /* Operation size (0, 1, 2 or 4). Generally a multiplier for disp */ + int8_t opsize; - union { - /* Displacement in bytes. For StructDeref, PcRel, PcJump, and PcAddr */ - int disp; - /* Immediate value. Valid for Imm */ - int imm; - }; + union { + /* Displacement in bytes. For StructDeref, PcRel, PcJump, and PcAddr */ + int disp; + /* Immediate value. 
Valid for Imm */ + int imm; + }; }; /* Argument constructors */ @@ -112,42 +135,42 @@ Argument Argument_Imm(int imm); /* Assembler instruction */ struct Instruction { - Instruction() = default; + Instruction() = default; - /* Construct with one or several arguments */ - Instruction(char const *mnemonic); - Instruction(char const *mnemonic, Argument arg); - Instruction(char const *mnemonic, Argument arg1, Argument arg2); + /* Construct with one or several arguments */ + Instruction(char const *mnemonic); + Instruction(char const *mnemonic, Argument arg); + Instruction(char const *mnemonic, Argument arg1, Argument arg2); - /* Original opcode. Initialized to 0 when unset, which is an invalid - instruction by design. */ - uint16_t opcode; - /* Operation size (0, 1, 2 or 4) */ - int8_t opsize; - /* Number of arguments */ - uint8_t arg_count; + /* Original opcode. Initialized to 0 when unset, which is an invalid + instruction by design. */ + uint16_t opcode; + /* Operation size (0, 1, 2 or 4) */ + int8_t opsize; + /* Number of arguments */ + uint8_t arg_count; - /* Mnemonic **without the size indicator** */ - char mnemonic[12]; - /* Arguments (up to 2) */ - Argument args[2]; + /* Mnemonic **without the size indicator** */ + char mnemonic[12]; + /* Arguments (up to 2) */ + Argument args[2]; - //--- - // Instruction classes - //--- + //--- + // Instruction classes + //--- - /* Check whether instruction terminates the function */ - bool isterminal() const noexcept; - /* Check whether instruction is an unconditional jump */ - bool isjump() const noexcept; - /* Check whether it's a conditional jump */ - bool iscondjump() const noexcept; - /* Check whether instruction has a delay slot */ - bool isdelayed() const noexcept; - /* Check whether instruction can be used in a delay slot */ - bool isvaliddelayslot() const noexcept; + /* Check whether instruction terminates the function */ + bool isterminal() const noexcept; + /* Check whether instruction is an unconditional jump */ + 
bool isjump() const noexcept; + /* Check whether it's a conditional jump */ + bool iscondjump() const noexcept; + /* Check whether instruction has a delay slot */ + bool isdelayed() const noexcept; + /* Check whether instruction can be used in a delay slot */ + bool isvaliddelayslot() const noexcept; }; } /* namespace FxOS */ -#endif /* LIBFXOS_LANG_H */ +#endif /* FXOS_LANG_H */ diff --git a/include/fxos/memory.h b/include/fxos/memory.h index 3c6d41c..50a1944 100644 --- a/include/fxos/memory.h +++ b/include/fxos/memory.h @@ -1,9 +1,28 @@ -//--- -// fxos.memory: Standard memory regions +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/memory: Standard memory areas and regions +// +// This file provides some basic information about memory layout. +// +// The MemoryArea structure is a finite type with exactly 6 instances +// describing the virtual memory areas outlined by the MMU for both the SH7705 +// and the SH7305. +// +// The MemoryRegion structure is used to represent address ranges. The standard +// regions of the SH7305 are defined, including some different placements for +// RAM (which depend on the BSC configuration), but this does not restrict the +// ability of MemoryRegion to describe eg. SH7705 on-chip memory. +// +// MemoryRegion is frequently used in commands to request start/end address +// pairs even when no detailed information is needed. 
//--- -#ifndef LIBFXOS_MEMORY_H -#define LIBFXOS_MEMORY_H +#ifndef FXOS_MEMORY_H +#define FXOS_MEMORY_H #include #include @@ -11,100 +30,84 @@ namespace FxOS { -/* Memory area enumeration with a few tools */ -class MemoryArea +struct MemoryArea { public: - enum MemoryAreaName: int8_t { - /* Userspace seen from user and privileged mode */ - U0, P0, - /* Second half of memory, only for privileged mode */ - P1, P2, P3, P4, - }; + /* Userspace seen from user and privileged mode */ + static MemoryArea U0, P0; + /* Second half of memory, only for privileged mode */ + static MemoryArea P1, P2, P3, P4; - MemoryArea() = default; + /* Start and end of area (both included) */ + uint32_t start, end; + /* Size of area */ + uint32_t size() const { + return this->end - this->start + 1; + } + /* Name ("U0", "P0", "P1", "P2", "P3", or "P4") */ + char const *name; - /* Construction from MemoryAreaName */ - constexpr MemoryArea(MemoryAreaName name): m_name(name) {} - - /* Start, end (last byte in area) and size of area */ - uint32_t start() const noexcept; - uint32_t end() const noexcept; - uint32_t size() const noexcept; - - /* Conversion to MemoryAreaName for switch */ - constexpr operator MemoryAreaName () noexcept { return m_name; } - - /* Comparison operators */ - constexpr bool operator == (MemoryArea a) const { - return m_name == a.m_name; - } - constexpr bool operator != (MemoryArea a) const { - return m_name != a.m_name; - } - -private: - MemoryAreaName m_name; + constexpr bool operator == (MemoryArea const &other) const { + return this->start == other.start && this->end == other.end; + } }; struct MemoryRegion { - /* Address space regions that correspond to standard (ie. 
contiguous - multi-addressable) memory */ - static MemoryRegion const ROM; - static MemoryRegion const RAM; - static MemoryRegion const ROM_P2; - static MemoryRegion const RAM_P2; - static MemoryRegion const RS; - static MemoryRegion const ILRAM; - static MemoryRegion const XRAM; - static MemoryRegion const YRAM; + /* Address space regions that correspond to standard (ie. contiguous + multi-addressable) memory */ + static MemoryRegion const ROM, ROM_P2; + static MemoryRegion const RAM, RAM_P2, RAM_8C, RAM_8C_P2; + static MemoryRegion const RS; + static MemoryRegion const ILRAM; + static MemoryRegion const XRAM; + static MemoryRegion const YRAM; - /* All standard regions */ - static std::array const &all(); + /* All standard regions */ + static std::array const &all(); - /* Determine if an address falls into one of the standard regions */ - static MemoryRegion const *region_for(uint32_t address); - /* Determine if a region falls entirely into one of the standard regions */ - static MemoryRegion const *region_for(MemoryRegion r); + /* Determine if an address falls into one of the standard regions */ + static MemoryRegion const *region_for(uint32_t address); + /* Determine if a region falls entirely into one of the standard regions */ + static MemoryRegion const *region_for(MemoryRegion r); - /* Empty region at 0 */ - MemoryRegion(); - /* Short constructor which calls guess_flags() */ - MemoryRegion(std::string name, uint32_t start, uint32_t end, - bool writable); - /* Short constructor for standard regions only */ - MemoryRegion(std::string standard_region_name); + /* Empty region at 0 */ + MemoryRegion(); + /* Short constructor which calls guess_flags() */ + MemoryRegion(std::string name, uint32_t start, uint32_t end, + bool writable); + /* Short constructor for standard regions only */ + MemoryRegion(std::string standard_region_name); - /* Region name */ - std::string name {}; + /* Region name */ + std::string name; - /* Start address and end address. 
Generally the end address has one - additional byte. This is okay since no region is supposed to extend - to the very end of the memory. */ - uint32_t start, end; + /* Start address and end address. Generally the end address has one + additional byte. This is okay since no region is supposed to extend + to the very end of the memory. */ + uint32_t start, end; - /* The region is writable under normal conditions */ - bool writable; - /* The cache is active in that region (if enabled) */ - bool cacheable; - /* The MMU is active in that region (if enabled) */ - bool mappable; + /* The region is writable under normal conditions */ + bool writable; + /* The cache is active in that region (if enabled) */ + bool cacheable; + /* The MMU is active in that region (if enabled) */ + bool mappable; - /* Returns the size of the region */ - uint32_t size() const noexcept; + /* Returns the size of the region */ + uint32_t size() const noexcept; - /* Returns the area associated to the region (assuming it is fully - contained in one, which should always be the case) */ - MemoryArea area() const noexcept; + /* Returns the area associated to the region (assuming it is fully + contained in one, which should always be the case) */ + MemoryArea area() const noexcept; private: - static std::array const m_all; + static std::array const m_all; - /* Automatically guess the cacheable and mappable flags */ - void guess_flags() noexcept; + /* Automatically guess the cacheable and mappable flags */ + void guess_flags() noexcept; }; } /* namespace FxOS */ -#endif /* LIBFXOS_MEMORY_H */ +#endif /* FXOS_MEMORY_H */ diff --git a/include/fxos/semantics.h b/include/fxos/semantics.h index 39cd6e0..9888965 100644 --- a/include/fxos/semantics.h +++ b/include/fxos/semantics.h @@ -6,7 +6,7 @@ #define LIBFXOS_SEMANTICS_H #include -#include +#include #include #include #include diff --git a/lib/ai/RelConst.cpp b/lib/ai/RelConst.cpp new file mode 100644 index 0000000..1feb246 --- /dev/null +++ 
b/lib/ai/RelConst.cpp @@ -0,0 +1,309 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +namespace FxOS { + +//--- +// Quick helpers +//--- + +auto constexpr Top = RelConst::Top; +auto constexpr Bottom = RelConst::Bottom; + +/* General prelude that propagates Top, then Bottom */ +#define special(r1, r2) { \ + if((r1).spe == Top || (r2).spe == Top) \ + return top(); \ + if((r1).spe || (r2).spe) \ + return bottom(); \ +} + +inline RelConst RelConstDomain::bottom() const noexcept +{ + RelConst b {}; + b.spe = Bottom; + return b; +} + +inline RelConst RelConstDomain::top() const noexcept +{ + RelConst b {}; + b.spe = Top; + return b; +} + +RelConst RelConstDomain::constant(uint32_t value) const noexcept +{ + RelConst b {}; + b.uval = value; + return b; +} + +bool RelConstDomain::is_constant(RelConst r) const noexcept +{ + return r.base == 0; +} + +uint32_t RelConstDomain::constant_value(RelConst r) const noexcept +{ + if(!is_constant(r)) + return -1; + return r.uval; +} + +//--- +// Basic arithmetic +//--- + +RelConst RelConstDomain::minus(RelConst r) const noexcept +{ + /* Propagate Bottom and Top */ + if(r.spe) + return r; + /* This domain does not support multiplicative coefficients for the + base. If the base is non-zero, return Top. */ + if(r.base) + return top(); + + r.ival = -r.ival; + return r; +} + +RelConst RelConstDomain::add(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + + /* This domain does not support cumulative bases. 
The sum can only be + represented if at least one of the values has no base */ + if(r1.base && r2.base) + return top(); + + RelConst r; + r.base = r1.base | r2.base; + r.uval = r1.uval + r2.uval; + return r; +} + +RelConst RelConstDomain::sub(RelConst r1, RelConst r2) const noexcept +{ + /* This domain does not support difference between bases. The + difference can only be represented in a few restricted cases. */ + special(r1, r2); + + /* If r2 has no base, keep r1's base. */ + if(!r2.base) { + r1.uval -= r2.uval; + return r1; + } + + /* If r2 has exactly the same base as r1, cancel it. */ + if(r1.base == r2.base) { + r1.base = 0; + r1.uval -= r2.uval; + return r1; + } + + /* Otherwise, the result cannot be represented. */ + return top(); +} + +RelConst RelConstDomain::smul(RelConst r1, RelConst r2) const noexcept +{ + /* No base can be multiplied except by 1. Typically there will be no + such constant because it would be optimized away. */ + special(r1, r2); + + /* Give up if there is any base */ + if(r1.base || r2.base) + return top(); + + /* Multiply with sign */ + r1.ival *= r2.ival; + return r1; +} + +RelConst RelConstDomain::umul(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + if(r1.base || r2.base) + return top(); + + r1.uval *= r2.uval; + return r1; +} + +//--- +// Sign extensions +//--- + +RelConst RelConstDomain::extub(RelConst r) const noexcept +{ + /* The representation does not support sign extensions on bases, so we + just return top whenever there is one. 
*/ + if(r.spe) + return r; + if(r.base) + return top(); + + r.uval = (uint8_t)r.uval; + return r; +} + +RelConst RelConstDomain::extsb(RelConst r) const noexcept +{ + if(r.spe) + return r; + if(r.base) + return top(); + + r.ival = (int8_t)r.ival; + return r; +} + +RelConst RelConstDomain::extuw(RelConst r) const noexcept +{ + if(r.spe) + return r; + if(r.base) + return top(); + + r.uval = (uint16_t)r.uval; + return r; +} + +RelConst RelConstDomain::extsw(RelConst r) const noexcept +{ + if(r.spe) + return r; + if(r.base) + return top(); + + r.ival = (int16_t)r.ival; + return r; +} + +//--- +// Logical operations +//--- + +RelConst RelConstDomain::lnot(RelConst r) const noexcept +{ + /* Don't try to catch very special cases */ + if(r.spe) + return r; + if(r.base) + return top(); + + r.uval = ~r.uval; + return r; +} + +RelConst RelConstDomain::land(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + if(r1.base || r2.base) + return top(); + + r1.uval &= r2.uval; + return r1; +} + +RelConst RelConstDomain::lor(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + if(r1.base || r2.base) + return top(); + + r1.uval |= r2.uval; + return r1; +} + +RelConst RelConstDomain::lxor(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + if(r1.base || r2.base) + return top(); + + r1.uval ^= r2.uval; + return r1; +} + +//--- +// Comparisons +//--- + +/* TODO: RelConst comparison improvements using typing + + Two values base+d1 and base+d2 (sharing the same base) can be proven to + compare as unsigned if the base has a known type and d1 and d2 are smaller + than the size of that type. This derives from the implicit assumption that a + full object cannot cross from P4 space to P0. 
*/ + +bool RelConstDomain::cmp(RelConst r1, RelConst r2) const noexcept +{ + /* Not very good */ + return (r1.base == 0 && r2.base == 0); +} + +int RelConstDomain::cmpu(RelConst r1, RelConst r2) const noexcept +{ + /* We can't just subtract because of overflows (information is lost + because we don't have the V bit) */ + return (r1.uval > r2.uval) - (r1.uval < r2.uval); +} + +int RelConstDomain::cmps(RelConst r1, RelConst r2) const noexcept +{ + return (r1.ival > r2.ival) - (r1.ival < r2.ival); +} + +//--- +// Other functions +//--- + +RelConst::operator bool () const noexcept +{ + return !spe; +} + +std::string RelConst::str() const noexcept +{ + using RegName = CpuRegister::CpuRegisterName; + + if(!base && !uval) + return "0"; + if(spe == Bottom) + return "Bottom"; + if(spe == Top) + return "Top"; + + std::string str; + if(arg) str = format("arg%d", arg); + if(org) str = format("org_%s", CpuRegister((RegName)org).str()); + if(reg) str = CpuRegister((RegName)org).str(); + + if(!uval) + return str; + + if(ival >= -256 && ival < 256) { + uint32_t v = 0; + if(str.size() && ival > 0) str += "+", v = ival; + if(str.size() && ival < 0) str += "-", v = -ival; + + return str + format("%d", v); + } + else { + return str + format("%08x", uval); + } +} + +} /* namespace FxOS */ diff --git a/lib/domains/relconst.cpp b/lib/domains/relconst.cpp deleted file mode 100644 index dcecbd7..0000000 --- a/lib/domains/relconst.cpp +++ /dev/null @@ -1,282 +0,0 @@ -#include -#include -#include -#include - -namespace FxOS { - -//--- -// Quick helpers -//--- - -auto const Top = RelConst::Top; -auto const Bottom = RelConst::Bottom; - -/* General prelude that propagates Top, then Bottom */ -#define special(r1, r2) { \ - if((r1).spe == Top || (r2).spe == Top) return top(); \ - if((r1).spe || (r2).spe) return bottom(); \ -} - -RelConst RelConstDomain::bottom() const noexcept -{ - RelConst b {}; - b.spe = Bottom; - return b; -} - -RelConst RelConstDomain::top() const noexcept -{ - RelConst b {}; 
- b.spe = Top; - return b; -} - -RelConst RelConstDomain::constant(uint32_t value) const noexcept -{ - RelConst b {}; - b.uval = value; - return b; -} - -bool RelConstDomain::is_constant(RelConst r) const noexcept -{ - return r.base == 0; -} - -uint32_t RelConstDomain::constant_value(RelConst r) const -{ - if(!is_constant(r)) - throw std::invalid_argument("Not a constant RelConst"); - return r.uval; -} - -//--- -// Basic arithmetic -//--- - -RelConst RelConstDomain::minus(RelConst r) const noexcept -{ - /* Propagate Bottom and Top */ - if(r.spe) return r; - /* This domain does not support multiplicative coefficients for the - base. If the base is non-zero, return Top. */ - if(r.base) return top(); - - r.ival = -r.ival; - return r; -} - -RelConst RelConstDomain::add(RelConst r1, RelConst r2) const noexcept -{ - special(r1, r2); - - /* This domain does not support cumulative bases. The sum can only be - represented if at least one of the values has no base */ - if(r1.base && r2.base) return top(); - - RelConst r; - r.base = r1.base | r2.base; - r.uval = r1.uval + r2.uval; - return r; -} - -RelConst RelConstDomain::sub(RelConst r1, RelConst r2) const noexcept -{ - /* This domain does not support difference between bases. The - difference can only be represented in a few restricted cases. */ - special(r1, r2); - - /* If r2 has no base, keep r1's base. */ - if(!r2.base) - { - r1.uval -= r2.uval; - return r1; - } - - /* If r2 has exactly the same base as r1, cancel it. */ - if(r1.base == r2.base) - { - r1.base = 0; - r1.uval -= r2.uval; - return r1; - } - - /* Otherwise, the result cannot be represented. */ - return top(); -} - -RelConst RelConstDomain::smul(RelConst r1, RelConst r2) const noexcept -{ - /* No base can be multiplied except by 1. Typically there will be no - such constant because it would be optimized away. 
*/ - special(r1, r2); - - /* Give up if there is any base */ - if(r1.base || r2.base) return top(); - - /* Multiply with sign */ - r1.ival *= r2.ival; - return r1; -} - -RelConst RelConstDomain::umul(RelConst r1, RelConst r2) const noexcept -{ - special(r1, r2); - if(r1.base || r2.base) return top(); - - r1.uval *= r2.uval; - return r1; -} - -//--- -// Sign extensions -//--- - -RelConst RelConstDomain::extub(RelConst r) const noexcept -{ - /* The representation does not support sign extensions on bases, so we - just return top whenever there is one. */ - if(r.spe) return r; - if(r.base) return top(); - - r.uval = (uint8_t)r.uval; - return r; -} - -RelConst RelConstDomain::extsb(RelConst r) const noexcept -{ - if(r.spe) return r; - if(r.base) return top(); - - r.ival = (int8_t)r.ival; - return r; -} - -RelConst RelConstDomain::extuw(RelConst r) const noexcept -{ - if(r.spe) return r; - if(r.base) return top(); - - r.uval = (uint16_t)r.uval; - return r; -} - -RelConst RelConstDomain::extsw(RelConst r) const noexcept -{ - if(r.spe) return r; - if(r.base) return top(); - - r.ival = (int16_t)r.ival; - return r; -} - -//--- -// Logical operations -//--- - -RelConst RelConstDomain::lnot(RelConst r) const noexcept -{ - /* Don't try to catch very special cases */ - if(r.spe) return r; - if(r.base) return top(); - - r.uval = ~r.uval; - return r; -} - -RelConst RelConstDomain::land(RelConst r1, RelConst r2) const noexcept -{ - special(r1, r2); - if(r1.base || r2.base) return top(); - - r1.uval &= r2.uval; - return r1; -} - -RelConst RelConstDomain::lor(RelConst r1, RelConst r2) const noexcept -{ - special(r1, r2); - if(r1.base || r2.base) return top(); - - r1.uval |= r2.uval; - return r1; -} - -RelConst RelConstDomain::lxor(RelConst r1, RelConst r2) const noexcept -{ - special(r1, r2); - if(r1.base || r2.base) return top(); - - r1.uval ^= r2.uval; - return r1; -} - -//--- -// Comparisons -//--- - -/* TODO: RelConst comparison improvements using typing - - Two values base+d1 
and base+d2 (sharing the same base) can be proven to - compare as unsigned if the base has a known type and d1 and d2 are smaller - than the size of that type. This derives from the implicit assumption that a - full object cannot cross from P4 space to P0. */ - -bool RelConstDomain::cmp(RelConst r1, RelConst r2) const noexcept -{ - /* Not very good */ - return (r1.base == 0 && r2.base == 0); -} - -int RelConstDomain::cmpu(RelConst r1, RelConst r2) const noexcept -{ - /* We can't just subtract because of overflows (information is lost - because we don't have the V bit) */ - return (r1.uval > r2.uval) - (r1.uval < r2.uval); -} - -int RelConstDomain::cmps(RelConst r1, RelConst r2) const noexcept -{ - return (r1.ival > r2.ival) - (r1.ival < r2.ival); -} - -//--- -// Other functions -//--- - -RelConst::operator bool () const noexcept -{ - return !spe; -} - -std::string RelConst::str() const noexcept -{ - using RegName = CpuRegister::CpuRegisterName; - - if(!base && !uval) return "0"; - if(spe == Bottom) return "Bottom"; - if(spe == Top) return "Top"; - - std::string str; - if(arg) str = format("arg%d", arg); - if(org) str = format("org_%s", CpuRegister((RegName)org).str()); - if(reg) str = CpuRegister((RegName)org).str(); - - if(!uval) return str; - - if(ival >= -256 && ival < 256) - { - uint32_t v = 0; - if(str.size() && ival > 0) str += "+", v = ival; - if(str.size() && ival < 0) str += "-", v = -ival; - - return str + format("%d", v); - } - else - { - return str + format("%08x", uval); - } -} - -} /* namespace FxOS */ diff --git a/lib/lang.cpp b/lib/lang.cpp index 54c69de..cb11616 100644 --- a/lib/lang.cpp +++ b/lib/lang.cpp @@ -1,9 +1,14 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// + 
#include #include -#include #include #include -#include namespace FxOS { @@ -11,62 +16,40 @@ namespace FxOS { // CPU registers //--- -using Reg = CpuRegister::CpuRegisterName; - -static std::map regnames = { - { Reg::R0, "r0" }, - { Reg::R1, "r1" }, - { Reg::R2, "r2" }, - { Reg::R3, "r3" }, - { Reg::R4, "r4" }, - { Reg::R5, "r5" }, - { Reg::R6, "r6" }, - { Reg::R7, "r7" }, - { Reg::R0B, "r0_bank" }, - { Reg::R1B, "r1_bank" }, - { Reg::R2B, "r2_bank" }, - { Reg::R3B, "r3_bank" }, - { Reg::R4B, "r4_bank" }, - { Reg::R5B, "r5_bank" }, - { Reg::R6B, "r6_bank" }, - { Reg::R7B, "r7_bank" }, - { Reg::R8, "r8" }, - { Reg::R9, "r9" }, - { Reg::R10, "r10" }, - { Reg::R11, "r11" }, - { Reg::R12, "r12" }, - { Reg::R13, "r13" }, - { Reg::R14, "r14" }, - { Reg::R15, "r15" }, - { Reg::MACH, "mach" }, - { Reg::MACL, "macl" }, - { Reg::PR, "pr" }, - { Reg::PC, "pc" }, - { Reg::SR, "sr" }, - { Reg::SSR, "ssr" }, - { Reg::SPC, "spc" }, - { Reg::GBR, "gbr" }, - { Reg::VBR, "vbr" }, - { Reg::DBR, "dbr" }, - { Reg::SGR, "sgr" }, +char const *regnames[] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r0_bank", "r1_bank", "r2_bank", "r3_bank", + "r4_bank", "r5_bank", "r6_bank", "r7_bank", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "mach", "macl", "pr", "pc", + "sr", "ssr", "spc", "gbr", "vbr", "dbr", "sgr" }; -/* Construction from string - pretty slow */ +/* Construction from string */ CpuRegister::CpuRegister(std::string name) { - for(auto &it: regnames) if(it.second == name) - { - m_name = it.first; - return; - } + int regcount = (sizeof regnames / sizeof regnames[0]); + char const *name_c = name.c_str(); - throw std::invalid_argument("invalid CpuRegister name"); + for(int i = 0; i < regcount; i++) { + if(!strcmp(regnames[i], name_c)) { + m_name = CpuRegisterName(i+1); + return; + } + } + + m_name = CpuRegister::UNDEFINED; } /* Conversion to string */ std::string CpuRegister::str() const noexcept { - return regnames.at(m_name); + int regcount = (sizeof regnames / 
sizeof regnames[0]); + int i = m_name - 1; + + if(i < 0 || i >= regcount) + return format("", i+1); + return regnames[i]; } //--- @@ -77,117 +60,117 @@ std::string CpuRegister::str() const noexcept Argument Argument_Reg(CpuRegister base) { - Argument arg; - arg.kind = Argument::Reg; - arg.base = base; - return arg; + Argument arg; + arg.kind = Argument::Reg; + arg.base = base; + return arg; } Argument Argument_Deref(CpuRegister base) { - Argument arg; - arg.kind = Argument::Deref; - arg.base = base; - return arg; + Argument arg; + arg.kind = Argument::Deref; + arg.base = base; + return arg; } Argument Argument_PostInc(CpuRegister base) { - Argument arg; - arg.kind = Argument::PostInc; - arg.base = base; - return arg; + Argument arg; + arg.kind = Argument::PostInc; + arg.base = base; + return arg; } Argument Argument_PreDec(CpuRegister base) { - Argument arg; - arg.kind = Argument::PreDec; - arg.base = base; - return arg; + Argument arg; + arg.kind = Argument::PreDec; + arg.base = base; + return arg; } Argument Argument_StructDeref(int disp, int opsize, CpuRegister base) { - Argument arg; - arg.kind = Argument::StructDeref; - arg.base = base; - arg.disp = disp; - arg.opsize = opsize; - return arg; + Argument arg; + arg.kind = Argument::StructDeref; + arg.base = base; + arg.disp = disp; + arg.opsize = opsize; + return arg; } Argument Argument_ArrayDeref(CpuRegister index, CpuRegister base) { - Argument arg; - arg.kind = Argument::ArrayDeref; - arg.base = base; - arg.index = index; - return arg; + Argument arg; + arg.kind = Argument::ArrayDeref; + arg.base = base; + arg.index = index; + return arg; } Argument Argument_PcRel(int disp, int opsize) { - Argument arg; - arg.kind = Argument::PcRel; - arg.disp = disp; - arg.opsize = opsize; - return arg; + Argument arg; + arg.kind = Argument::PcRel; + arg.disp = disp; + arg.opsize = opsize; + return arg; } Argument Argument_PcJump(int disp) { - Argument arg; - arg.kind = Argument::PcJump; - arg.disp = disp; - return arg; + 
Argument arg; + arg.kind = Argument::PcJump; + arg.disp = disp; + return arg; } Argument Argument_PcAddr(int disp) { - Argument arg; - arg.kind = Argument::PcAddr; - arg.disp = disp; - return arg; + Argument arg; + arg.kind = Argument::PcAddr; + arg.disp = disp; + return arg; } Argument Argument_Imm(int imm) { - Argument arg; - arg.kind = Argument::Imm; - arg.imm = imm; - return arg; + Argument arg; + arg.kind = Argument::Imm; + arg.imm = imm; + return arg; } /* String representation */ std::string Argument::str() const { - switch(kind) - { - case Argument::Reg: - return base.str(); - case Argument::Deref: - return format("@%s", base.str()); - case Argument::PostInc: - return format("@%s+", base.str()); - case Argument::PreDec: - return format("@-%s", base.str()); - case Argument::StructDeref: - return format("@(%d,%s)", disp, base.str().c_str()); - case Argument::ArrayDeref: - return format("@(%s,%s)", index.str().c_str(), - base.str().c_str()); - case Argument::PcRel: - return format("@(%d,pc)", disp); - case Argument::PcJump: - return format("pc+%d", disp); - case Argument::PcAddr: - return format("pc+%u", disp); - case Argument::Imm: - return format("#%d", imm); - default: - return "(invalid)"; - } + switch(kind) + { + case Argument::Reg: + return base.str(); + case Argument::Deref: + return format("@%s", base.str()); + case Argument::PostInc: + return format("@%s+", base.str()); + case Argument::PreDec: + return format("@-%s", base.str()); + case Argument::StructDeref: + return format("@(%d,%s)", disp, base.str().c_str()); + case Argument::ArrayDeref: + return format("@(%s,%s)", index.str().c_str(), + base.str().c_str()); + case Argument::PcRel: + return format("@(%d,pc)", disp); + case Argument::PcJump: + return format("pc+%d", disp); + case Argument::PcAddr: + return format("pc+%u", disp); + case Argument::Imm: + return format("#%d", imm); + default: + return "(invalid)"; + } } //--- @@ -195,45 +178,42 @@ std::string Argument::str() const //--- 
Instruction::Instruction(char const *mn): - opcode(0), opsize(0), arg_count(0) + opcode {0}, opsize {0}, arg_count {0} { - int len = strlen(mn); - int pos = std::max(0, len - 2); + int len = strlen(mn); + int pos = std::max(0, len - 2); - if(!strncmp(mn + pos, ".b", 2)) - { - opsize = 1; - len -= 2; - } - else if(!strncmp(mn + pos, ".w", 2)) - { - opsize = 2; - len -= 2; - } - else if(!strncmp(mn + pos, ".l", 2)) - { - opsize = 4; - len -= 2; - } + if(!strncmp(mn + pos, ".b", 2)) { + opsize = 1; + len -= 2; + } + else if(!strncmp(mn + pos, ".w", 2)) { + opsize = 2; + len -= 2; + } + else if(!strncmp(mn + pos, ".l", 2)) { + opsize = 4; + len -= 2; + } - len = std::min(len, 11); - strncpy(mnemonic, mn, len); - mnemonic[len] = 0; + len = std::min(len, 11); + strncpy(mnemonic, mn, len); + mnemonic[len] = 0; } Instruction::Instruction(char const *mn, Argument arg): - Instruction(mn) + Instruction(mn) { - args[0] = arg; - arg_count = 1; + args[0] = arg; + arg_count = 1; } Instruction::Instruction(char const *mn, Argument arg1, Argument arg2): - Instruction(mn) + Instruction(mn) { - args[0] = arg1; - args[1] = arg2; - arg_count = 2; + args[0] = arg1; + args[1] = arg2; + arg_count = 2; } //--- @@ -242,45 +222,55 @@ Instruction::Instruction(char const *mn, Argument arg1, Argument arg2): bool Instruction::isterminal() const noexcept { - if(!strcmp(mnemonic, "rte") || !strcmp(mnemonic, "rts")) return true; + if(!strcmp(mnemonic, "rte") || !strcmp(mnemonic, "rts")) + return true; - /* Also jmp @rn which is regarded as a terminal call */ - if(!strcmp(mnemonic,"jmp") && args[0].kind == Argument::Deref) return true; - /* Same for braf because we can't analyse further */ - if(!strcmp(mnemonic, "braf")) return true; + /* Also jmp @rn which is regarded as a terminal call */ + if(!strcmp(mnemonic,"jmp") && args[0].kind == Argument::Deref) + return true; - return false; + /* Same for braf because we can't analyse further */ + if(!strcmp(mnemonic, "braf")) + return true; + + return 
false; } bool Instruction::isjump() const noexcept { - return !strcmp(mnemonic, "bra"); + return !strcmp(mnemonic, "bra"); } bool Instruction::iscondjump() const noexcept { - char const *v[] = { - "bf", "bf.s", "bf/s", "bt", "bt.s", "bt/s", NULL, - }; + char const *v[] = { + "bf", "bf.s", "bf/s", "bt", "bt.s", "bt/s", NULL, + }; - for(int i = 0; v[i]; i++) if(!strcmp(mnemonic, v[i])) return true; - return false; + for(int i = 0; v[i]; i++) { + if(!strcmp(mnemonic, v[i])) + return true; + } + return false; } bool Instruction::isdelayed() const noexcept { - char const *v[] = { - "rte", "rts", "jmp", "jsr", "bra", "braf", "bsr", "bsrf", - "bf.s", "bf/s", "bt.s", "bt/s", NULL, - }; + char const *v[] = { + "rte", "rts", "jmp", "jsr", "bra", "braf", "bsr", "bsrf", + "bf.s", "bf/s", "bt.s", "bt/s", NULL, + }; - for(int i = 0; v[i]; i++) if(!strcmp(mnemonic, v[i])) return true; - return false; + for(int i = 0; v[i]; i++) { + if(!strcmp(mnemonic, v[i])) + return true; + } + return false; } bool Instruction::isvaliddelayslot() const noexcept { - return !isdelayed() && !isterminal() && !isjump() && !iscondjump(); + return !isdelayed() && !isterminal() && !isjump() && !iscondjump(); } } /* namespace FxOS */ diff --git a/lib/memory.cpp b/lib/memory.cpp index 7dd354d..e1a2708 100644 --- a/lib/memory.cpp +++ b/lib/memory.cpp @@ -1,153 +1,131 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// + #include #include #include namespace FxOS { -//--- -// Overview of the memory areas -//--- - -uint32_t MemoryArea::start() const noexcept -{ - switch(m_name) - { - case U0: return 0x00000000; - case P0: return 0x00000000; - case P1: return 0x80000000; - case P2: return 0xa0000000; - case P3: return 0xc0000000; - case P4: 
return 0xe0000000; - } - - return 0; -} - -uint32_t MemoryArea::end() const noexcept -{ - switch(m_name) - { - case U0: return 0x7fffffff; - case P0: return 0x7fffffff; - case P1: return 0x9fffffff; - case P2: return 0xbfffffff; - case P3: return 0xdfffffff; - case P4: return 0xffffffff; - } - - return -1; -} - -uint32_t MemoryArea::size() const noexcept -{ - return this->end() - this->start() + 1; -} +MemoryArea MemoryArea::U0 = { 0x00000000, 0x7fffffff, "U0" }; +MemoryArea MemoryArea::P0 = { 0x00000000, 0x7fffffff, "P0" }; +MemoryArea MemoryArea::P1 = { 0x80000000, 0x9fffffff, "P1" }; +MemoryArea MemoryArea::P2 = { 0xa0000000, 0xbfffffff, "P2" }; +MemoryArea MemoryArea::P3 = { 0xc0000000, 0xdfffffff, "P3" }; +MemoryArea MemoryArea::P4 = { 0xe0000000, 0xffffffff, "P4" }; //--- // Fine memory region management //--- MemoryRegion::MemoryRegion(): - name {"null"}, start {0x00000000}, end {0x00000000}, writable {false} + name {"null"}, start {0x00000000}, end {0x00000000}, writable {false} { } MemoryRegion::MemoryRegion(std::string name, uint32_t start, uint32_t end, - bool writable): - name {name}, start {start}, end {end}, writable {writable} + bool writable): + name {name}, start {start}, end {end}, writable {writable} { - this->guess_flags(); + this->guess_flags(); } uint32_t MemoryRegion::size() const noexcept { - return end - start; + return end - start; } MemoryArea MemoryRegion::area() const noexcept { - using Area = MemoryArea; - static Area areas[5]={ Area::P4, Area::P3, Area::P2, Area::P1, Area::P0 }; + using Area = MemoryArea; + static Area areas[5]={ Area::P4, Area::P3, Area::P2, Area::P1, Area::P0 }; - for(int i = 0; i < 5; i++) - { - if(start >= areas[i].start()) return areas[i]; - } + for(int i = 0; i < 5; i++) { + if(start >= areas[i].start) + return areas[i]; + } - return Area::P0; + return Area::P0; } void MemoryRegion::guess_flags() noexcept { - switch(this->area()) - { - case MemoryArea::U0: - case MemoryArea::P0: - case MemoryArea::P3: - 
cacheable = true; - mappable = true; - break; + MemoryArea area = this->area(); - case MemoryArea::P1: - cacheable = true; - mappable = false; - break; - - case MemoryArea::P2: - case MemoryArea::P4: - cacheable = false; - mappable = false; - break; - } + if(area==MemoryArea::U0 || area==MemoryArea::P0 || area==MemoryArea::P3) { + cacheable = true; + mappable = true; + } + else if(area == MemoryArea::P1) { + cacheable = true; + mappable = false; + } + else if(area == MemoryArea::P2 || area == MemoryArea::P4) { + cacheable = false; + mappable = false; + } + else { + cacheable = false; + mappable = false; + } } using R = MemoryRegion; /* Basic memory regions */ -R const R::ROM = MemoryRegion("ROM", 0x80000000, 0x81ffffff, false); -R const R::RAM = MemoryRegion("RAM", 0x88000000, 0x8803ffff, true); -R const R::ROM_P2 = MemoryRegion("ROM_P2", 0xa0000000, 0xa07fffff, false); -R const R::RAM_P2 = MemoryRegion("RAM_P2", 0xa8000000, 0xa803ffff, true); -R const R::RS = MemoryRegion("RS", 0xfd800000, 0xfd8007ff, true); -R const R::ILRAM = MemoryRegion("ILRAM", 0xe5200000, 0xe5203fff, true); -R const R::XRAM = MemoryRegion("XRAM", 0xe5007000, 0xe5008fff, true); -R const R::YRAM = MemoryRegion("YRAM", 0xe5017000, 0xe5018fff, true); +R const R::ROM ("ROM", 0x80000000, 0x81ffffff, false); +R const R::ROM_P2 ("ROM_P2", 0xa0000000, 0xa07fffff, false); +R const R::RAM ("RAM", 0x88000000, 0x881fffff, true); +R const R::RAM_P2 ("RAM_P2", 0xa8000000, 0xa81fffff, true); +R const R::RAM_8C ("RAM_8C", 0x8c000000, 0x8c7fffff, true); +R const R::RAM_8C_P2("RAM_8C_P2", 0xac000000, 0xac7fffff, true); +R const R::RS ("RS", 0xfd800000, 0xfd8007ff, true); +R const R::ILRAM ("ILRAM", 0xe5200000, 0xe5203fff, true); +R const R::XRAM ("XRAM", 0xe5007000, 0xe5008fff, true); +R const R::YRAM ("YRAM", 0xe5017000, 0xe5018fff, true); -std::array const MemoryRegion::m_all = { - &R::ROM, &R::RAM, &R::ROM_P2, &R::RAM_P2, - &R::RS, &R::ILRAM, &R::XRAM, &R::YRAM, +std::array const MemoryRegion::m_all = { + 
&R::ROM, &R::ROM_P2, + &R::RAM, &R::RAM_P2, &R::RAM_8C, &R::RAM_8C_P2, + &R::RS, &R::ILRAM, &R::XRAM, &R::YRAM, }; -std::array const &MemoryRegion::all() +std::array const &MemoryRegion::all() { - return m_all; + return m_all; } MemoryRegion const *MemoryRegion::region_for(uint32_t address) { - for(auto r: MemoryRegion::all()) { - if(r->start <= address && address < r->end) return r; - } - return nullptr; + for(auto r: MemoryRegion::all()) { + if(r->start <= address && address < r->end) + return r; + } + return nullptr; } MemoryRegion const *MemoryRegion::region_for(MemoryRegion region) { - for(auto r: MemoryRegion::all()) { - if(r->start <= region.start && region.end <= r->end) return r; - } - return nullptr; + for(auto r: MemoryRegion::all()) { + if(r->start <= region.start && region.end <= r->end) + return r; + } + return nullptr; } -MemoryRegion::MemoryRegion(std::string name) +MemoryRegion::MemoryRegion(std::string name): + MemoryRegion() { - for(auto r: MemoryRegion::all()) { - if(r->name == name) { - *this = *r; - return; - } - } - throw std::invalid_argument("No standard region named '" + name + "'"); + for(auto r: MemoryRegion::all()) { + if(r->name == name) { + *this = *r; + return; + } + } } } /* namespace FxOS */ diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index ff419e5..cab3f48 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -33,13 +33,13 @@ bool PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) VirtualSpace &space = m_disasm.space(); uint32_t v = -1; - if(i->opsize == 2 && v.covers(addr, 2)) + if(i->opsize == 2 && space.covers(addr, 2)) { v = space.read_i16(addr); ca.value = DataValue(IntegerType::u32); ca.value.write(0, 4, v); } - if(i->opsize == 4 && v.covers(addr, 4)) + if(i->opsize == 4 && space.covers(addr, 4)) { v = space.read_i32(addr); ca.value = DataValue(IntegerType::u32);