From 29cd2815ec38e1f2a3818ce63458f7c88fc6ffda Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Mon, 28 Mar 2022 20:59:30 +0100 Subject: [PATCH] refactor disassembly infrastructure and passes --- include/fxos/disasm-passes/cfg.h | 51 ------ include/fxos/disasm-passes/pcrel.h | 25 --- include/fxos/disasm-passes/print.h | 86 ---------- include/fxos/disasm-passes/syscall.h | 31 ---- include/fxos/disassembly.h | 230 ++++++++++++------------- include/fxos/lang.h | 40 ++--- include/fxos/passes/cfg.h | 60 +++++++ include/fxos/passes/pcrel.h | 36 ++++ include/fxos/passes/print.h | 115 +++++++++++++ include/fxos/passes/syscall.h | 34 ++++ include/fxos/semantics.h | 6 +- include/fxos/vspace.h | 33 ++-- lib/ai/RelConst.cpp | 2 +- lib/disassembly.cpp | 204 ++++++++++------------ lib/lang.cpp | 108 ++++++------ lib/load-asm.l | 81 ++++----- lib/passes/cfg.cpp | 145 ++++++++-------- lib/passes/pcrel.cpp | 103 +++++------ lib/passes/print.cpp | 245 +++++++++++++-------------- lib/passes/syscall.cpp | 76 +++++---- lib/semantics.cpp | 4 +- lib/vspace.cpp | 15 +- shell/d.cpp | 8 +- shell/main.cpp | 2 + 24 files changed, 863 insertions(+), 877 deletions(-) delete mode 100644 include/fxos/disasm-passes/cfg.h delete mode 100644 include/fxos/disasm-passes/pcrel.h delete mode 100644 include/fxos/disasm-passes/print.h delete mode 100644 include/fxos/disasm-passes/syscall.h create mode 100644 include/fxos/passes/cfg.h create mode 100644 include/fxos/passes/pcrel.h create mode 100644 include/fxos/passes/print.h create mode 100644 include/fxos/passes/syscall.h diff --git a/include/fxos/disasm-passes/cfg.h b/include/fxos/disasm-passes/cfg.h deleted file mode 100644 index 3cfff91..0000000 --- a/include/fxos/disasm-passes/cfg.h +++ /dev/null @@ -1,51 +0,0 @@ -//--- -// fxos.disasm-passes.cfg: Control Flow Graph construction -// -// This pass explores functions and computes the [jmptarget] field of concrete -// instructions as it goes. 
This is required for other passes that work by -// traversing the CFG, such as the abstract interpretor. -// -// This is the main exploration pass. Other passes do not typically load new -// instructions from the underlying disassembly. Straightforward passes such as -// [print] iterate on instructions loaded by this pass. -// -// The main gimmick of this pass is to "resolve delay slots" by forcing down -// the properties of delayed instructions into their respective delay slots. -// For instance, in -// jmp @r0 -// mov #1, r4 -// the jump is delayed until after the move. To handle this, fxos makes the jmp -// a no-op and applies dual move-jump semantics to the mov below it. -// -// This could be tricky for the abstract interpreter because the jump target -// has to be computed with the environment before the jmp, which is not -// available when considering the mov. Luckily all delayed jumps are state -// no-ops so the state before the mov can be used instead. -// -// Note that jumping into a delay slot will activate the jump in fxos, which is -// not the actual behavior of the processor. fxos usually complains about the -// crazy compiler when this occurs. Note that if it happens but we don't know -// that it's a delay slot (ie. the instruction from above is never executed in -// the current function), then everything's fine. -// -// Take-home message: delay slots are a pain to analyze, so we get rid of them -// as soon as possible and proceed with normal semantics. 
-//--- - -#ifndef LIBFXOS_DISASM_PASSES_CFG_H -#define LIBFXOS_DISASM_PASSES_CFG_H - -#include - -namespace FxOS { - -class CfgPass: public DisassemblyPass -{ -public: - CfgPass(Disassembly &disasm); - bool analyze(uint32_t pc, ConcreteInstruction &inst) override; -}; - -} /* namespace FxOS */ - -#endif /* LIBFXOS_DISASM_PASSES_CFG_H */ diff --git a/include/fxos/disasm-passes/pcrel.h b/include/fxos/disasm-passes/pcrel.h deleted file mode 100644 index 183ffc5..0000000 --- a/include/fxos/disasm-passes/pcrel.h +++ /dev/null @@ -1,25 +0,0 @@ -//--- -// fxos.disasm-passes.pcrel: Resolution of PC-relative addresses -// -// This pass computes all PC-relatives addresses used in fixed-target jumps and -// in PC-relative mov instructions. It does so by setting the location of each -// PC-relative argument to the associated constant value. -//--- - -#ifndef FXOS_DISASM_PASSES_PCREL_H -#define FXOS_DISASM_PASSES_PCREL_H - -#include - -namespace FxOS { - -class PcrelPass: public InstructionDisassemblyPass -{ -public: - PcrelPass(Disassembly &disasm); - bool analyze(uint32_t pc, ConcreteInstruction &inst) override; -}; - -} /* namespace FxOS */ - -#endif /* FXOS_DISASM_PASSES_PCREL_H */ diff --git a/include/fxos/disasm-passes/print.h b/include/fxos/disasm-passes/print.h deleted file mode 100644 index 1da9ffa..0000000 --- a/include/fxos/disasm-passes/print.h +++ /dev/null @@ -1,86 +0,0 @@ -//--- -// fxos.disasm-passes.print: Concrete program printer -// -// This pass prints the program and some to all of its annotations, depending -// on the specified parameters. -//--- - -#ifndef LIBFXOS_DISASM_PASSES_PRINT_H -#define LIBFXOS_DISASM_PASSES_PRINT_H - -#include -#include - -namespace FxOS { - -class OS; - -class PrintPass: public InstructionDisassemblyPass -{ -public: - PrintPass(Disassembly &disasm); - bool analyze(uint32_t pc, ConcreteInstruction &inst) override; - - //--- - // Print pass parameters - //--- - - /* Promotion parameters. Default is always to append. 
*/ - enum Promotion { - /* Never promote */ - Never=1, - /* Promote but keep the lower-level information */ - Append=0, - /* Promote and hide the lower-level information */ - Promote=2, - }; - - /** In the following, promote_x always means promote *to x* **/ - - /* In jumps, promote "pc+" to the target address */ - int promote_pcjump_loc; - /* In a PC-relative mov, promote "@(,pc)" to computed address */ - int promote_pcrel_loc; - /* In a PC-relative mov, promote address to pointed value */ - int promote_pcrel_value; - /* Promote an integer to a syscall number */ - int promote_syscall; - /* Promote a syscall number to a syscall name */ - int promote_syscallname; - /* Promote an integer to a symbol */ - int promote_symbol; - /* In a mova, promote "pc+" to the computed address */ - int promote_pcaddr_loc; - - /* TODO: More print pass parameters */ - -private: - /* Symbol tables to look up names */ - std::vector> m_symtables; - /* Query symbol tables, most recent first */ - std::optional symquery(Symbol::Type type, uint32_t value); - - /* OS for the target, to mark syscalls before instructions */ - OS *m_os; - - /* Last printed address (for ellipses) */ - uint32_t m_last_address; - - /** Internal promotion tree printers **/ - - void queue(std::string, bool = false); - void queue_flush(); - std::vector m_messages; - - void pcjumploc(ConcreteInstructionArg const &); - void pcrelloc(ConcreteInstructionArg const &); - void pcrelval(ConcreteInstructionArg const &); - void syscall(ConcreteInstructionArg const &); - void syscallname(ConcreteInstructionArg const &); - void symbol(ConcreteInstructionArg const &); - void pcaddrloc(ConcreteInstructionArg const &); -}; - -} /* namespace FxOS */ - -#endif /* LIBFXOS_DISASM_PASSES_PRINT_H */ diff --git a/include/fxos/disasm-passes/syscall.h b/include/fxos/disasm-passes/syscall.h deleted file mode 100644 index e638d63..0000000 --- a/include/fxos/disasm-passes/syscall.h +++ /dev/null @@ -1,31 +0,0 @@ -//--- -// 
fxos.disasm-passes.syscall: Detection and substitution of syscall addresses -// -// This passes looks for insruction arguments that evaluate to syscall -// addresses, and substitutes to that the syscall number and (hopefully) the -// syscall name will be shown by the print pass if it's available in the -// documentation. -//--- - -#ifndef FXOS_DISASM_PASSES_SYSCALL_H -#define FXOS_DISASM_PASSES_SYSCALL_H - -#include -#include - -namespace FxOS { - -class SyscallPass: public InstructionDisassemblyPass -{ -public: - SyscallPass(Disassembly &disasm, OS *os); - - bool analyze(uint32_t pc, ConcreteInstruction &inst) override; - -private: - OS *m_os; -}; - -} /* namespace FxOS */ - -#endif /* FXOS_DISASM_PASSES_SYSCALL_H */ diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index 0fef5c7..a4fd710 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -1,9 +1,19 @@ -//--- -// fxos.disassembly: Disassembler +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/disassembly: Disassembler infrastructure +// +// +// +// TODO: Instead of defining every field for every argument of every +// disassembled instruction, set up a system of external annotations. //--- -#ifndef LIBFXOS_DISASSEMBLY_H -#define LIBFXOS_DISASSEMBLY_H +#ifndef FXOS_DISASSEMBLY_H +#define FXOS_DISASSEMBLY_H #include #include @@ -13,127 +23,99 @@ #include #include #include -#include -#include namespace FxOS { -/* Register an instruction. - @inst Instruction with [opcode] set to the binary pattern +/* Register an instruction. This is called by loader functions from the asm + table lexer. [inst] must have its opcode field set. 
*/ +void register_instruction(AsmInstruction const &inst); - Typically this is called by loader functions from data tables describing - instructions with parameters, not manually. See . */ -void register_instruction(Instruction ins); - -/* Load an assembly instruction table for the disassembler. This function - directly feeds register_instruction() and does not return anything. */ +/* Lex and register an assembly instruction table. */ int load_instructions(Buffer const &file); -/* An argument for a concrete instruction. */ -struct ConcreteInstructionArg + +/* An argument for a disassembled instruction. */ +struct Argument { - ConcreteInstructionArg(); + Argument(); - //--- - // Data set by the pass and abstract interpreter - //--- + // Data set by the pass and abstract interpreter - /* Location in CPU or memory, if that can be determined */ - Location location; - /* Pointed value. If the exact value can't be determined, this object - evaluates to false. Sometimes, the type can be determined anyway, - and in this case its [type] attribute below is not null even though - the object evaluates to false. */ - DataValue value; + /* Location in CPU or memory, if it makes sense and can be determined */ + Location location; + /* Manipulated value. If no information can be obtained, this object + evaluates to false when converted to bool. */ + RelConst value; - //--- - // Data set by the pass - //--- + // Data set by the pass - /* If the value is a syscall address, the syscall's id */ - int syscall_id; + /* If the value is a syscall address, the syscall's id */ + int syscall_id; }; /* A loaded and annotated instruction. */ -struct ConcreteInstruction +struct Instruction { - /* Build from instruction, cannot be nullptr. */ - ConcreteInstruction(Instruction const *inst); - /* Build from opcode, if instruction could not be decoded. */ - ConcreteInstruction(uint16_t opcode); + /* Build from instruction, cannot be nullptr. 
*/ + Instruction(AsmInstruction const *inst); + /* Build from opcode, if instruction could not be decoded. */ + Instruction(uint16_t opcode); - /* What instruction this is. Note that this does not determine all the - properties below. Placement and delay slots greatly alter them. - This pointer is nullptr if the instruction could not be decoded. */ - Instruction const *inst; + /* What instruction this is. Note that this does not determine all the + properties below. Placement and delay slots greatly alter them. + This pointer is nullptr if the instruction could not be decoded. */ + AsmInstruction const *inst; - /* Argument information (contains data set by several passes) */ - ConcreteInstructionArg args[2]; + /* Argument information (contains data set by several passes) */ + Argument args[2]; - /* Opcode, valid only if inst==nullptr */ - uint16_t opcode; + /* Opcode, valid only if inst==nullptr */ + uint16_t opcode; - //--- - // Data set by the cfg pass - //--- + // Data set by the cfg pass - /* Whether this instruction is a leader. This is always set by another - instruction jumping into this one. */ - bool leader; - /* Whether this instruction is in a delay slot. This is always set by - the preceding delayed instruction. */ - bool delayslot; + /* Whether this instruction is a leader. This is always set by another + instruction jumping into this one. */ + bool leader; + /* Whether this instruction is in a delay slot. This is always set by + the preceding delayed instruction. */ + bool delayslot; - /* Whether this instruction is: - -> Terminal, ie. has no successors and is the end of the function. - -> An unconditional jump of target [jmptarget]. This is the case for eg. - bt, but not bt.s; rather the successor of bt.s is the jump. - -> A conditional jump that can hit [jmptarget] and pc+2. - If delayslot==false, these attributes are set when analyzing this - instruction. If delayslot==true, they are set when the preceding - delayed instruction is analyzed. 
*/ - bool terminal; - bool jump; - bool condjump; + /* Whether this instruction is: + -> Terminal, ie. has no successors and is the end of the function. + -> An unconditional jump of target [jmptarget]. This is the case for eg. + bt, but not bt.s; rather the successor of bt.s is the jump. + -> A conditional jump that can hit [jmptarget] and pc+2. + If delayslot==false, these attributes are set when analyzing this + instruction. If delayslot==true, they are set when the preceding + delayed instruction is analyzed. */ + bool terminal; + bool jump; + bool condjump; - /* The jump target, used if jump==true or condjump==true. */ - uint32_t jmptarget; + /* The jump target, used if jump==true or condjump==true. */ + uint32_t jmptarget; }; /* Disassembly interface that automatically loads code from a target */ -class Disassembly +struct Disassembly { -public: - Disassembly(VirtualSpace &space); + Disassembly(VirtualSpace &space); - /* Check whether an instruction has been visited so far */ - bool hasins(uint32_t pc); - /* Get the minimum and maximum loaded instruction addresses */ - uint32_t minpc(); - uint32_t maxpc(); + /* Check whether an instruction has been visited so far */ + bool hasins(uint32_t pc); + /* Get the minimum and maximum loaded instruction addresses */ + uint32_t minpc(); + uint32_t maxpc(); - /* Get the storage to any concrete instruction. The instruction will be - loaded and initialized if it had not been read before. */ - ConcreteInstruction &readins(uint32_t pc); + /* Get the storage to any concrete instruction. The instruction will be + loaded and initialized if it had not been read before. */ + Instruction &readins(uint32_t pc); - /* For other access patterns (careful with write accesses!) 
*/ - std::map &instructions() noexcept { - return m_instructions; - } - - /* Access to memory */ - VirtualSpace &space() noexcept { - return m_space; - } - - /* List of passes that have run so far */ - std::set passes; - -private: - /* Underlying target */ - VirtualSpace &m_space; - /* Loaded instructions by address */ - std::map m_instructions; + /* For other access patterns */ + std::map instructions; + /* Underlying target */ + VirtualSpace &space; }; //--- @@ -143,51 +125,51 @@ private: class DisassemblyPass { public: - DisassemblyPass(Disassembly &disasm, std::string name=""); + DisassemblyPass(Disassembly &disasm, std::string name=""); - /* Analyze a single instruction, probably updating the annotations and - the state of the pass itself. */ - virtual bool analyze(uint32_t pc, ConcreteInstruction &inst) = 0; + /* Analyze a single instruction, probably updating the annotations and + the state of the pass itself. */ + virtual bool analyze(uint32_t pc, Instruction &inst) = 0; - /* Run the pass from the given entry point */ - virtual bool run(uint32_t entry_pc); + /* Run the pass from the given entry point */ + virtual bool run(uint32_t entry_pc); protected: - /* Add an instruction to the queue to analyze next */ - void enqueue(uint32_t pc); - /* Add the next loaded instruction in address space */ - void enqueue_next(uint32_t pc); - /* Enqueue the unseen successors of this instruction */ - void enqueue_unseen_successors(uint32_t pc, ConcreteInstruction &inst); - /* Enqueue all the success of this instruction */ - void enqueue_all_successors(uint32_t pc, ConcreteInstruction &inst); + /* Add an instruction to the queue to analyze next */ + void enqueue(uint32_t pc); + /* Add the next loaded instruction in address space */ + void enqueue_next(uint32_t pc); + /* Enqueue the unseen successors of this instruction */ + void enqueue_unseen_successors(uint32_t pc, Instruction &inst); + /* Enqueue all the success of this instruction */ + void 
enqueue_all_successors(uint32_t pc, Instruction &inst); - /* Underlying disassembly */ - Disassembly &m_disasm; + /* Underlying disassembly */ + Disassembly &m_disasm; private: - /* Blocks to visit next, ordered for uniqueness */ - std::set m_next; - std::priority_queue m_queue; + /* Blocks to visit next, ordered for uniqueness */ + std::set m_next; + std::priority_queue m_queue; - /* Visited blocks */ - std::set m_seen; + /* Visited blocks */ + std::set m_seen; - /* Name of pass */ - std::string m_name; + /* Name of pass */ + std::string m_name; }; /* A disassembly pass that observes each instruction independently */ class InstructionDisassemblyPass: public DisassemblyPass { public: - InstructionDisassemblyPass(Disassembly &disasm, std::string name=""); + InstructionDisassemblyPass(Disassembly &disasm, std::string name=""); - /* Runs the pass from the first instruction currently loaded, all the - way down to the bottom, as if always using enqueue_next(). */ - virtual bool run(); + /* Runs the pass from the first instruction currently loaded, all the + way down to the bottom, as if always using enqueue_next(). 
*/ + virtual bool run(); }; } /* namespace FxOS */ -#endif /* LIBFXOS_DISASSEMBLY_H */ +#endif /* FXOS_DISASSEMBLY_H */ diff --git a/include/fxos/lang.h b/include/fxos/lang.h index 7822fe4..577ab98 100644 --- a/include/fxos/lang.h +++ b/include/fxos/lang.h @@ -81,7 +81,7 @@ private: }; /* Addressing modes for arguments */ -struct Argument +struct AsmArgument { /* Various addressing modes in the language */ enum Kind: int8_t { @@ -93,11 +93,11 @@ struct Argument ArrayDeref, /* @(r0,rn) or @(r0,gbr) */ PcRel, /* @(disp,pc) with 4-alignment correction */ PcJump, /* pc+disp */ - PcAddr, /* pc+disp with special delayed slot semantics */ + PcAddr, /* pc+disp (the address itself, for mova) */ Imm, /* #imm */ }; - Argument() = default; + AsmArgument() = default; /* String representation */ std::string str() const; @@ -119,28 +119,28 @@ struct Argument }; }; -/* Argument constructors */ +/* AsmArgument constructors */ -Argument Argument_Reg(CpuRegister base); -Argument Argument_Deref(CpuRegister base); -Argument Argument_PostInc(CpuRegister base); -Argument Argument_PreDec(CpuRegister base); -Argument Argument_StructDeref(int disp, int opsize, CpuRegister base); -Argument Argument_ArrayDeref(CpuRegister index, CpuRegister base); -Argument Argument_PcRel(int disp, int opsize); -Argument Argument_PcJump(int disp); -Argument Argument_PcAddr(int disp); -Argument Argument_Imm(int imm); +AsmArgument AsmArgument_Reg(CpuRegister base); +AsmArgument AsmArgument_Deref(CpuRegister base); +AsmArgument AsmArgument_PostInc(CpuRegister base); +AsmArgument AsmArgument_PreDec(CpuRegister base); +AsmArgument AsmArgument_StructDeref(int disp, int opsize, CpuRegister base); +AsmArgument AsmArgument_ArrayDeref(CpuRegister index, CpuRegister base); +AsmArgument AsmArgument_PcRel(int disp, int opsize); +AsmArgument AsmArgument_PcJump(int disp); +AsmArgument AsmArgument_PcAddr(int disp); +AsmArgument AsmArgument_Imm(int imm); /* Assembler instruction */ -struct Instruction +struct AsmInstruction { 
- Instruction() = default; + AsmInstruction() = default; /* Construct with one or several arguments */ - Instruction(char const *mnemonic); - Instruction(char const *mnemonic, Argument arg); - Instruction(char const *mnemonic, Argument arg1, Argument arg2); + AsmInstruction(char const *mnemonic); + AsmInstruction(char const *mnemonic, AsmArgument arg); + AsmInstruction(char const *mnemonic, AsmArgument arg1, AsmArgument arg2); /* Original opcode. Initialized to 0 when unset, which is an invalid instruction by design. */ @@ -153,7 +153,7 @@ struct Instruction /* Mnemonic **without the size indicator** */ char mnemonic[12]; /* Arguments (up to 2) */ - Argument args[2]; + AsmArgument args[2]; //--- // Instruction classes diff --git a/include/fxos/passes/cfg.h b/include/fxos/passes/cfg.h new file mode 100644 index 0000000..3202397 --- /dev/null +++ b/include/fxos/passes/cfg.h @@ -0,0 +1,60 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/passes/cfg: Control Flow Graph construction +// +// This pass explores functions by loading every instruction's potential +// successor into the diassembly store. It also sets the [jmptarget] field of +// the Instructions as it goes, allowing other passes to traverse the (somewhat +// implicit) CFG. +// +// This is the main exploration pass; other passes do not typically load new +// instructions from the underlying disassembly. Straightforward passes such as +// [print] iterate on instructions loaded by this pass. +// +// The main problem that this pass has to deal with is delay slots. 
These are +// pretty tricky to deal with; for instance, in +// +// bra pc+120 +// mov #1, r4 +// +// the CPU will run [mov #1, r4] while performing the branch to pc+120 in order +// to fill an otherwise-unfillable pipeline cycle. This is annoying for all +// kinds of reasons, and fxos handles this by acting as if the mov itself had +// pc+120 as an unconditional successor. +// +// This could be tricky for the abstract interpreter because the jump target +// has to be computed using the state at the jump instruction, not the one at +// the delay slot. Luckily all delayed jumps are no-ops in terms of state, so +// the confusion has no effect. +// +// Note that jumping into a delay slot will activate the jump in fxos, which is +// not the actual behavior of the processor. I don't believe any compiler does +// this kind of thing (most are not inherently designed for delay slots +// anyway). If such an instance is found, fxos will throw an exception and give +// up to make sure no analysis pass returns invalid results. +// +// Take-home message: delay slots are a pain to analyze, so we get rid of them +// as soon as possible and proceed with normal semantics. 
+//--- + +#ifndef FXOS_PASSES_CFG_H +#define FXOS_PASSES_CFG_H + +#include + +namespace FxOS { + +class CfgPass: public DisassemblyPass +{ +public: + CfgPass(Disassembly &disasm); + bool analyze(uint32_t pc, Instruction &inst) override; +}; + +} /* namespace FxOS */ + +#endif /* FXOS_PASSES_CFG_H */ diff --git a/include/fxos/passes/pcrel.h b/include/fxos/passes/pcrel.h new file mode 100644 index 0000000..8137987 --- /dev/null +++ b/include/fxos/passes/pcrel.h @@ -0,0 +1,36 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/passes/pcrel: Resolution of PC-relative addresses +// +// This passes computes PC-relative addresses in statically-determined jumps +// and in PC-relative mov instructions. It determines the target address and, +// when applicable, the value read from memory. +// +// When an instruction accesses memory, the argument's [location] field holds +// the target address and the [value] field holds the value. When an +// instruction computes an address for a jump or for storage (mova) then both +// fields hold the target address (as the "value" obtained by the instruction +// is the address itself). 
+//--- + +#ifndef FXOS_PASSES_PCREL_H +#define FXOS_PASSES_PCREL_H + +#include + +namespace FxOS { + +class PcrelPass: public InstructionDisassemblyPass +{ +public: + PcrelPass(Disassembly &disasm); + bool analyze(uint32_t pc, Instruction &inst) override; +}; + +} /* namespace FxOS */ + +#endif /* FXOS_PASSES_PCREL_H */ diff --git a/include/fxos/passes/print.h b/include/fxos/passes/print.h new file mode 100644 index 0000000..939468b --- /dev/null +++ b/include/fxos/passes/print.h @@ -0,0 +1,115 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/passes/print: Disassembly printer +// +// This pass prints the program and adds annotations depending on a number of +// customizable boolean parameters. +// +// Data for an instruction, and arguments in particular, might have a large +// number of equivalent representations, depending on how much information was +// added during disassembly. +// +// The main mechanic of this pass is to define *promotions* which allow high- +// level information to be added or to replace low-level data. For instance, an +// @(disp,pc) argument could promote to a statically-computed address, which +// could promote to its known pointed value in the case of a read, which could +// itself promote to a symbol or syscall number. +// +// Each promotion opportunity has 3 possible settings: +// - Never: the higher-level information is not shown. +// - Append: the higher-level information is shown after the low-level one. +// - Promote: the higher-level information replaces the low-level one. +// +// For example, by default @(disp,pc) is set to Promote to statically-computed +// addresses, their values, and syscall numbers, but syscall names are only set +// to Append. 
Therefore, a mov.l @(disp,pc) which loads the address of syscall +// %ace on an fx-series model (which is memcp) might show as +// +// mov.l %ace memcmp, r3 +// +// where the first element has been promoted twice and the second appended. +//--- + +#ifndef FXOS_PASSES_PRINT_H +#define FXOS_PASSES_PRINT_H + +#include +#include + +namespace FxOS { + +class OS; + +class PrintPass: public InstructionDisassemblyPass +{ +public: + PrintPass(Disassembly &disasm); + bool analyze(uint32_t pc, Instruction &inst) override; + + //--- + // Print pass parameters + //--- + + /* Promotion parameters. Default is always to append. */ + enum Promotion { + /* Never promote */ + Never=1, + /* Promote but keep the lower-level information */ + Append=0, + /* Promote and hide the lower-level information */ + Promote=2, + }; + + /** In the following, promote_x always means promote *to x* **/ + + /* In jumps, promote "pc+" to the target address */ + int promote_pcjump_loc; + /* In a PC-relative mov, promote "@(,pc)" to computed address */ + int promote_pcrel_loc; + /* In a PC-relative mov, promote address to pointed value */ + int promote_pcrel_value; + /* Promote an integer to a syscall number */ + int promote_syscall; + /* Promote a syscall number to a syscall name */ + int promote_syscallname; + /* Promote an integer to a symbol */ + int promote_symbol; + /* In a mova, promote "pc+" to the computed address */ + int promote_pcaddr_loc; + + /* TODO: More print pass parameters */ + +private: + /* Symbol tables to look up names */ + std::vector> m_symtables; + /* Query symbol tables, most recent first */ + std::optional symquery(Symbol::Type type, uint32_t value); + + /* OS for the target, to mark syscalls before instructions */ + OS *m_os; + + /* Last printed address (for ellipses) */ + uint32_t m_last_address; + + /** Internal promotion tree printers **/ + + void queue(std::string, bool = false); + void queue_flush(); + std::vector m_messages; + + void pcjumploc(Argument const &); + void 
pcrelloc(Argument const &); + void pcrelval(Argument const &); + void syscall(Argument const &); + void syscallname(Argument const &); + void symbol(Argument const &); + void pcaddrloc(Argument const &); +}; + +} /* namespace FxOS */ + +#endif /* FXOS_PASSES_PRINT_H */ diff --git a/include/fxos/passes/syscall.h b/include/fxos/passes/syscall.h new file mode 100644 index 0000000..e7fcddf --- /dev/null +++ b/include/fxos/passes/syscall.h @@ -0,0 +1,34 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/passes/syscall: Detection and substitution of syscall addresses +// +// This function identifies arguments that refer to syscall addresses and +// annotate them with the syscall number. No name resolution is performed at +// this point, that happens in the print pass. +//--- + +#ifndef FXOS_PASSES_SYSCALL_H +#define FXOS_PASSES_SYSCALL_H + +#include +#include + +namespace FxOS { + +class SyscallPass: public InstructionDisassemblyPass +{ +public: + SyscallPass(Disassembly &disasm, OS *os); + bool analyze(uint32_t pc, Instruction &inst) override; + +private: + OS *m_os; +}; + +} /* namespace FxOS */ + +#endif /* FXOS_PASSES_SYSCALL_H */ diff --git a/include/fxos/semantics.h b/include/fxos/semantics.h index d31b64b..396dee6 100644 --- a/include/fxos/semantics.h +++ b/include/fxos/semantics.h @@ -46,9 +46,9 @@ struct BaseType /* Type size in bytes, as would be returned by sizeof(). Must be 1, 2 or 4 for integral types and bit fields. Cannot be 0 because all considered types are fixed-size and finite. */ - size_t size; + int size; /* Type alignment, can only be 1, 2 or 4 */ - size_t align; + int align; }; /* Integer type; of byte, word or longword size. Plus signedness. 
This kind is @@ -127,7 +127,7 @@ public: BitfieldType const &bitfield() const; ArrayType const &array() const; StringType const &string() const; - StructType const &structs() const; + StructType const &structure() const; /* Converting constructors from any of these types */ diff --git a/include/fxos/vspace.h b/include/fxos/vspace.h index 4281a0f..0bd6887 100644 --- a/include/fxos/vspace.h +++ b/include/fxos/vspace.h @@ -6,10 +6,14 @@ //---------------------------------------------------------------------------// // fxos/vspace: Virtual address space with loaded code and analyses // -// This is the main structure/entry point of fxos. A Virtu - -//--- -// fxos.vspace: A virtual space where code is being studied +// This is the main structure/entry point of fxos. A virtual space emulates the +// virtual memory of the MPU and can have files loaded ("bound") at chosen +// positions. Usually, there is one virtual space for each OS being studied. +// +// Technically, each virtual space should also come with platform information, +// but currently only the MPU is specified and it's unused. +// +// Virtual spaces also centralize information related to analyses. //--- #ifndef FXOS_VSPACE_H @@ -42,6 +46,7 @@ struct Binding: public AbstractMemory /* Underlying buffer (copy of the original one) */ Buffer buffer; + // - AbstractMemory interface char const *translate_dynamic(uint32_t addr, int *size) override; }; @@ -58,8 +63,8 @@ public: /* List of bindings (most recent first) */ std::vector bindings; - /* OS analysis; performed on-demand. Returns the new or cached OS analysis, - and nullptr only if OS cannot be analyzed */ + /* OS analysis; created on-demand. Returns the new or cached OS analysis, + nullptr OS analysis fails. */ OS *os_analysis(bool force=false); /* Cursor position, used by the interactive shell */ @@ -68,19 +73,13 @@ public: /* Symbol table */ SymbolTable symbols; - /* Bind a memory region from a buffer. The region can either be - standard (see ) or custom. 
- - If several loaded regions overlap on some addresses, *the last - loaded region will be used*. Thus, new regions can be loaded to - selectively override parts of the target. - - An error is raised if the buffer is smaller than the region being - bound. */ + /* Bind a buffer to a standard or custom memory region. Functions in the + library tend to assume that bindings don't overlap and are not + immediately consecutive in memory. If the buffer is smaller than the + region, it is 0-padded to the proper size. */ void bind_region(MemoryRegion const &region, Buffer const &buffer); - /* Implementation of AbstractMemory primitives */ - + // - AbstractMemory interface char const *translate_dynamic(uint32_t addr, int *size) override; private: diff --git a/lib/ai/RelConst.cpp b/lib/ai/RelConst.cpp index 1feb246..c6ab7fb 100644 --- a/lib/ai/RelConst.cpp +++ b/lib/ai/RelConst.cpp @@ -302,7 +302,7 @@ std::string RelConst::str() const noexcept return str + format("%d", v); } else { - return str + format("%08x", uval); + return str + format("0x%08x", uval); } } diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index 3189f25..c935da4 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -1,114 +1,104 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// + #include -#include +#include #include -#include #include namespace FxOS { /* Instruction map */ -static std::array,65536> insmap; +static std::array, 65536> insmap; -/* Register an instruction at a given opcode.
*/ - -void register_instruction(Instruction ins) +void register_instruction(AsmInstruction const &ins) { - uint16_t opcode = ins.opcode; + uint16_t opcode = ins.opcode; - if(insmap[opcode]) - { - throw std::logic_error("opcode collision"); - } - - insmap[opcode] = ins; + if(insmap[opcode]) + FxOS_log(ERR, "opcode collision between a %s and a %s at %04x", + insmap[opcode]->mnemonic, ins.mnemonic, opcode); + else + insmap[opcode] = ins; } //--- // Concrete (instantiated) arguments and instructions //--- -ConcreteInstructionArg::ConcreteInstructionArg(): - value(), syscall_id(-1) +Argument::Argument() { - location = RelConstDomain().bottom(); + location = RelConstDomain().bottom(); + value = location; + syscall_id = -1; } -ConcreteInstruction::ConcreteInstruction(Instruction const *inst): - inst(inst), args(), opcode(), - leader(false), delayslot(false), - terminal(false), jump(false), condjump(false), - jmptarget(0xffffffff) +Instruction::Instruction(AsmInstruction const *inst): + inst {inst}, args {}, opcode {inst->opcode}, + leader {false}, delayslot {false}, terminal {false}, jump {false}, + condjump {false}, jmptarget {0xffffffff} { - if(!inst) throw std::logic_error( - "ConcreteInstruction built from a null pointer"); } -ConcreteInstruction::ConcreteInstruction(uint16_t opcode): - inst(nullptr), args(), opcode(opcode), - leader(false), delayslot(false), - terminal(false), jump(false), condjump(false), - jmptarget(0xffffffff) +Instruction::Instruction(uint16_t opcode): + inst {nullptr}, args {}, opcode {opcode}, + leader {false}, delayslot {false}, terminal {false}, jump {false}, + condjump {false}, jmptarget {0xffffffff} { - inst = nullptr; } //--- // Disassembler interface //--- -Disassembly::Disassembly(VirtualSpace &space): - passes {}, m_space {space}, m_instructions {} +Disassembly::Disassembly(VirtualSpace &_space): + instructions {}, space {_space} { } bool Disassembly::hasins(uint32_t pc) { - return m_instructions.count(pc) > 0; + return 
this->instructions.count(pc) > 0; } uint32_t Disassembly::minpc() { - uint32_t min = 0xffffffff; - - for(auto &it: m_instructions) - { - if(it.first < min) min = it.first; - } - - return min; + if(this->instructions.empty()) + return 0xffffffff; + return this->instructions.cbegin()->first; } uint32_t Disassembly::maxpc() { - uint32_t max = 0x00000000; - - for(auto &it: m_instructions) - { - if(it.first > max) max = it.first; - } - - return max; + if(this->instructions.empty()) + return 0xffffffff; + return this->instructions.crbegin()->first; } -ConcreteInstruction &Disassembly::readins(uint32_t pc) +Instruction &Disassembly::readins(uint32_t pc) { - if(pc & 1) throw std::runtime_error("Disassembly::readins at odd PC"); + if(pc & 1) { + FxOS_log(ERR, "reading instruction for disassembly at %08x", pc); + pc &= -2; + } - try - { - return m_instructions.at(pc); - } - catch(std::out_of_range &e) - { - uint16_t opcode = m_space.read_u16(pc); - ConcreteInstruction ci(opcode); + try { + return this->instructions.at(pc); + } + catch(std::out_of_range &e) { + uint16_t opcode = this->space.read_u16(pc); + Instruction ci(opcode); - if(insmap[opcode]) - ci = ConcreteInstruction(&*insmap[opcode]); + if(insmap[opcode]) + ci = Instruction(&*insmap[opcode]); - m_instructions.emplace(pc, ci); - return m_instructions.at(pc); - } + this->instructions.emplace(pc, ci); + return this->instructions.at(pc); + } } //--- @@ -116,74 +106,63 @@ ConcreteInstruction &Disassembly::readins(uint32_t pc) //--- DisassemblyPass::DisassemblyPass(Disassembly &disasm, std::string name): - m_disasm(disasm), m_name(name) + m_disasm {disasm}, m_name {name} { } void DisassemblyPass::enqueue(uint32_t pc) { - if(m_next.count(pc)) return; + if(m_next.count(pc)) + return; - m_next.insert(pc); - m_queue.push(pc); + m_next.insert(pc); + m_queue.push(pc); } void DisassemblyPass::enqueue_next(uint32_t pc) { - /* TODO: DisassemblyPass::enqueue_next is inefficient */ - do pc += 2; - while(!m_disasm.hasins(pc)); + /* 
TODO: DisassemblyPass::enqueue_next is inefficient */ + do pc += 2; + while(!m_disasm.hasins(pc)); - enqueue(pc); + enqueue(pc); } -void DisassemblyPass::enqueue_unseen_successors(uint32_t pc, - ConcreteInstruction &inst) +void DisassemblyPass::enqueue_unseen_successors(uint32_t pc, Instruction &i) { - if(!inst.terminal && !inst.jump) - { - if(pc == 0x80000078) printf("t%d j%d\n", inst.terminal, inst.jump); - if(!m_seen.count(pc + 2)) enqueue(pc + 2); - } - if(inst.jump || inst.condjump) - { - if(!m_seen.count(inst.jmptarget)) enqueue(inst.jmptarget); - } + if(!i.terminal && !i.jump) { + if(!m_seen.count(pc + 2)) enqueue(pc + 2); + } + if(i.jump || i.condjump) { + if(!m_seen.count(i.jmptarget)) enqueue(i.jmptarget); + } } -void DisassemblyPass::enqueue_all_successors(uint32_t pc, - ConcreteInstruction &inst) +void DisassemblyPass::enqueue_all_successors(uint32_t pc, Instruction &i) { - if(!inst.terminal && !inst.jump) - { - enqueue(pc + 2); - } - if(inst.jump || inst.condjump) - { - enqueue(inst.jmptarget); - } + if(!i.terminal && !i.jump) + enqueue(pc + 2); + if(i.jump || i.condjump) + enqueue(i.jmptarget); } bool DisassemblyPass::run(uint32_t entry_pc) { - enqueue(entry_pc); + enqueue(entry_pc); - while(m_queue.size()) - { - uint32_t pc = m_queue.top(); + while(m_queue.size()) { + uint32_t pc = m_queue.top(); - m_queue.pop(); - m_next.erase(m_next.find(pc)); + m_queue.pop(); + m_next.erase(m_next.find(pc)); - ConcreteInstruction &ci = m_disasm.readins(pc); - if(!analyze(pc, ci)) - return false; + Instruction &ci = m_disasm.readins(pc); + if(!analyze(pc, ci)) + return false; - m_seen.insert(pc); - } - - if(m_name != "") m_disasm.passes.insert(m_name); - return true; + m_seen.insert(pc); + } + return true; } //--- @@ -191,18 +170,17 @@ bool DisassemblyPass::run(uint32_t entry_pc) //--- InstructionDisassemblyPass::InstructionDisassemblyPass(Disassembly &disasm, - std::string name): DisassemblyPass(disasm, name) + std::string name): DisassemblyPass(disasm, name) { } 
bool InstructionDisassemblyPass::run() { - for(auto &pair: m_disasm.instructions()) - { - if(!analyze(pair.first, pair.second)) - return false; - } - return true; + for(auto &pair: m_disasm.instructions) { + if(!analyze(pair.first, pair.second)) + return false; + } + return true; } } /* namespace FxOS */ diff --git a/lib/lang.cpp b/lib/lang.cpp index cb11616..0e16f34 100644 --- a/lib/lang.cpp +++ b/lib/lang.cpp @@ -58,115 +58,115 @@ std::string CpuRegister::str() const noexcept /* External constructors */ -Argument Argument_Reg(CpuRegister base) +AsmArgument AsmArgument_Reg(CpuRegister base) { - Argument arg; - arg.kind = Argument::Reg; + AsmArgument arg; + arg.kind = AsmArgument::Reg; arg.base = base; return arg; } -Argument Argument_Deref(CpuRegister base) +AsmArgument AsmArgument_Deref(CpuRegister base) { - Argument arg; - arg.kind = Argument::Deref; + AsmArgument arg; + arg.kind = AsmArgument::Deref; arg.base = base; return arg; } -Argument Argument_PostInc(CpuRegister base) +AsmArgument AsmArgument_PostInc(CpuRegister base) { - Argument arg; - arg.kind = Argument::PostInc; + AsmArgument arg; + arg.kind = AsmArgument::PostInc; arg.base = base; return arg; } -Argument Argument_PreDec(CpuRegister base) +AsmArgument AsmArgument_PreDec(CpuRegister base) { - Argument arg; - arg.kind = Argument::PreDec; + AsmArgument arg; + arg.kind = AsmArgument::PreDec; arg.base = base; return arg; } -Argument Argument_StructDeref(int disp, int opsize, CpuRegister base) +AsmArgument AsmArgument_StructDeref(int disp, int opsize, CpuRegister base) { - Argument arg; - arg.kind = Argument::StructDeref; + AsmArgument arg; + arg.kind = AsmArgument::StructDeref; arg.base = base; arg.disp = disp; arg.opsize = opsize; return arg; } -Argument Argument_ArrayDeref(CpuRegister index, CpuRegister base) +AsmArgument AsmArgument_ArrayDeref(CpuRegister index, CpuRegister base) { - Argument arg; - arg.kind = Argument::ArrayDeref; + AsmArgument arg; + arg.kind = AsmArgument::ArrayDeref; arg.base = 
base; arg.index = index; return arg; } -Argument Argument_PcRel(int disp, int opsize) +AsmArgument AsmArgument_PcRel(int disp, int opsize) { - Argument arg; - arg.kind = Argument::PcRel; + AsmArgument arg; + arg.kind = AsmArgument::PcRel; arg.disp = disp; arg.opsize = opsize; return arg; } -Argument Argument_PcJump(int disp) +AsmArgument AsmArgument_PcJump(int disp) { - Argument arg; - arg.kind = Argument::PcJump; + AsmArgument arg; + arg.kind = AsmArgument::PcJump; arg.disp = disp; return arg; } -Argument Argument_PcAddr(int disp) +AsmArgument AsmArgument_PcAddr(int disp) { - Argument arg; - arg.kind = Argument::PcAddr; + AsmArgument arg; + arg.kind = AsmArgument::PcAddr; arg.disp = disp; return arg; } -Argument Argument_Imm(int imm) +AsmArgument AsmArgument_Imm(int imm) { - Argument arg; - arg.kind = Argument::Imm; + AsmArgument arg; + arg.kind = AsmArgument::Imm; arg.imm = imm; return arg; } /* String representation */ -std::string Argument::str() const +std::string AsmArgument::str() const { switch(kind) { - case Argument::Reg: + case AsmArgument::Reg: return base.str(); - case Argument::Deref: + case AsmArgument::Deref: return format("@%s", base.str()); - case Argument::PostInc: + case AsmArgument::PostInc: return format("@%s+", base.str()); - case Argument::PreDec: + case AsmArgument::PreDec: return format("@-%s", base.str()); - case Argument::StructDeref: + case AsmArgument::StructDeref: return format("@(%d,%s)", disp, base.str().c_str()); - case Argument::ArrayDeref: + case AsmArgument::ArrayDeref: return format("@(%s,%s)", index.str().c_str(), base.str().c_str()); - case Argument::PcRel: + case AsmArgument::PcRel: return format("@(%d,pc)", disp); - case Argument::PcJump: + case AsmArgument::PcJump: return format("pc+%d", disp); - case Argument::PcAddr: + case AsmArgument::PcAddr: return format("pc+%u", disp); - case Argument::Imm: + case AsmArgument::Imm: return format("#%d", imm); default: return "(invalid)"; @@ -177,7 +177,7 @@ std::string 
Argument::str() const // Instruction management //--- -Instruction::Instruction(char const *mn): +AsmInstruction::AsmInstruction(char const *mn): opcode {0}, opsize {0}, arg_count {0} { int len = strlen(mn); @@ -201,15 +201,16 @@ Instruction::Instruction(char const *mn): mnemonic[len] = 0; } -Instruction::Instruction(char const *mn, Argument arg): - Instruction(mn) +AsmInstruction::AsmInstruction(char const *mn, AsmArgument arg): + AsmInstruction(mn) { args[0] = arg; arg_count = 1; } -Instruction::Instruction(char const *mn, Argument arg1, Argument arg2): - Instruction(mn) +AsmInstruction::AsmInstruction(char const *mn, AsmArgument arg1, + AsmArgument arg2): + AsmInstruction(mn) { args[0] = arg1; args[1] = arg2; @@ -220,13 +221,13 @@ Instruction::Instruction(char const *mn, Argument arg1, Argument arg2): // Instruction classes //--- -bool Instruction::isterminal() const noexcept +bool AsmInstruction::isterminal() const noexcept { if(!strcmp(mnemonic, "rte") || !strcmp(mnemonic, "rts")) return true; /* Also jmp @rn which is regarded as a terminal call */ - if(!strcmp(mnemonic,"jmp") && args[0].kind == Argument::Deref) + if(!strcmp(mnemonic,"jmp") && args[0].kind == AsmArgument::Deref) return true; /* Same for braf because we can't analyse further */ @@ -236,12 +237,12 @@ bool Instruction::isterminal() const noexcept return false; } -bool Instruction::isjump() const noexcept +bool AsmInstruction::isjump() const noexcept { return !strcmp(mnemonic, "bra"); } -bool Instruction::iscondjump() const noexcept +bool AsmInstruction::iscondjump() const noexcept { char const *v[] = { "bf", "bf.s", "bf/s", "bt", "bt.s", "bt/s", NULL, @@ -254,7 +255,7 @@ bool Instruction::iscondjump() const noexcept return false; } -bool Instruction::isdelayed() const noexcept +bool AsmInstruction::isdelayed() const noexcept { char const *v[] = { "rte", "rts", "jmp", "jsr", "bra", "braf", "bsr", "bsrf", @@ -268,9 +269,10 @@ bool Instruction::isdelayed() const noexcept return false; } -bool 
Instruction::isvaliddelayslot() const noexcept +bool AsmInstruction::isvaliddelayslot() const noexcept { - return !isdelayed() && !isterminal() && !isjump() && !iscondjump(); + return !isdelayed() && !isterminal() && !isjump() && !iscondjump() + && strcmp(this->mnemonic, "mova") != 0; } } /* namespace FxOS */ diff --git a/lib/load-asm.l b/lib/load-asm.l index 4f666c6..1eddd77 100644 --- a/lib/load-asm.l +++ b/lib/load-asm.l @@ -166,7 +166,7 @@ static Pattern make_pattern(char const *code) @opsize Operation size indicated in the mnemonic @m @n @d @i Instruction instance Returns a semantic FxOS::Argument. */ -static Argument make_arg(int token, int opsize, int m, int n, int d, int i) +static AsmArgument make_arg(int token, int opsize, int m, int n, int d, int i) { using Reg = CpuRegister; static Reg general_purpose[16] = { @@ -185,50 +185,51 @@ static Argument make_arg(int token, int opsize, int m, int n, int d, int i) int32_t i8 = (int8_t)i; switch(token) { - case R0: return Argument_Reg(Reg::R0); - case RN: return Argument_Reg(Rn); - case RM: return Argument_Reg(Rm); - case R0_BANK: return Argument_Reg(Reg::R0B); - case R1_BANK: return Argument_Reg(Reg::R1B); - case R2_BANK: return Argument_Reg(Reg::R2B); - case R3_BANK: return Argument_Reg(Reg::R3B); - case R4_BANK: return Argument_Reg(Reg::R4B); - case R5_BANK: return Argument_Reg(Reg::R5B); - case R6_BANK: return Argument_Reg(Reg::R6B); - case R7_BANK: return Argument_Reg(Reg::R7B); - case SR: return Argument_Reg(Reg::SR); - case PR: return Argument_Reg(Reg::PR); - case GBR: return Argument_Reg(Reg::GBR); - case VBR: return Argument_Reg(Reg::VBR); - case DBR: return Argument_Reg(Reg::DBR); - case SSR: return Argument_Reg(Reg::SSR); - case SPC: return Argument_Reg(Reg::SPC); - case SGR: return Argument_Reg(Reg::SGR); - case MACH: return Argument_Reg(Reg::MACH); - case MACL: return Argument_Reg(Reg::MACL); - case JUMP8: return Argument_PcJump(d8 * 2); - case JUMP12: return Argument_PcJump(d12 * 2); - case DPC: 
return Argument_PcAddr(d * 4); - case IMM: return Argument_Imm(i8); - case AT_RN: return Argument_Deref(Rn); - case AT_RM: return Argument_Deref(Rm); - case AT_RMP: return Argument_PostInc(Rm); - case AT_RNP: return Argument_PostInc(Rn); - case AT_MRN: return Argument_PreDec(Rn); - case AT_DRN: return Argument_StructDeref(d*opsize, opsize, Rn); - case AT_DRM: return Argument_StructDeref(d*opsize, opsize, Rm); - case AT_DGBR: return Argument_StructDeref(d*opsize, opsize, Reg::GBR); - case AT_R0RN: return Argument_ArrayDeref(Reg::R0, Rn); - case AT_R0RM: return Argument_ArrayDeref(Reg::R0, Rm); - case AT_R0GBR: return Argument_ArrayDeref(Reg::R0, Reg::GBR); + case R0: return AsmArgument_Reg(Reg::R0); + case RN: return AsmArgument_Reg(Rn); + case RM: return AsmArgument_Reg(Rm); + case R0_BANK: return AsmArgument_Reg(Reg::R0B); + case R1_BANK: return AsmArgument_Reg(Reg::R1B); + case R2_BANK: return AsmArgument_Reg(Reg::R2B); + case R3_BANK: return AsmArgument_Reg(Reg::R3B); + case R4_BANK: return AsmArgument_Reg(Reg::R4B); + case R5_BANK: return AsmArgument_Reg(Reg::R5B); + case R6_BANK: return AsmArgument_Reg(Reg::R6B); + case R7_BANK: return AsmArgument_Reg(Reg::R7B); + case SR: return AsmArgument_Reg(Reg::SR); + case PR: return AsmArgument_Reg(Reg::PR); + case GBR: return AsmArgument_Reg(Reg::GBR); + case VBR: return AsmArgument_Reg(Reg::VBR); + case DBR: return AsmArgument_Reg(Reg::DBR); + case SSR: return AsmArgument_Reg(Reg::SSR); + case SPC: return AsmArgument_Reg(Reg::SPC); + case SGR: return AsmArgument_Reg(Reg::SGR); + case MACH: return AsmArgument_Reg(Reg::MACH); + case MACL: return AsmArgument_Reg(Reg::MACL); + case JUMP8: return AsmArgument_PcJump(d8 * 2); + case JUMP12: return AsmArgument_PcJump(d12 * 2); + case DPC: return AsmArgument_PcAddr(d * 4); + case IMM: return AsmArgument_Imm(i8); + case AT_RN: return AsmArgument_Deref(Rn); + case AT_RM: return AsmArgument_Deref(Rm); + case AT_RMP: return AsmArgument_PostInc(Rm); + case AT_RNP: return 
AsmArgument_PostInc(Rn); + case AT_MRN: return AsmArgument_PreDec(Rn); + case AT_DRN: return AsmArgument_StructDeref(d*opsize, opsize, Rn); + case AT_DRM: return AsmArgument_StructDeref(d*opsize, opsize, Rm); + case AT_DGBR: return AsmArgument_StructDeref(d*opsize, opsize, Reg::GBR); + case AT_R0RN: return AsmArgument_ArrayDeref(Reg::R0, Rn); + case AT_R0RM: return AsmArgument_ArrayDeref(Reg::R0, Rm); + case AT_R0GBR: return AsmArgument_ArrayDeref(Reg::R0, Reg::GBR); case AT_DPC: if(!opsize) err("@(disp,pc) must have a size (.w, .l)"); - return Argument_PcRel(d*opsize, opsize); + return AsmArgument_PcRel(d*opsize, opsize); } - throw std::logic_error("lex asm builds args from bad tokens"); + FxOS_log(ERR, "bad token %d found as argument of instruction sped", token); + return AsmArgument_Reg(Reg::UNDEFINED); } /* Record all the instances of an instruction in the disassembly table. @@ -254,7 +255,7 @@ static int instantiate(struct Pattern p, char const *mnemonic, int argtoken1, opcode |= (d << p.d_sh); opcode |= (i << p.i_sh); - Instruction ins(mnemonic); + AsmInstruction ins(mnemonic); ins.opcode = opcode; if(argtoken1) { diff --git a/lib/passes/cfg.cpp b/lib/passes/cfg.cpp index 7d83576..26efe8f 100644 --- a/lib/passes/cfg.cpp +++ b/lib/passes/cfg.cpp @@ -1,8 +1,11 @@ -//--- -// fxos.passes.cfg: Control Flow Graph construction -//--- +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// -#include +#include #include #include #include @@ -10,90 +13,82 @@ namespace FxOS { CfgPass::CfgPass(Disassembly &disasm): - DisassemblyPass(disasm, "cfg") + DisassemblyPass(disasm, "cfg") { } -bool CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci) +bool CfgPass::analyze(uint32_t pc, Instruction &i) { - /* Don't 
explore successors if the instruction cannot be decoded, not - even pc+2. This will prevent wild overshoot. */ - if(!ci.inst) - { - FxOS_log(ERR, "invalid instruction as %08x: %04x", pc, ci.opcode); - return false; - } + /* Don't explore successors if the instruction cannot be decoded, not + even pc+2. This will prevent wild overshoot. */ + if(!i.inst) { + FxOS_log(ERR, "invalid instruction as %08x: %04x", pc, i.opcode); + return false; + } - /* Compute the jump target for jump instructions. This is easy because - they are all trivially computable. (...If they are not we dub them - "terminal" to avoid the computation!) */ - uint32_t jmptarget = 0xffffffff; + /* Compute the jump target for jump instructions. This is easy because + they are all trivially computable. (...If they are not we dub them + "terminal" to avoid the computation!) */ + uint32_t jmptarget = 0xffffffff; - if(ci.inst->isjump() || ci.inst->iscondjump()) - { - auto &args = ci.inst->args; + if(i.inst->isjump() || i.inst->iscondjump()) { + auto &args = i.inst->args; - if(ci.inst->arg_count != 1 || args[0].kind != Argument::PcJump) - { - FxOS_log(ERR, "invalid jump instruction at %08x", pc); - return false; - } + if(i.inst->arg_count != 1 || args[0].kind != AsmArgument::PcJump) { + FxOS_log(ERR, "invalid jump instruction at %08x", pc); + return false; + } - jmptarget = (pc+4) + args[0].disp; + jmptarget = (pc+4) + args[0].disp; - /* Make the target of the jump a leader */ - ConcreteInstruction &target = m_disasm.readins(jmptarget); - target.leader = true; + /* Make the target of the jump a leader */ + Instruction &target = m_disasm.readins(jmptarget); + target.leader = true; - /* Check that it's not in a delay slot */ - if(target.delayslot) - throw std::logic_error(format("%08x jumps into %08x, which is a " - "delay slot - this is unsupported by fxos and will produce " - "garbage analysis! 
(x_x)", pc, jmptarget)); - } + /* Check that it's not in a delay slot */ + if(target.delayslot) + throw std::logic_error(format("%08x jumps into %08x, which is a " + "delay slot - this is unsupported by fxos and will produce " + "garbage analysis! (x_x)", pc, jmptarget)); + } - /* If this instruction is in a delay slot, check its type. A valid - delay slot has no branching properties on its own, so nothing new to - set in the properties. */ - if(ci.delayslot) - { - if(!ci.inst->isvaliddelayslot()) - { - FxOS_log(ERR, "invalid delay slot at %08x", pc); - return false; - } - } - /* Handle normal instructions */ - else if(!ci.inst->isdelayed()) - { - ci.terminal = ci.inst->isterminal(); - ci.jump = ci.inst->isjump(); - ci.condjump = ci.inst->iscondjump(); - ci.jmptarget = jmptarget; - } - /* Create a new delay slot */ - else - { - ConcreteInstruction &slot = m_disasm.readins(pc+2); - if(slot.leader) - throw std::logic_error(format("%08x is a leader and also a delay " - "slot - this is unsupported by fxos and will produce garbage " - "analysis! (x_x)", pc+2)); - if(!slot.inst->isvaliddelayslot()) - { - FxOS_log(ERR, "invalid delay slot at %08x", pc+2); - return false; - } + /* If this instruction is in a delay slot, check its type. A valid + delay slot has no branching properties on its own, so nothing new to + set in the properties. */ + if(i.delayslot) { + if(!i.inst->isvaliddelayslot()) { + FxOS_log(ERR, "invalid delay slot at %08x", pc); + return false; + } + } + /* If it has a delay slot, create it at the next instruction */ + else if(i.inst->isdelayed()) { + Instruction &slot = m_disasm.readins(pc+2); + if(slot.leader) + throw std::logic_error(format("%08x is a leader and also a delay " + "slot - this is unsupported by fxos and will produce garbage " + "analysis! 
(x_x)", pc+2)); + if(!slot.inst->isvaliddelayslot()) { + FxOS_log(ERR, "invalid delay slot at %08x", pc+2); + return false; + } - slot.delayslot = true; - slot.terminal = ci.inst->isterminal(); - slot.jump = ci.inst->isjump(); - slot.condjump = ci.inst->iscondjump(); - slot.jmptarget = jmptarget; - } + slot.delayslot = true; + slot.terminal = i.inst->isterminal(); + slot.jump = i.inst->isjump(); + slot.condjump = i.inst->iscondjump(); + slot.jmptarget = jmptarget; + } + /* Otherwise, use standard properties */ + else if(!i.inst->isdelayed()) { + i.terminal = i.inst->isterminal(); + i.jump = i.inst->isjump(); + i.condjump = i.inst->iscondjump(); + i.jmptarget = jmptarget; + } - enqueue_unseen_successors(pc, ci); - return true; + enqueue_unseen_successors(pc, i); + return true; } } /* namespace FxOS */ diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index cab3f48..fd81c80 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -1,75 +1,66 @@ -//--- -// fxos.passes.pcrel: Resolution of PC-relative addresses -//--- +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// -#include +#include namespace FxOS { PcrelPass::PcrelPass(Disassembly &disasm): - InstructionDisassemblyPass(disasm, "pcrel") + InstructionDisassemblyPass(disasm, "pcrel") { } -bool PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) +bool PcrelPass::analyze(uint32_t pc, Instruction &ci) { - Instruction const *i = ci.inst; - if(!i) - return false; + AsmInstruction const *i = ci.inst; + if(!i) + return false; - for(size_t n = 0; n < i->arg_count; n++) - { - Argument const &a = i->args[n]; - ConcreteInstructionArg &ca = ci.args[n]; + for(size_t n = 0; n < i->arg_count; n++) { + AsmArgument const &arg = i->args[n]; + 
Argument &a = ci.args[n]; - if(a.kind == Argument::PcRel && - (i->opsize == 2 || i->opsize == 4)) - { - uint32_t addr = (pc & ~(a.opsize - 1)) + 4 + a.disp; - ca.location = RelConstDomain().constant(addr); + if(arg.kind == AsmArgument::PcRel && (i->opsize==2 || i->opsize==4)) { + uint32_t addr = (pc & ~(arg.opsize - 1)) + 4 + arg.disp; + a.location = RelConstDomain().constant(addr); - /* Also compute the value. This is sign-extended from 16-bit with - mov.w. There is no mov.b for this instruction. */ - VirtualSpace &space = m_disasm.space(); - uint32_t v = -1; + /* Also compute the value. This is sign-extended from 16-bit with + mov.w. There is no mov.b for this instruction. */ + VirtualSpace &space = m_disasm.space; + uint32_t v = -1; - if(i->opsize == 2 && space.covers(addr, 2)) - { - v = space.read_i16(addr); - ca.value = DataValue(IntegerType::u32); - ca.value.write(0, 4, v); - } - if(i->opsize == 4 && space.covers(addr, 4)) - { - v = space.read_i32(addr); - ca.value = DataValue(IntegerType::u32); - ca.value.write(0, 4, v); - } - } - else if(a.kind == Argument::PcJump) - { - uint32_t addr = pc + 4 + a.disp; - ca.location = RelConstDomain().constant(addr); + if(i->opsize == 2 && space.covers(addr, 2)) { + v = space.read_i16(addr); + a.value = RelConstDomain().constant(v); + } + if(i->opsize == 4 && space.covers(addr, 4)) { + v = space.read_i32(addr); + a.value = RelConstDomain().constant(v); + } + } + else if(arg.kind == AsmArgument::PcJump) { + uint32_t addr = pc + 4 + arg.disp; + a.location = RelConstDomain().constant(addr); + a.value = RelConstDomain().constant(addr); + } + else if(arg.kind == AsmArgument::PcAddr) + { + uint32_t addr = (pc & ~3) + 4 + arg.disp; - ca.value = DataValue(IntegerType::u32); - ca.value.write(0, 4, addr); - } - else if(a.kind == Argument::PcAddr - && m_disasm.passes.count("cfg")) - { - uint32_t addr = (pc & ~3) + 4 + a.disp; + /* SH3 manual says that mova uses the target address of the jump + when in a delay slot. 
SH4AL-DSP makes it invalid. */ + // if(ci.delayslot) addr = (ci.jmptarget & ~3) + 4 + a.disp; - /* SH3 manual says that the semantics of mova change in a delay - slot. GNU as says they don't. */ -// if(ci.delayslot) addr = (ci.jmptarget&~3) + 4 + a.disp; + a.location = RelConstDomain().constant(addr); + a.value = RelConstDomain().constant(addr); + } + } - ca.location = RelConstDomain().constant(addr); - ca.value = DataValue(IntegerType::u32); - ca.value.write(0, 4, addr); - } - } - - return true; + return true; } } /* namespace FxOS */ diff --git a/lib/passes/print.cpp b/lib/passes/print.cpp index e4109c6..377eec4 100644 --- a/lib/passes/print.cpp +++ b/lib/passes/print.cpp @@ -1,8 +1,11 @@ -//--- -// fxos.passes.print: Print disassembly -//--- +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// -#include +#include #include #include @@ -12,199 +15,179 @@ namespace FxOS { PrintPass::PrintPass(Disassembly &disasm): - InstructionDisassemblyPass(disasm, "print"), m_symtables(), - m_last_address(0xffffffff) + InstructionDisassemblyPass(disasm, "print"), m_symtables(), + m_last_address(0xffffffff) { - /* Default parameters: all 0 */ + /* Default parameters: all 0 */ - /* Use an OS observer to describe syscalls in header lines */ - m_os = disasm.space().os_analysis(); + /* Use an OS observer to describe syscalls in header lines */ + m_os = disasm.space.os_analysis(); - /* Use the symbol tables from the virtual space */ - m_symtables.push_back(disasm.space().symbols); + /* Use the symbol tables from the virtual space */ + m_symtables.push_back(disasm.space.symbols); } -bool PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci) +bool PrintPass::analyze(uint32_t pc, Instruction &i) { - Instruction const *i 
= ci.inst; + /* Ellipsis if there is a gap since last instruction */ - /* Ellipsis if there is a gap since last instruction */ + if(m_last_address+1 != 0 && pc != m_last_address+2) + printf(" ...\n"); - if(m_last_address+1 != 0 && pc != m_last_address+2) - printf(" ...\n"); + /* Preliminary syscall number */ - /* Preliminary syscall number */ + int syscall_id; + if(m_os && (syscall_id = m_os->find_syscall(pc)) >= 0) { + printf("\n<%%%03x", syscall_id); + auto maybe_str = symquery(Symbol::Syscall, syscall_id); + if(maybe_str) + printf(" %s", (*maybe_str).c_str()); + printf(">\n"); + } - int syscall_id; - if(m_os && (syscall_id = m_os->find_syscall(pc)) >= 0) - { - printf("\n<%%%03x", syscall_id); + /* Raw data if instruction cannot be decoded */ - auto maybe_str = symquery(Symbol::Syscall, syscall_id); - if(maybe_str) printf(" %s", (*maybe_str).c_str()); + printf(" %08x: %04x", pc, (i.inst ? i.inst->opcode : i.opcode)); + if(!i.inst) { + printf("\n"); + m_last_address = pc; + return true; + } - printf(">\n"); - } + /* Mnemonic */ - /* Raw data if instruction cannot be decoded */ + static char const *suffixes[5] = { "", ".b", ".w", "", ".l" }; + char const *suffix = suffixes[(i.inst->opsize <= 4) ? i.inst->opsize : 0]; - printf(" %08x: %04x", pc, (i ? i->opcode : ci.opcode)); - if(!i) - { - printf("\n"); - m_last_address = pc; - return true; - } + int spacing = i.inst->arg_count + ? 8 - strlen(i.inst->mnemonic) - strlen(suffix) + : 0; + printf(" %s%s%*s", i.inst->mnemonic, suffix, spacing, ""); - /* Mnemonic */ + /* Arguments */ - static char const *suffixes[5] = { "", ".b", ".w", "", ".l" }; - char const *suffix = suffixes[(i->opsize <= 4) ? i->opsize : 0]; + for(size_t n = 0; n < i.inst->arg_count; n++) { + AsmArgument const &arg = i.inst->args[n]; + Argument const &a = i.args[n]; - int spacing = i->arg_count ? 
8 - strlen(i->mnemonic) - strlen(suffix) : 0; - printf(" %s%s%*s", i->mnemonic, suffix, spacing, ""); + if(n) printf(", "); - /* Arguments */ + queue(arg.str()); + if(arg.kind == AsmArgument::PcJump) + pcjumploc(a); + else if(arg.kind == AsmArgument::PcRel) + pcrelloc(a); + else if(arg.kind == AsmArgument::PcAddr) + pcaddrloc(a); + queue_flush(); + } - for(size_t n = 0; n < i->arg_count; n++) - { - Argument const &a = i->args[n]; - ConcreteInstructionArg const &arg = ci.args[n]; - - if(n) printf(", "); - - if(a.kind == Argument::PcJump) - { - queue(a.str()); - pcjumploc(arg); - queue_flush(); - } - else if(a.kind == Argument::PcRel) - { - queue(a.str()); - pcrelloc(arg); - queue_flush(); - } - else if(a.kind == Argument::PcAddr) - { - queue(a.str()); - pcaddrloc(arg); - queue_flush(); - } - else - { - queue(a.str()); - queue_flush(); - } - } - - printf("\n"); - m_last_address = pc; - return true; + printf("\n"); + m_last_address = pc; + return true; } std::optional PrintPass::symquery(Symbol::Type type, - uint32_t value) + uint32_t value) { - for(int i = m_symtables.size() - 1; i >= 0; i--) - { - SymbolTable const &st = m_symtables[i]; + for(int i = m_symtables.size() - 1; i >= 0; i--) { + SymbolTable const &st = m_symtables[i]; - auto maybe_str = st.query(type, value); - if(maybe_str) - return maybe_str; - } + auto maybe_str = st.query(type, value); + if(maybe_str) + return maybe_str; + } - return std::nullopt; + return std::nullopt; } void PrintPass::queue(std::string str, bool override) { - if(override && m_messages.size()) - m_messages.pop_back(); + if(override && m_messages.size()) + m_messages.pop_back(); - m_messages.push_back(str); + m_messages.push_back(str); } void PrintPass::queue_flush() { - for(size_t i = 0; i < m_messages.size(); i++) - { - if(i != 0) printf(" "); - printf("%s", m_messages[i].c_str()); - } + for(size_t i = 0; i < m_messages.size(); i++) { + if(i != 0) + printf(" "); + printf("%s", m_messages[i].c_str()); + } - m_messages.clear(); + 
m_messages.clear(); } -void PrintPass::pcjumploc(ConcreteInstructionArg const &arg) +void PrintPass::pcjumploc(Argument const &a) { - if(!RelConstDomain().is_constant(arg.location)) return; - if(promote_pcjump_loc == Never) return; + if(!RelConstDomain().is_constant(a.location)) return; + if(promote_pcjump_loc == Never) return; - queue(format("<%s>", arg.location.str()), promote_pcjump_loc==Promote); - syscall(arg); + queue(format("<%s>", a.location.str()), promote_pcjump_loc==Promote); + syscall(a); } -void PrintPass::pcrelloc(ConcreteInstructionArg const &arg) +void PrintPass::pcrelloc(Argument const &a) { - if(!RelConstDomain().is_constant(arg.location)) return; - if(promote_pcrel_loc == Never) return; + if(!RelConstDomain().is_constant(a.location)) return; + if(promote_pcrel_loc == Never) return; - queue(format("<%s>", arg.location.str()), promote_pcrel_loc==Promote); - pcrelval(arg); + queue(format("<%s>", a.location.str()), promote_pcrel_loc==Promote); + pcrelval(a); } -void PrintPass::pcrelval(ConcreteInstructionArg const &arg) +void PrintPass::pcrelval(Argument const &a) { - if(!arg.value || arg.value.type->kind() != DataType::Integer) return; - if(promote_pcrel_value == Never) return; + if(!a.value) return; + if(promote_pcrel_value == Never) return; - queue(arg.value.str(), promote_pcrel_value==Promote); - syscall(arg); + queue(a.value.str(), promote_pcrel_value==Promote); + syscall(a); } -void PrintPass::syscall(ConcreteInstructionArg const &arg) +void PrintPass::syscall(Argument const &a) { - if(!arg.value || arg.value.type->kind() != DataType::Integer) return; + if(!a.value) return; - /* If this is not a syscall, try to display as a symbol instead */ - if(promote_syscall == Never || arg.syscall_id < 0) - { - symbol(arg); - return; - } + /* If this is not a syscall, try to display as a symbol instead */ + if(promote_syscall == Never || a.syscall_id < 0) { + symbol(a); + return; + } - queue(format("%%%03x", arg.syscall_id), promote_syscall==Promote); - 
syscallname(arg); + queue(format("%%%03x", a.syscall_id), promote_syscall==Promote); + syscallname(a); } -void PrintPass::syscallname(ConcreteInstructionArg const &arg) +void PrintPass::syscallname(Argument const &a) { - if(arg.syscall_id < 0) return; + if(a.syscall_id < 0) return; - auto maybe_name = symquery(Symbol::Syscall, arg.syscall_id); - if(!maybe_name) return; + auto maybe_name = symquery(Symbol::Syscall, a.syscall_id); + if(!maybe_name) return; - queue(*maybe_name, promote_syscallname==Promote); + queue(*maybe_name, promote_syscallname==Promote); } -void PrintPass::symbol(ConcreteInstructionArg const &arg) +void PrintPass::symbol(Argument const &a) { - if(!arg.value || arg.value.type->kind() != DataType::Integer) return; + if(!a.value) return; - auto maybe_name = symquery(Symbol::Address, arg.value.uinteger()); - if(!maybe_name) return; + auto maybe_name = symquery(Symbol::Address, + RelConstDomain().constant_value(a.value)); + if(!maybe_name) return; - queue(*maybe_name, promote_symbol==Promote); + queue(*maybe_name, promote_symbol==Promote); } -void PrintPass::pcaddrloc(ConcreteInstructionArg const &arg) +void PrintPass::pcaddrloc(Argument const &a) { - if(!RelConstDomain().is_constant(arg.location)) return; - if(promote_pcaddr_loc == Never) return; + if(!RelConstDomain().is_constant(a.location)) return; + if(promote_pcaddr_loc == Never) return; - queue(format("<%s>", arg.location.str()), promote_pcaddr_loc==Promote); + queue(format("<%s>", a.location.str()), promote_pcaddr_loc==Promote); } } /* namespace FxOS */ diff --git a/lib/passes/syscall.cpp b/lib/passes/syscall.cpp index 1f8404f..bc97071 100644 --- a/lib/passes/syscall.cpp +++ b/lib/passes/syscall.cpp @@ -1,55 +1,57 @@ -//--- -// fxos.passes.syscall: Detection and substitution of syscall addresses -//--- +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ 
(_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// -#include +#include namespace FxOS { SyscallPass::SyscallPass(Disassembly &disasm, OS *os): - InstructionDisassemblyPass(disasm, "syscall"), m_os(os) + InstructionDisassemblyPass(disasm, "syscall"), m_os(os) { } -bool SyscallPass::analyze([[maybe_unused]] uint32_t pc,ConcreteInstruction &ci) +bool SyscallPass::analyze(uint32_t pc, Instruction &ci) { - /* Nothing to do if no syscall table is provided! */ - if(!m_os) - return true; + /* Nothing to do if no syscall table is provided! */ + if(!m_os) + return true; - Instruction const *i = ci.inst; - if(!i) - return false; + (void)pc; - for(size_t n = 0; n < i->arg_count; n++) - { - Argument const &a = i->args[n]; - ConcreteInstructionArg &ca = ci.args[n]; + AsmInstruction const *i = ci.inst; + if(!i) + return false; - bool eligible = false; - uint32_t address; + for(size_t n = 0; n < i->arg_count; n++) { + AsmArgument const &arg = i->args[n]; + Argument &a = ci.args[n]; - if(a.kind == Argument::PcRel && ca.value - && ca.value.type == IntegerType::u32) - { - eligible = true; - address = ca.value.read(0, 4); - } - if(a.kind == Argument::PcJump && ca.location - && RelConstDomain().is_constant(ca.location)) - { - eligible = true; - address = RelConstDomain().constant_value(ca.location); - } + bool eligible = false; + uint32_t address; - if(eligible) - { - int sid = m_os->find_syscall(address); - if(sid >= 0) ca.syscall_id = sid; - } - } + if(arg.kind == AsmArgument::PcRel && a.value + && RelConstDomain().is_constant(a.value)) { + eligible = true; + address = RelConstDomain().constant_value(a.value); + } + if(arg.kind == AsmArgument::PcJump && a.location + && RelConstDomain().is_constant(a.location)) { + eligible = true; + address = RelConstDomain().constant_value(a.location); + } - return true; + if(eligible) { + int sid = m_os->find_syscall(address); + if(sid >= 0) + a.syscall_id = sid; + } + 
} + + return true; } } /* namespace FxOS */ diff --git a/lib/semantics.cpp b/lib/semantics.cpp index 8251d03..57f0188 100644 --- a/lib/semantics.cpp +++ b/lib/semantics.cpp @@ -45,7 +45,7 @@ size_t DataType::size() const noexcept case Bitfield: return bitfield().size; case Array: return array().size; case String: return string().size; - case Struct: return structs().size; + case Struct: return structure().size; } return 0; @@ -67,7 +67,7 @@ StringType const &DataType::string() const { return std::get(v); } -StructType const &DataType::structs() const +StructType const &DataType::structure() const { return std::get(v); } diff --git a/lib/vspace.cpp b/lib/vspace.cpp index 7d80cb7..e3f2e4c 100644 --- a/lib/vspace.cpp +++ b/lib/vspace.cpp @@ -1,13 +1,16 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// + #include #include #include namespace FxOS { -//--- -// Bindings of data buffers into memory regions -//--- - Binding::Binding(MemoryRegion source_region, Buffer source_buffer): region {source_region}, buffer {source_buffer} { @@ -25,10 +28,6 @@ char const *Binding::translate_dynamic(uint32_t addr, int *size) return nullptr; } -//--- -// Composite memory targets -//--- - VirtualSpace::VirtualSpace(): mpu {}, bindings {}, m_os {nullptr} { diff --git a/shell/d.cpp b/shell/d.cpp index 1f20d17..4b3bd17 100644 --- a/shell/d.cpp +++ b/shell/d.cpp @@ -8,10 +8,10 @@ #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include static void disassemble(Session &session, Disassembly &disasm, std::vector const &passes, uint32_t address) diff --git a/shell/main.cpp b/shell/main.cpp index 41f75c5..ee6aed5 100644 --- a/shell/main.cpp +++ b/shell/main.cpp @@ -15,6 +15,8 
@@ #include "theme.h" #include "parser.h" #include "commands.h" +#include +#include static std::map commands;