diff --git a/Makefile b/Makefile index 7a89fbf..a5d4c4c 100644 --- a/Makefile +++ b/Makefile @@ -78,7 +78,7 @@ bin/: # Dependency generation # -include $(wildcard build/*/*.d) +include $(wildcard build/*/*.d build/*/*/*.d) .PHONY: all all-lib all-fxos clean clean-lib clean-fxos distclean diff --git a/fxos/main.cpp b/fxos/main.cpp index 143d928..45062fa 100644 --- a/fxos/main.cpp +++ b/fxos/main.cpp @@ -9,6 +9,8 @@ #include #include +#include +#include #include @@ -221,7 +223,7 @@ int main_disassembly(int argc, char **argv) { int error=0, option=0, mpu='4'; std::vector passes { - "cfg", "pcrel", "constprop", "syscall", "regs" + "cfg", "pcrel", "constprop", "syscall", "regs", "print" }; std::string file; @@ -242,18 +244,20 @@ int main_disassembly(int argc, char **argv) mpu = option; break; case 'p': + { passes.clear(); - { - std::istringstream in(optarg); - std::string pass; + std::istringstream in(optarg); + std::string pass; - while(std::getline(in, pass, ',')) { - passes.push_back(pass); - } + while(std::getline(in, pass, ',')) { + passes.push_back(pass); } + if(!passes.size()) error = 1, log(ERR "no pass specified"); + if(passes.back() != "print") passes.push_back("print"); break; + } case 'f': file = optarg; break; @@ -305,16 +309,43 @@ int main_disassembly(int argc, char **argv) log(LOG "Disassembling target %s at %s", tname, refstr); - for(auto pass: passes) + try { - log(LOG "Running pass %s", pass); - - if(pass == "cfg") + for(auto pass: passes) { - CfgPass p(disasm); - p.run(ref); + auto start = timer_start(); + log(LOG "Running pass %s...\\", pass); + + if(pass == "cfg") + { + CfgPass p(disasm); + p.run(ref); + } + else if(pass == "pcrel") + { + PcrelPass p(disasm); + p.run(ref); + } + else if(pass == "print") + { + PrintPass p(disasm); + p.hide_resolved_pcjump = true; + p.hide_resolved_pcrel = true; + p.hide_movpc_address = + PrintPass::Hide_MovPC_Region; + p.run(); + } + log(LOG "%s", timer_format(timer_end(start))); } } + catch(LangError &e) + { + 
log(ERR "%08x: %s", e.addr(), e.what()); + } + catch(AddrError &e) + { + log(ERR "%08x[%d]: %s", e.addr(), e.size(), e.what()); + } } else { diff --git a/include/fxos/disasm-passes/cfg.h b/include/fxos/disasm-passes/cfg.h index b03a51f..13979b5 100644 --- a/include/fxos/disasm-passes/cfg.h +++ b/include/fxos/disasm-passes/cfg.h @@ -8,6 +8,28 @@ // This is the main exploration pass. Other passes do not typically load new // instructions from the underlying disassembly. Straightforward passes such as // [print] iterate on instructions loaded by this pass. +// +// The main gimmick of this pass is to "resolve delay slots" by forcing down +// the properties of delayed instructions into their respective delay slots. +// For instance, in +// jmp @r0 +// mov #1, r4 +// the jump is delayed until after the move. To handle this, fxos makes the jmp +// a no-op and applies dual move-jump semantics to the mov below it. +// +// This could be tricky for the abstract interpreter because the jump target +// has to be computed with the environment before the jmp, which is not +// available when considering the mov. Luckily all delayed jumps are state +// no-ops so the state before the mov can be used instead. +// +// Note that jumping into a delay slot will activate the jump in fxos, which is +// not the actual behavior of the processor. fxos usually complains about the +// crazy compiler when this occurs. Note that if it happens but we don't know +// that it's a delay slot (ie. the instruction from above is never executed in +// the current function), then everything's fine. +// +// Take-home message: delay slots are a pain to analyze, so we get rid of them +// as soon as possible and proceed with normal semantics. 
//--- #ifndef LIBFXOS_DISASM_PASSES_CFG_H diff --git a/include/fxos/disasm-passes/pcrel.h b/include/fxos/disasm-passes/pcrel.h new file mode 100644 index 0000000..1352150 --- /dev/null +++ b/include/fxos/disasm-passes/pcrel.h @@ -0,0 +1,25 @@ +//--- +// fxos.disasm-passes.pcrel: Resolution of PC-relative addresses +// +// This pass computes all PC-relative addresses used in fixed-target jumps and +// in PC-relative mov instructions. It does so by setting the location of each +// PC-relative argument to the associated constant value. +//--- + +#ifndef FXOS_DISASM_PASSES_PCREL_H +#define FXOS_DISASM_PASSES_PCREL_H + +#include + +namespace FxOS { + +class PcrelPass: public DisassemblyPass +{ +public: + PcrelPass(Disassembly &disasm); + void analyze(uint32_t pc, ConcreteInstruction &inst) override; +}; + +} /* namespace FxOS */ + +#endif /* FXOS_DISASM_PASSES_PCREL_H */ diff --git a/include/fxos/disasm-passes/print.h b/include/fxos/disasm-passes/print.h new file mode 100644 index 0000000..7e3ef01 --- /dev/null +++ b/include/fxos/disasm-passes/print.h @@ -0,0 +1,55 @@ +//--- +// fxos.disasm-passes.print: Concrete program printer +// +// This pass prints the program and some or all of its annotations, depending +// on the specified parameters. +//--- + +#ifndef LIBFXOS_DISASM_PASSES_PRINT_H +#define LIBFXOS_DISASM_PASSES_PRINT_H + +#include + +namespace FxOS { + +class PrintPass: public DisassemblyPass +{ +public: + PrintPass(Disassembly &disasm); + void analyze(uint32_t pc, ConcreteInstruction &inst) override; + + /* This pass uses another entry method that starts at the instruction + with the smallest address loaded in the disassembly, then goes down. + The standard run() is not available. 
*/ + void run(void); + + //--- + // Print pass parameters + //--- + + /* In jump instructions, hide the raw value "pc+" if the target + address has been computed */ + bool hide_resolved_pcjump; + /* In PC-relative move instructions, hide the raw value "@(,pc)" + of the argument if the target address has been computed */ + bool hide_resolved_pcrel; + + /* In PC-relative move instructions, hide the target address of the + argument... under conditions */ + enum { + /* Always show the accessed address */ + Hide_MovPC_Never, + /* Hide the address if it's in the same memory region as the + instruction doing the move */ + Hide_MovPC_Region, + /* Always hide the address */ + Hide_MovPC_Always, + + } hide_movpc_address; + + /* TODO: More print pass parameters */ +}; + +} /* namespace FxOS */ + +#endif /* LIBFXOS_DISASM_PASSES_PRINT_H */ diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index c8732d9..56d3fc3 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -30,17 +30,18 @@ struct ConcreteInstructionArg ConcreteInstructionArg(); //--- - // Data set by the abstract interpretation passes + // Data set by the pass and abstract interpreter //--- /* Location in CPU or memory, if that can be determined */ - std::optional loc; - /* Alternatively, data type, which can sometimes be determined uniquely - even if the location is not constant */ - std::optional type; + Location location; + /* The pointed value, if it can be determined. Valid only if the + location is specified; under some conditions the location can't be + determined but the type can. */ + DataValue value; //--- - // Data set by the syscall and regs passes + // Data set by the and passes //--- /* If the value is a syscall address, the syscall's id */ @@ -52,50 +53,41 @@ struct ConcreteInstructionArg /* A loaded and annotated instruction. 
*/ struct ConcreteInstruction { - ConcreteInstruction(Instruction &inst); + ConcreteInstruction(Instruction const &inst); - /* What instruction it is */ - Instruction &inst; + /* What instruction this is. Note that this does not determine all the + properties below. Placement and delay slots greatly alter them. */ + Instruction const &inst; /* Argument information (contains data set by several passes) */ ConcreteInstructionArg args[2]; - //--- - // Data set by the pcrel pass - //--- - - /* Jump targets, used for jump instructions only. The target might - either be that of an inconditional jump, or the non-trivial target - of a conditional jump. In many situations the jump is forced on a - general instruction by a preceding delated branch. */ - uint32_t jmptarget; - /* Whether the instruction is terminal. Be careful, as this attribute - is often forced onto delayed slot instructions. It is thus NOT the - same as isterminal(), which tells whether the mnemonic implies a - function exit. Said exit is generally delayed. */ - bool terminal; - //--- // Data set by the cfg pass //--- - /* Whether this instruction is a basic block leader */ + /* Whether this instruction is a leader. This is always set by another + instruction jumping into this one. */ bool leader; + /* Whether this instruction is in a delay slot. This is always set by + the preceding delayed instruction. */ + bool delayslot; - //--- - // Methods and utilities - //--- + /* Whether this instruction is: + -> Terminal, ie. has no successors and is the end of the function. + -> An unconditional jump of target [jmptarget]. + -> A conditional jump that can hit [jmptarget] and pc+2. + If delayslot==false, these attributes are set when analyzing this + instruction. If delayslot==true, they are set when the preceding + delayed instruction is analyzed. 
*/ + bool terminal; + bool jump; + bool condjump; - bool isterminal() const noexcept { return inst.isterminal(); } - bool isjump() const noexcept { return inst.isjump(); } - bool iscondjump() const noexcept { return inst.iscondjump(); } - bool isdelayed() const noexcept { return inst.isdelayed(); } + /* The jump target, used if jump==true or condjump==true. */ + uint32_t jmptarget; }; -/* Short aliases */ -using CI = ConcreteInstruction; -using CIArg = ConcreteInstructionArg; - /* Disassembly interface that automatically loads code from a target */ class Disassembly { @@ -112,6 +104,16 @@ public: loaded and initialized if it had not been read before. */ ConcreteInstruction &readins(uint32_t pc); + /* For other access patterns (careful with write accesses!) */ + std::map &instructions() noexcept { + return m_instructions; + } + + /* Access to memory */ + Target &target() noexcept { + return m_target; + } + private: /* Underlying target */ Target &m_target; @@ -158,19 +160,6 @@ private: std::set m_seen; }; - -class PcrelPass: public DisassemblyPass -{ - PcrelPass(Disassembly &disasm); - void analyze(uint32_t pc, ConcreteInstruction &inst) override; -}; - -class PrintPass: public DisassemblyPass -{ - PrintPass(Disassembly &disasm); - void analyze(uint32_t pc, ConcreteInstruction &inst) override; -}; - } /* namespace FxOS */ #endif /* LIBFXOS_DISASSEMBLY_H */ diff --git a/include/fxos/domains.h b/include/fxos/domains.h index 9740378..822da6e 100644 --- a/include/fxos/domains.h +++ b/include/fxos/domains.h @@ -6,16 +6,18 @@ #define FXOS_DOMAINS_H #include +#include namespace FxOS { -/* An abstract domain over any user-defined lattice. */ +/* An abstract domain over a user-defined lattice. 
*/ template class AbstractDomain { +public: /* Bottom and Top constants */ virtual T bottom() const noexcept = 0; - virtual T top() const noexcept = 0; + virtual T top() const noexcept = 0; /* Construct abstract value from integer constant */ virtual T constant(uint32_t value) const noexcept = 0; @@ -96,14 +98,32 @@ struct RelConst int32_t ival; uint32_t uval; }; + + //--- + // RelConst methods + //--- + + /* Default constructor gives zero */ + RelConst() = default; + + /* Evaluates to true if the location is non-trivial, ie. if it is + neither Top nor Bottom. */ + operator bool () const noexcept; + + /* String representation */ + std::string str() const noexcept; }; class RelConstDomain: public AbstractDomain { +public: + /* Trivial instances */ + RelConstDomain() = default; + /* Implementation of the AbstractDomain specification */ RelConst bottom() const noexcept override; - RelConst top() const noexcept override; + RelConst top() const noexcept override; RelConst constant(uint32_t value) const noexcept override; diff --git a/include/fxos/errors.h b/include/fxos/errors.h index 54827f9..8f976f0 100644 --- a/include/fxos/errors.h +++ b/include/fxos/errors.h @@ -60,6 +60,29 @@ private: char const *m_what; }; +/* Address errors */ +class AddrError: public std::exception +{ +public: + AddrError(uint32_t address, int size, char const *what): + m_addr(address), m_size(size), m_what(what) {} + + uint32_t addr() const noexcept { + return m_addr; + } + int size() const noexcept { + return m_size; + } + char const *what() const noexcept override { + return m_what; + } + +private: + uint32_t m_addr; + int m_size; + char const *m_what; +}; + } /* namespace FxOS */ #endif /* LIBFXOS_ERRORS_H */ diff --git a/include/fxos/lang.h b/include/fxos/lang.h index d5e6c29..ebbd353 100644 --- a/include/fxos/lang.h +++ b/include/fxos/lang.h @@ -69,7 +69,7 @@ struct Argument StructDeref, /* @(disp,rn) or @(disp,gbr) */ ArrayDeref, /* @(r0,rn) or @(r0,gbr) */ PcRel, /* @(disp,pc) with 
4-alignment correction */ - PcJump, /* disp */ + PcJump, /* pc+disp */ Imm, /* #imm */ }; @@ -137,6 +137,8 @@ struct Instruction bool iscondjump() const noexcept; /* Check whether instruction has a delay slot */ bool isdelayed() const noexcept; + /* Check whether instruction can be used in a delay slot */ + bool isvaliddelayslot() const noexcept; }; } /* namespace FxOS */ diff --git a/include/fxos/semantics.h b/include/fxos/semantics.h index 0d058b1..c4443ff 100644 --- a/include/fxos/semantics.h +++ b/include/fxos/semantics.h @@ -6,6 +6,8 @@ #define LIBFXOS_SEMANTICS_H #include +#include +#include namespace FxOS { @@ -21,9 +23,8 @@ namespace FxOS { // Structures struct { } (mem) //--- -class DataType +struct DataType { -public: enum DataKind { /* Base types */ Integral, @@ -59,6 +60,8 @@ public: /* The following members are not in the union because they have non- trivial destructors/copy and I don't want to care. */ + /* For array */ + std::shared_ptr arraytype; /* For bit field types */ std::vector fields; /* For struct types */ @@ -66,39 +69,51 @@ public: }; //--- -// Location representation +// Data values // -// The abstract interpreter keeps track of data stored at the following -// locations (attribute types in parentheses): -// Registers .reg (CpuRegister) -// Memory .addr (uint32_t) -// MappedModule .addr (uint32_t) +// These objects are instances of the types described by DataType. //--- -struct Location +struct DataValue { - enum LocationType { - /* CPU-held registers accessed with instructions */ - Register, - /* Standard randomly-addressable memory */ - Memory, - /* Memory-mapped module registers with specific access */ - MappedModule, - }; + /* Default constructor, gives undetermined values */ + DataValue(); - LocationType location; + /* Data type affected to the value */ + DataType const *type; + /* Whether the value can be determined. If this boolean is false, the + rest of the data must be ignored. 
*/ + bool determined; union { - /* For registers: register identifier */ - CpuRegister reg; - /* For memory and mapped modules: addresses */ - uint32_t addr; + /* Unsigned integer (all sizes) and bit fields */ + uint32_t uinteger; + /* Signed integer (all sizes) */ + int32_t integer; }; - /* String representation */ - std::string str() const noexcept; + /* For arrays (homogeneous) and structures (heterogeneous) */ + std::vector entries; + + /* Perform a read operation at the specified offset from the start of + the object. */ + uint32_t read(int offset, int size); + + /* Perform a write operation at the specified offset from the start of + the object. */ + void write(int offset, int size, uint32_t contents); }; +//--- +// Location representation +// +// The abstract interpreter keeps track of data stored at registers, memory +// addresses and mapped modules as long as the exact location fits within the +// expressive power of a RelConst. +//--- + +using Location = RelConst; + } /* namespace FxOS */ #endif /* LIBFXOS_SEMANTICS_H */ diff --git a/include/fxos/target.h b/include/fxos/target.h index 9a15962..b452d66 100644 --- a/include/fxos/target.h +++ b/include/fxos/target.h @@ -29,10 +29,10 @@ public: various types. (Not a template because of the restriction about template specialization in non-namespaces scopes still in g++.) - When reading data, provide a virtual address. The addres is saved in - the returned object for later printing or inspection. The returned - object Addressable automatically converts to T when used, and - supports operator & which returns the original address. + When reading data, provide a virtual address. The address is saved + in the returned object for later printing or inspection. The + returned object Addressable automatically converts to T when + used, and supports operator & which returns the original address. 
The size parameter is only meaningful for variable-sized types such as string, and ignored for fixed-size types such as integers. If the @@ -40,7 +40,7 @@ public: throws std::out_of_range. */ /* Read integers with signed or unsigned extension. These functions do - not check alignment, because exceptionnally the processor supports + not check alignment, because exceptionally the processor supports unaligned operations (eg. movual.l). */ Addressable read_i8 (uint32_t addr) const; Addressable read_u8 (uint32_t addr) const; diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index 9518840..d437a58 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace FxOS { @@ -26,13 +27,17 @@ void register_instruction(Instruction ins) //--- ConcreteInstructionArg::ConcreteInstructionArg(): - loc {}, type {}, syscall_id {-1} + value {}, syscall_id {-1} { + location = RelConstDomain().bottom(); reg_address = -1; } -ConcreteInstruction::ConcreteInstruction(Instruction &inst): - inst {inst}, jmptarget {}, leader {false} +ConcreteInstruction::ConcreteInstruction(Instruction const &inst): + inst {inst}, args {}, + leader {false}, delayslot {false}, + terminal {false}, jump {false}, condjump {false}, + jmptarget {0xffffffff} { } @@ -76,7 +81,7 @@ uint32_t Disassembly::maxpc() ConcreteInstruction &Disassembly::readins(uint32_t pc) { - if(pc & 1) throw std::runtime_error("Disassembly::ins_read at odd PC"); + if(pc & 1) throw std::runtime_error("Disassembly::readins at odd PC"); try { @@ -90,7 +95,7 @@ ConcreteInstruction &Disassembly::readins(uint32_t pc) throw std::runtime_error("No instruction for opcode"); } - Instruction &inst = *insmap[opcode]; + Instruction const &inst = *insmap[opcode]; ConcreteInstruction ci(inst); m_instructions.emplace(pc, ci); @@ -127,11 +132,12 @@ void DisassemblyPass::enqueue_next(uint32_t pc) void DisassemblyPass::enqueue_unseen_successors(uint32_t pc, ConcreteInstruction &inst) { - 
if(!inst.isterminal() && !inst.isjump()) + if(!inst.terminal && !inst.jump) { + if(pc == 0x80000078) printf("t%d j%d\n", inst.terminal, inst.jump); if(!m_seen.count(pc + 2)) enqueue(pc + 2); } - if(inst.isjump() || inst.iscondjump()) + if(inst.jump || inst.condjump) { if(!m_seen.count(inst.jmptarget)) enqueue(inst.jmptarget); } @@ -140,11 +146,11 @@ void DisassemblyPass::enqueue_unseen_successors(uint32_t pc, void DisassemblyPass::enqueue_all_successors(uint32_t pc, ConcreteInstruction &inst) { - if(!inst.isterminal() && !inst.isjump()) + if(!inst.terminal && !inst.jump) { enqueue(pc + 2); } - if(inst.isjump() || inst.iscondjump()) + if(inst.jump || inst.condjump) { enqueue(inst.jmptarget); } diff --git a/lib/domains/relconst.cpp b/lib/domains/relconst.cpp index 0ca3d92..d062043 100644 --- a/lib/domains/relconst.cpp +++ b/lib/domains/relconst.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include namespace FxOS { @@ -47,7 +49,7 @@ RelConst RelConstDomain::minus(RelConst r) const noexcept if(r.spe) return r; /* This domain does not support multiplicative coefficients for the base. If the base is non-zero, return Top. 
*/ - if(r.arg || r.org || r.reg) return top(); + if(r.base) return top(); r.ival = -r.ival; return r; @@ -217,7 +219,7 @@ bool RelConstDomain::cmp(RelConst r1, RelConst r2) const noexcept int RelConstDomain::cmpu(RelConst r1, RelConst r2) const noexcept { - /* We can't just substract because of overflows (information is lost + /* We can't just subtract because of overflows (information is lost because we don't have the V bit) */ return (r1.uval > r2.uval) - (r1.uval < r2.uval); } @@ -227,4 +229,42 @@ int RelConstDomain::cmps(RelConst r1, RelConst r2) const noexcept return (r1.ival > r2.ival) - (r1.ival < r2.ival); } +//--- +// Other functions +//--- + +RelConst::operator bool () const noexcept +{ + return !spe; +} + +std::string RelConst::str() const noexcept +{ + using RegName = CpuRegister::CpuRegisterName; + + if(!base && !uval) return "0"; + if(spe == Bottom) return "Bottom"; + if(spe == Top) return "Top"; + + std::string str; + if(arg) str = format("arg%d", arg); + if(org) str = format("org_%s", CpuRegister((RegName)org).str()); + if(reg) str = CpuRegister((RegName)org).str(); + + if(!uval) return str; + + if(ival >= -256 && ival < 256) + { + uint32_t v = 0; + if(str.size() && ival > 0) str += "+", v = ival; + if(str.size() && ival < 0) str += "-", v = -ival; + + return str + format("%d", v); + } + else + { + return str + format("%08x", uval); + } +} + } /* namespace FxOS */ diff --git a/lib/lang.cpp b/lib/lang.cpp index 7eb2f14..f57d1d2 100644 --- a/lib/lang.cpp +++ b/lib/lang.cpp @@ -53,13 +53,10 @@ static std::map regnames = { /* Construction from string - pretty slow */ CpuRegister::CpuRegister(std::string name) { - for(auto it = regnames.begin(); it != regnames.end(); it++) + for(auto &it: regnames) if(it.second == name) { - if(it->second == name) - { - m_name = it->first; - return; - } + m_name = it.first; + return; } throw std::invalid_argument("invalid CpuRegister name"); @@ -159,13 +156,13 @@ std::string Argument::str() const switch(kind) { case 
Argument::Reg: - return format("r%d", base); + return base.str(); case Argument::Deref: - return format("@r%d", base); + return format("@%s", base.str()); case Argument::PostInc: - return format("@r%d+", base); + return format("@%s+", base.str()); case Argument::PreDec: - return format("@-%dr", base); + return format("@-%s", base.str()); case Argument::StructDeref: return format("@(%d,%s)", disp, base.str().c_str()); case Argument::ArrayDeref: @@ -175,6 +172,8 @@ std::string Argument::str() const return format("@(%d,pc)", disp); case Argument::PcJump: return format("pc+%d", disp); + case Argument::Imm: + return format("#%d", imm); default: return "(invalid)"; } @@ -263,4 +262,9 @@ bool Instruction::isdelayed() const noexcept return false; } +bool Instruction::isvaliddelayslot() const noexcept +{ + return !isdelayed() && !isterminal() && !isjump() && !iscondjump(); +} + } /* namespace FxOS */ diff --git a/lib/library.cpp b/lib/library.cpp index e97061a..8bbde2c 100644 --- a/lib/library.cpp +++ b/lib/library.cpp @@ -91,7 +91,7 @@ void Library::load(std::string path) long long ns = timer_end(start); - log(LOG "done (%s)", timer_format(ns)); + log(LOG "%s", timer_format(ns)); } } /* namespace FxOS */ diff --git a/lib/load-asm.l b/lib/load-asm.l index 734333e..3a909e8 100644 --- a/lib/load-asm.l +++ b/lib/load-asm.l @@ -214,16 +214,16 @@ static Argument make_arg(int token, int opsize, int m, int n, int d, int i) case AT_RMP: return Argument_PostInc(Rm); case AT_RNP: return Argument_PostInc(Rn); case AT_MRN: return Argument_PreDec(Rn); - case AT_DRN: return Argument_StructDeref(d, opsize, Rn); - case AT_DRM: return Argument_StructDeref(d, opsize, Rm); - case AT_DGBR: return Argument_StructDeref(d, opsize, Reg::GBR); + case AT_DRN: return Argument_StructDeref(d*opsize, opsize, Rn); + case AT_DRM: return Argument_StructDeref(d*opsize, opsize, Rm); + case AT_DGBR: return Argument_StructDeref(d*opsize, opsize, Reg::GBR); case AT_R0RN: return Argument_ArrayDeref(Reg::R0, Rn); 
case AT_R0RM: return Argument_ArrayDeref(Reg::R0, Rm); case AT_R0GBR: return Argument_ArrayDeref(Reg::R0, Reg::GBR); case AT_DPC: if(!opsize) err("@(disp,pc) must have a size (.w, .l)"); - return Argument_PcRel(d, opsize); + return Argument_PcRel(d*opsize, opsize); } throw std::logic_error("lex asm builds args from bad tokens"); diff --git a/lib/passes/cfg.cpp b/lib/passes/cfg.cpp index 40254a5..c7fa211 100644 --- a/lib/passes/cfg.cpp +++ b/lib/passes/cfg.cpp @@ -1,12 +1,15 @@ //--- -// fxos.passes.cfg: CFG construction, as used by other passes +// fxos.passes.cfg: Control Flow Graph construction //--- #include #include #include +#include #include +using namespace FxOS::Log; + namespace FxOS { CfgPass::CfgPass(Disassembly &disasm): @@ -16,21 +19,59 @@ CfgPass::CfgPass(Disassembly &disasm): void CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci) { - std::vector jump_mnemonics { - "bra", "bf", "bf.s", "bf/s", "bt", "bt.s", "bt/s", - }; + /* Compute the jump target for jump instructions. This is easy because + they are all trivially computable. (...If they are not we dub them + "terminal" to avoid the computation!) */ + uint32_t jmptarget = 0xffffffff; - /* Set the jmptarget fields whenever needed. This is easy because jump - instructions have trivially computable destinations */ - for(auto mnemonic: jump_mnemonics) + if(ci.inst.isjump() || ci.inst.iscondjump()) { - if(ci.inst.mnemonic != mnemonic) continue; auto &args = ci.inst.args; if(args.size() != 1 || args[0].kind != Argument::PcJump) throw LangError(pc, "invalid jump instruction"); - ci.jmptarget = pc + args[0].disp; + jmptarget = (pc+4) + args[0].disp; + + /* Make the target of the jump a leader */ + ConcreteInstruction &target = m_disasm.readins(jmptarget); + target.leader = true; + + /* Check that it's not in a delay slot */ + if(target.delayslot) + throw LangError(pc, "jump into a delay slot!"); + } + + /* If this instruction is in a delay slot, check its type. 
A valid + delay slot has no branching properties on its own, so nothing new to + set in the properties. */ + if(ci.delayslot) + { + if(!ci.inst.isvaliddelayslot()) + throw LangError(pc, "invalid delay slot"); + } + /* Handle normal instructions */ + else if(!ci.inst.isdelayed()) + { + ci.terminal = ci.inst.isterminal(); + ci.jump = ci.inst.isjump(); + ci.condjump = ci.inst.iscondjump(); + ci.jmptarget = jmptarget; + } + /* Create a new delay slot */ + else + { + ConcreteInstruction &slot = m_disasm.readins(pc+2); + if(slot.leader) + throw LangError(pc+2, "leader in a delay slot!"); + if(!slot.inst.isvaliddelayslot()) + throw LangError(pc+2, "invalid delay slot"); + + slot.delayslot = true; + slot.terminal = ci.inst.isterminal(); + slot.jump = ci.inst.isjump(); + slot.condjump = ci.inst.iscondjump(); + slot.jmptarget = jmptarget; } enqueue_unseen_successors(pc, ci); diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index 9fad8ce..b91f153 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -1,8 +1,8 @@ //--- -// fxos.passes.pcrel: PC-relative addressing resolution +// fxos.passes.pcrel: Resolution of PC-relative addresses //--- -#include +#include namespace FxOS { @@ -13,6 +13,28 @@ PcrelPass::PcrelPass(Disassembly &disasm): void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) { + Instruction const &i = ci.inst; + + for(size_t n = 0; n < i.args.size(); n++) + { + Argument const &a = i.args[n]; + ConcreteInstructionArg &ca = ci.args[n]; + + if(a.kind == Argument::PcRel) + { + uint32_t target = (pc & ~(a.opsize - 1)) + 4 + a.disp; + ca.location = RelConstDomain().constant(target); + + /* Also compute the value */ + uint32_t value = m_disasm.target(). 
+ } + else if(a.kind == Argument::PcJump) + { + uint32_t target = pc + 4 + a.disp; + ca.location = RelConstDomain().constant(target); + } + } + enqueue_unseen_successors(pc, ci); } diff --git a/lib/passes/print.cpp b/lib/passes/print.cpp index b377219..94b1288 100644 --- a/lib/passes/print.cpp +++ b/lib/passes/print.cpp @@ -2,6 +2,7 @@ // fxos.passes.print: Print disassembly //--- +#include #include namespace FxOS { @@ -9,12 +10,60 @@ namespace FxOS { PrintPass::PrintPass(Disassembly &disasm): DisassemblyPass(disasm) { + /* Default parameter set */ + hide_resolved_pcjump = false; + hide_resolved_pcrel = false; + hide_movpc_address = Hide_MovPC_Never; +} + +void PrintPass::run(void) +{ + for(auto &pair: m_disasm.instructions()) + { + analyze(pair.first, pair.second); + } } void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci) { - std::cout << ci.inst.mnemonic << "\n"; - enqueue_next(pc); + Instruction const &i = ci.inst; + + /* Mnemonic */ + + static std::map suffixes = { + { 1, ".b" }, { 2, ".w" }, { 4, ".l" } }; + + std::string mnemonic = i.mnemonic + suffixes[i.opsize]; + if(i.args.size()) + mnemonic += std::string(8 - mnemonic.size(), ' '); + + printf(" %08x: %04x %s", pc, ci.inst.opcode, mnemonic.c_str()); + + /* Arguments */ + + for(size_t n = 0; n < i.args.size(); n++) + { + auto &a = i.args[n]; + Location &l = ci.args[n].location; + + if(n) printf(", "); + + if(a.kind == Argument::PcJump && l && hide_resolved_pcjump) + { + printf("<%s>", l.str().c_str()); + } + else if(a.kind == Argument::PcRel && l && hide_resolved_pcrel) + { + printf("<%s>", l.str().c_str()); + } + else + { + printf("%s", a.str().c_str()); + if(l) printf(" <%s>", l.str().c_str()); + } + } + + printf("\n"); } } /* namespace FxOS */ diff --git a/lib/semantics.cpp b/lib/semantics.cpp index 9579456..4696d97 100644 --- a/lib/semantics.cpp +++ b/lib/semantics.cpp @@ -2,4 +2,92 @@ namespace FxOS { +DataValue::DataValue(): + type(nullptr), determined(false) +{ +} + +uint32_t 
DataValue::read(int offset, int size) +{ + if(offset < 0 || size < 0 || (type->size && offset+size >= type->size)) + throw std::logic_error("Invalid read into simulated data"); + if(!size || size & (size - 1)) + throw std::logic_error("Simulated read not a power of 2"); + + if(type->kind==DataType::Integral || type->kind==DataType::BitField) + { + int shift = (4 - size - offset) << 3; + uint32_t u = uinteger >> shift; + return (size == 4) ? u : + (size == 2) ? (uint16_t)u : + (uint8_t)u; + } + else if(type->kind == DataType::Array) + { + int elsize = type->arraytype->size; + int index = offset / elsize; + + if(index >= (int)entries.size()) + throw std::logic_error("Read out of array bounds"); + + /* Will throw if reading from two entries simultaneously */ + return entries[index].read(offset % elsize, size); + } + else if(type->kind == DataType::Array) + { + int index = 0; + while(offset >= type->attributes[index].size) + { + index++; + offset -= type->attributes[index].size; + } + + return entries[index].read(offset, size); + } + + throw std::logic_error("Read into unknown DataValue kind"); +} + +void DataValue::write(int offset, int size, uint32_t contents) +{ + if(offset < 0 || size < 0 || (type->size && offset+size >= type->size)) + throw std::logic_error("Invalid write into simulated data"); + if(!size || size & (size - 1)) + throw std::logic_error("Simulated write not a power of 2"); + + if(type->kind==DataType::Integral || type->kind==DataType::BitField) + { + uint32_t mask = (size == 1) ? 0xff : + (size == 2) ? 
0xffff : + 0xffffffff; + + int shift = (4 - size - offset) << 3; + + uinteger = (uinteger & ~(mask << shift)) | (contents << shift); + } + else if(type->kind == DataType::Array) + { + int elsize = type->arraytype->size; + int index = offset / elsize; + + if(index >= (int)entries.size()) + throw std::logic_error("Write out of array bounds"); + + entries[index].write(offset % elsize, size, contents); + } + else if(type->kind == DataType::Array) + { + int index = 0; + while(offset >= type->attributes[index].size) + { + index++; + offset -= type->attributes[index].size; + } + + entries[index].write(offset, size, contents); + } + + throw std::logic_error("Write into unknown DataValue kind"); +} + } /* namespace FxOS */ diff --git a/lib/target.cpp b/lib/target.cpp index 6ba0868..2644462 100644 --- a/lib/target.cpp +++ b/lib/target.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -150,7 +151,7 @@ char const *Target::translate(uint32_t addr, int size) const catch(std::out_of_range &e) {} } - throw std::out_of_range("Out of target bindings"); + throw AddrError(addr, size, "out of target bindings"); } uint32_t Target::search(uint32_t start, uint32_t end, void const *pattern, @@ -159,7 +160,7 @@ uint32_t Target::search(uint32_t start, uint32_t end, void const *pattern, uint32_t occurrence; if(end < start || !covers(start, end - start)) { - throw std::out_of_range("Out of target bindings"); + throw AddrError(start, end-start, "out of target bindings"); } for(auto it = m_bindings.crbegin(); it != m_bindings.crend(); it++)