From d5c5fa6aeb5c5406230c51b39cd0f3909178adaa Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Sat, 15 Feb 2020 18:42:14 +0100 Subject: [PATCH] implement the syscall pass, and symbol resolution New features: * The syscall pass now resolves syscalls for the input target, provided that an OS is mapped on the ROM region. * Formalized the variations of print's arguments as a sequence (tree, to be precise) of /promotions/. * Added a short notion of Symbol and SymbolTable, and a loader for them. Data files of type "symbol" are read as such and provide name to syscalls or arbitrary addresses. Code changes: * The disassembly operation of the command-line interface is now finally in its own file with more room. * Encoded the tree structure of promotions as a sequence of (mainly tail-calling) inter-calling methods in the print pass. --- Makefile | 2 - data/base-types.txt | 24 ----- fxos/disassembly.cpp | 62 ++++++++++++ fxos/fxos-cli.h | 6 ++ fxos/main.cpp | 48 ++-------- include/fxos/disasm-passes/print.h | 65 +++++++++---- include/fxos/disasm-passes/syscall.h | 31 ++++++ include/fxos/disassembly.h | 6 +- include/fxos/domains.h | 4 +- include/fxos/library.h | 6 ++ include/fxos/load.h | 5 + include/fxos/semantics.h | 14 ++- include/fxos/symbols.h | 45 +++++++++ include/fxos/target.h | 6 ++ lib/disassembly.cpp | 2 +- lib/domains/relconst.cpp | 7 ++ lib/library.cpp | 10 ++ lib/load-symbols.l | 114 ++++++++++++++++++++++ lib/passes/pcrel.cpp | 5 +- lib/passes/print.cpp | 138 ++++++++++++++++++++------- lib/passes/syscall.cpp | 52 ++++++++++ lib/semantics.cpp | 8 ++ lib/symbols.cpp | 21 ++++ lib/target.cpp | 15 +++ 24 files changed, 563 insertions(+), 133 deletions(-) delete mode 100644 data/base-types.txt create mode 100644 fxos/disassembly.cpp create mode 100644 include/fxos/disasm-passes/syscall.h create mode 100644 include/fxos/symbols.h create mode 100644 lib/load-symbols.l create mode 100644 lib/passes/syscall.cpp create mode 100644 lib/symbols.cpp diff --git a/Makefile 
b/Makefile index a18179d..4dc68b7 100644 --- a/Makefile +++ b/Makefile @@ -101,9 +101,7 @@ endif install: $(TARGETS) install -d $(PREFIX)/bin - install -d $(PREFIX)/share/fxos install $(TARGETS) $(m755) $(PREFIX)/bin - cp -ra data/* $(PREFIX)/share/fxos uninstall: rm -f $(TARGETS:%=$(PREFIX)/%) diff --git a/data/base-types.txt b/data/base-types.txt deleted file mode 100644 index 9fe73b5..0000000 --- a/data/base-types.txt +++ /dev/null @@ -1,24 +0,0 @@ -type: types -name: base ---- - -u32 StatusRegister { - u _ :1; - u MD :1; - u RB :1; - u BL :1; - u _ :20; - u IMASK :4; - u _ :3; - u T :1; -} - -struct MountTableEntry { - u32 _; - u32 _; - u32 _; - char[20] path2; - char[18] path1; - u8 mounted; - u8 _; -} diff --git a/fxos/disassembly.cpp b/fxos/disassembly.cpp new file mode 100644 index 0000000..e3ffdda --- /dev/null +++ b/fxos/disassembly.cpp @@ -0,0 +1,62 @@ +#include "fxos-cli.h" +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +using namespace FxOS; +using namespace FxOS::Log; + +void disassembly(Library &library, Target &target, uint32_t ref, + std::vector passes) +{ + Disassembly disasm(target); + + /* Observe the target only if it has an OS mapped */ + std::unique_ptr os; + if(target.covers(MemoryRegion::ROM)) + os = std::make_unique(target); + + for(auto pass: passes) + { + auto start = timer_start(); + log(LOG "Running pass %s...\\", pass); + + if(pass == "cfg") + { + CfgPass p(disasm); + p.run(ref); + } + else if(pass == "pcrel") + { + PcrelPass p(disasm); + p.run(ref); + } + else if(pass == "syscall") + { + SyscallPass p(disasm, os.get()); + p.run(ref); + } + else if(pass == "print") + { + PrintPass p(disasm, library.sym_tables()); + + p.promote_pcjump_loc = PrintPass::Promote; + p.promote_pcrel_loc = PrintPass::Promote; + p.promote_pcrel_value = PrintPass::Promote; + p.promote_syscall = PrintPass::Promote; + p.promote_syscallname = PrintPass::Append; + p.promote_symbol = PrintPass::Append; + + 
p.run(); + } + log(LOG "%s", timer_format(timer_end(start))); + } +} diff --git a/fxos/fxos-cli.h b/fxos/fxos-cli.h index 900da19..ede5e4f 100644 --- a/fxos/fxos-cli.h +++ b/fxos/fxos-cli.h @@ -6,9 +6,15 @@ #define FXOS_CLI_H #include +#include #include +#include /* Print general information on an OS file */ void os_info(FxOS::Target &target); +/* Disassemble */ +void disassembly(FxOS::Library &library, FxOS::Target &target, uint32_t ref, + std::vector passes); + #endif /* FXOS_CLI_H */ diff --git a/fxos/main.cpp b/fxos/main.cpp index 45062fa..9105e88 100644 --- a/fxos/main.cpp +++ b/fxos/main.cpp @@ -8,16 +8,12 @@ #include #include -#include -#include -#include - #include #include -#include #include #include +#include #include #include @@ -80,11 +76,10 @@ Available passes: pcrel Resolve PC-relative references as their target address cstprop Propagate constants by abstract interpretation syscall Annotate code with reverse syscalls - regs Annotate code with peripheral register addresses -The default sequence of passes is cfg,pcrel,cstprop,syscall,regs. When -disassembling a function (ie. no size specified on the command-line), the cfg -pass is always executed to explore the function. +The default sequence of passes is cfg,pcrel,cstprop,syscall. When disassembling +a function (ie. no size specified on the command-line), the cfg pass is always +executed to explore the function. 
ANALYZE COMMAND @@ -221,9 +216,10 @@ int main_info(int argc, char **argv) int main_disassembly(int argc, char **argv) { - int error=0, option=0, mpu='4'; + int error=0, option=0; + __attribute__((unused)) int mpu='4'; std::vector passes { - "cfg", "pcrel", "constprop", "syscall", "regs", "print" + "cfg", "pcrel", "constprop", "syscall", "print" }; std::string file; @@ -304,39 +300,11 @@ int main_disassembly(int argc, char **argv) uint32_t ref; sscanf(refstr, "%x", &ref); - Disassembly disasm(target); - OS *os = nullptr; - log(LOG "Disassembling target %s at %s", tname, refstr); try { - for(auto pass: passes) - { - auto start = timer_start(); - log(LOG "Running pass %s...\\", pass); - - if(pass == "cfg") - { - CfgPass p(disasm); - p.run(ref); - } - else if(pass == "pcrel") - { - PcrelPass p(disasm); - p.run(ref); - } - else if(pass == "print") - { - PrintPass p(disasm); - p.hide_resolved_pcjump = true; - p.hide_resolved_pcrel = true; - p.hide_movpc_address = - PrintPass::Hide_MovPC_Region; - p.run(); - } - log(LOG "%s", timer_format(timer_end(start))); - } + disassembly(lib, target, ref, passes); } catch(LangError &e) { diff --git a/include/fxos/disasm-passes/print.h b/include/fxos/disasm-passes/print.h index 7b5f5ab..b489976 100644 --- a/include/fxos/disasm-passes/print.h +++ b/include/fxos/disasm-passes/print.h @@ -9,13 +9,15 @@ #define LIBFXOS_DISASM_PASSES_PRINT_H #include +#include namespace FxOS { class PrintPass: public DisassemblyPass { public: - PrintPass(Disassembly &disasm); + PrintPass(Disassembly &disasm, + std::vector const &symtables); void analyze(uint32_t pc, ConcreteInstruction &inst) override; /* This pass uses another entry method that starts at the instruction @@ -27,31 +29,54 @@ public: // Print pass parameters //--- - /* In jump instructions, hide the raw value "pc+" if the target - address has been computed */ - bool hide_resolved_pcjump; - /* In PC-relative move instructions, hide the raw value "@(,pc)" - of the argument if the target 
address has been computed */ - bool hide_resolved_pcrel; + /* Promotion parameters. Default is always to append. */ + enum Promotion { + /* Never promote */ + Never=1, + /* Promote but keep the lower-level information */ + Append=0, + /* Promote and hide the lower-level information */ + Promote=2, + }; - /* In PC-relative move instructions, hide the target address of the - argument... under conditions */ - enum { - /* Always show the accessed address */ - Hide_MovPC_Never, - /* Hide the address if it's in the same memory region as the - instruction doing the move */ - Hide_MovPC_Region, - /* Always hide the address */ - Hide_MovPC_Always, + /** In the following, promote_x always means promote *to x* **/ - } hide_movpc_address; + /* In jumps, promote "pc+" to the target address */ + int promote_pcjump_loc; + /* In a PC-relative mov, promote "@(,pc)" to computed address */ + int promote_pcrel_loc; + /* In a PC-relative mov, promote address to pointed value */ + int promote_pcrel_value; + /* Promote an integer to a syscall number */ + int promote_syscall; + /* Promote a syscall number to a syscall name */ + int promote_syscallname; + /* Promote an integer to a symbol */ + int promote_symbol; /* TODO: More print pass parameters */ private: - void pcrel(uint32_t pc, Argument const &a, Location const &l, - std::optional v); + /* Symbol tables to look up names */ + std::vector const &m_symtables; + + /* Query symbol tables, most recent first */ + std::optional symquery(Symbol::Type type, uint32_t value); + + /** Internal promotion printing stuff **/ + + void queue(std::string, bool = false); + void queue_flush(); + std::vector m_messages; + + void pcjumploc(ConcreteInstructionArg const &); + void pcrelloc(ConcreteInstructionArg const &); + void pcrelval(ConcreteInstructionArg const &); + void syscall(ConcreteInstructionArg const &); + void syscallname(ConcreteInstructionArg const &); + void symbol(ConcreteInstructionArg const &); + + /* To be removed */ }; } /* namespace 
FxOS */ diff --git a/include/fxos/disasm-passes/syscall.h b/include/fxos/disasm-passes/syscall.h new file mode 100644 index 0000000..351d6a6 --- /dev/null +++ b/include/fxos/disasm-passes/syscall.h @@ -0,0 +1,31 @@ +//--- +// fxos.disasm-passes.syscall: Detection and substitution of syscall addresses +// +// This pass looks for instruction arguments that evaluate to syscall +// addresses, and substitutes so that the syscall number and (hopefully) the +// syscall name will be shown by the print pass if it's available in the +// documentation. +//--- + +#ifndef FXOS_DISASM_PASSES_SYSCALL_H +#define FXOS_DISASM_PASSES_SYSCALL_H + +#include +#include + +namespace FxOS { + +class SyscallPass: public DisassemblyPass +{ +public: + SyscallPass(Disassembly &disasm, OS *os); + + void analyze(uint32_t pc, ConcreteInstruction &inst) override; + +private: + OS *m_os; +}; + +} /* namespace FxOS */ + +#endif /* FXOS_DISASM_PASSES_SYSCALL_H */ diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index 6e78473..1ecf248 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -30,7 +30,7 @@ struct ConcreteInstructionArg ConcreteInstructionArg(); //--- - // Data set by the pass and abstract interpretater + // Data set by the pass and abstract interpreter //--- /* Location in CPU or memory, if that can be determined */ @@ -42,13 +42,11 @@ struct ConcreteInstructionArg DataValue value; //--- - // Data set by the and passes + // Data set by the pass //--- /* If the value is a syscall address, the syscall's id */ int syscall_id; - /* If the value is a peripheral register, its address */ - uint32_t reg_address; }; /* A loaded and annotated instruction. 
*/ diff --git a/include/fxos/domains.h b/include/fxos/domains.h index 85cee51..83ed228 100644 --- a/include/fxos/domains.h +++ b/include/fxos/domains.h @@ -21,9 +21,10 @@ public: /* Construct abstract value from integer constant */ virtual T constant(uint32_t value) const noexcept = 0; - /* Check if value is constant */ virtual bool is_constant(T) const noexcept = 0; + /* Unpack a constant */ + virtual uint32_t constant_value(T) const = 0; /* Basic arithmetic. Division and modulo are both non-trivial instruction sequences usually isolated in easily-identifiable @@ -130,6 +131,7 @@ public: RelConst constant(uint32_t value) const noexcept override; bool is_constant(RelConst) const noexcept override; + uint32_t constant_value(RelConst) const override; RelConst minus(RelConst) const noexcept override; RelConst add(RelConst, RelConst) const noexcept override; diff --git a/include/fxos/library.h b/include/fxos/library.h index 2ba6ec5..af4a27c 100644 --- a/include/fxos/library.h +++ b/include/fxos/library.h @@ -6,6 +6,7 @@ #define FXOS_LIBRARY_H #include +#include #include #include @@ -39,11 +40,16 @@ struct Library const std::vector &asm_tables() { return m_asmtables; } + /* List of symbol tables */ + const std::vector &sym_tables() { + return m_symtables; + } private: std::vector m_paths; std::map m_targets; std::vector m_asmtables; + std::vector m_symtables; }; } /* namespace FxOS */ diff --git a/include/fxos/load.h b/include/fxos/load.h index 09912e6..30e7e92 100644 --- a/include/fxos/load.h +++ b/include/fxos/load.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -41,6 +42,10 @@ void load_asm(Buffer const &file, size_t offset, size_t line); the complete target description */ TargetDescription load_target(Buffer const &file, size_t offset, size_t line); +/* Load a symbol table. This function returns the full table, which may contain + duplicates or unused syscall numbers and addresses. 
*/ +SymbolTable load_symbols(Buffer const &file, size_t offset, size_t line); + } /* namespace FxOS */ #endif /* LIBFXOS_LOAD_H */ diff --git a/include/fxos/semantics.h b/include/fxos/semantics.h index 66aafc5..17e3ce8 100644 --- a/include/fxos/semantics.h +++ b/include/fxos/semantics.h @@ -85,7 +85,7 @@ struct StringType: public BaseType bool nul_terminated; }; -/* Heterogenous structure types. */ +/* Heterogeneous structure types. */ struct StructType: public BaseType { /* Fields can be of any type since all are fixed-size. */ @@ -143,17 +143,20 @@ struct DataValue DataType const *type; std::vector mem; - /* Create value with no memory and no tyê */ + /* Create value with no memory and no type */ DataValue(); /* Create value with uninitialized memory for that data type */ DataValue(DataType const *type); /* Check whether the value is fully defined and initialized */ - bool defined() { + bool defined() const { return std::find(mem.begin(), mem.end(), -1) == mem.end(); } + operator bool() const { + return defined(); + } - /* Checks that the access is correct and fits witin the value. */ + /* Checks that the access is correct and fits within the value. */ void access(size_t offset, size_t size) const; /* Read data from the value. Access must be 1, 2 or 4 bytes (possibly unaligned) and must be in bounds. */ @@ -161,6 +164,9 @@ struct DataValue /* Write data. Access must be 1, 2 or 4 bytes and in bounds. 
*/ void write(size_t offset, size_t size, uint32_t contents); + /* Retrieve value as uint32_t - only valid for Integer types */ + uint32_t uinteger() const; + /* Byte-based string representation */ std::string str() const noexcept; }; diff --git a/include/fxos/symbols.h b/include/fxos/symbols.h new file mode 100644 index 0000000..9b3ebf1 --- /dev/null +++ b/include/fxos/symbols.h @@ -0,0 +1,45 @@ +//--- +// fxos.symbols: User-defined symbols for OS objects +//--- + +#ifndef LIBFXOS_SYMBOLS_H +#define LIBFXOS_SYMBOLS_H + +#include +#include +#include + +namespace FxOS +{ + +/* A named symbol that can be substituted for literal values in the code. */ +struct Symbol +{ + /* Syscall: The value is a syscall number. The syscall number for an + address is determined by querying the OS object. + Address: The value is a fixed 32-bit virtual address. */ + enum Type { Syscall=1, Address=2 }; + + enum Type type; + uint32_t value; + + /* Symbol name, no particular conventions */ + std::string name; +}; + +/* A symbol table, essentially a set of symbols loaded from the same file */ +struct SymbolTable +{ + std::string table_name; + std::vector symbols; + + /* Add a symbol to the table */ + void add(Symbol s); + /* Query a value for a certain type of symbol */ + std::optional query(Symbol::Type type, uint32_t value) + const; +}; + +} /* namespace FxOS */ + +#endif /* LIBFXOS_SYMBOLS_H */ diff --git a/include/fxos/target.h b/include/fxos/target.h index b452d66..05c77a7 100644 --- a/include/fxos/target.h +++ b/include/fxos/target.h @@ -20,6 +20,8 @@ class AbstractMemory public: /* Checks if an address or interval is simulated */ virtual bool covers(uint32_t addr, int size=1) const noexcept = 0; + /* Check if a full region is simulated */ + virtual bool covers(MemoryRegion const &region) const noexcept; /* Returns the data located at the provided virtual address. 
Throws std::out_of_range if the interval is not entirely simulated */ @@ -75,6 +77,8 @@ struct Binding: public AbstractMemory /* Checks if an address is covered by the binding */ bool covers(uint32_t addr, int size=1) const noexcept override; + /* Check if a region is covered by the binding */ + bool covers(MemoryRegion const &region) const noexcept override; /* Returns this process' address (in [data]) corresponding to the provided virtual address */ @@ -118,6 +122,8 @@ public: /* Check if an address is bound */ bool covers(uint32_t addr, int size=1) const noexcept override; + /* Check if a full region is bound */ + bool covers(MemoryRegion const &region) const noexcept override; /* Returns the data at the provided virtual address */ char const *translate(uint32_t addr, int size=1) const override; diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index fff8964..fec81b5 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -27,7 +27,7 @@ void register_instruction(Instruction ins) //--- ConcreteInstructionArg::ConcreteInstructionArg(): - value(), syscall_id(-1), reg_address((uint32_t)-1) + value(), syscall_id(-1) { location = RelConstDomain().bottom(); } diff --git a/lib/domains/relconst.cpp b/lib/domains/relconst.cpp index f651397..f8ee80b 100644 --- a/lib/domains/relconst.cpp +++ b/lib/domains/relconst.cpp @@ -44,6 +44,13 @@ bool RelConstDomain::is_constant(RelConst r) const noexcept return r.base == 0; } +uint32_t RelConstDomain::constant_value(RelConst r) const +{ + if(!is_constant(r)) + throw std::invalid_argument("Not a constant RelConst"); + return r.uval; +} + //--- // Basic arithmetic //--- diff --git a/lib/library.cpp b/lib/library.cpp index 8bbde2c..2c89ffc 100644 --- a/lib/library.cpp +++ b/lib/library.cpp @@ -83,6 +83,16 @@ void Library::load(std::string path) log(ERR "%s", e.str()); } } + else if(type == "symbols") + { + try { + SymbolTable st = load_symbols(file, offset, line); + m_symtables.push_back(st); + } + catch(FxOS::SyntaxError &e) { + 
log(ERR "%s", e.str()); + } + } else { log(ERR "unknown file type '%s' in '%s'", type, path); diff --git a/lib/load-symbols.l b/lib/load-symbols.l new file mode 100644 index 0000000..315d7c8 --- /dev/null +++ b/lib/load-symbols.l @@ -0,0 +1,114 @@ +%{ +#include +#include +#include +#include + +#include + +/* Text value and integer value for parser */ +static char *yylval; +uint32_t yyival; + +/* Tokens */ +#define SYSCALL 1 +#define ADDRESS 2 +#define NAME 3 + +/* Current file name */ +static std::string filename; + +/* Error messages and exceptions */ +static void err(char const *format, ...) +{ + static char buf[256]; + + va_list args; + va_start(args, format); + vsnprintf(buf, 256, format, args); + va_end(args); + + throw FxOS::SyntaxError(filename.c_str(), yylineno, buf); +} + +%} + +%option prefix="symbols" +%option noyywrap +%option nounput + +syscall ^%[0-9A-Fa-f]{3,} +address ^[0-9A-Fa-f]{8} +name [a-zA-Z_][a-zA-Z_0-9.]* +space [ \t]+ + +%% + +^#[^\n]* ; +{space} ; +[\n] yylineno++; + +{syscall} { yyival = strtol(yytext+1, NULL, 16); return SYSCALL; } +{address} { yyival = strtol(yytext, NULL, 16); return ADDRESS; } +{name} { yylval = strdup(yytext); return NAME; } + +. 
{ err("lex error near '%s'", yytext); } +<> { return -1; } + +%% + +namespace FxOS { + +/* Load a symbol table into the disassembler */ +SymbolTable load_symbols(Buffer const &file, size_t start_offset, + size_t start_line) +{ + YY_BUFFER_STATE buf = yy_scan_bytes(file.data + start_offset, + file.size - start_offset); + yylineno = start_line; + filename = file.path; + + SymbolTable table; + + /* Current symbol and line */ + Symbol symbol; + int line = -1; + + while(1) + { + int t = yylex(); + + if(line >= 0 && (yylineno != line || t != NAME || t == -1)) + { + /* Finalize current symbol */ + if(symbol.name == "") err("%d: missing name", line); + else table.add(symbol); + + symbol = Symbol(); + } + if(t == -1) break; + + if(t == SYSCALL) + { + symbol.type = Symbol::Syscall; + symbol.value = yyival; + line = yylineno; + } + else if(t == ADDRESS) + { + symbol.type = Symbol::Address; + symbol.value = yyival; + line = yylineno; + } + else if(t == NAME) + { + symbol.name = yylval; + free(yylval); + } + } + + yy_delete_buffer(buf); + return table; +} + +} /* namespace FxOS */ diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index 18ae315..a5bae6c 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -44,12 +44,15 @@ void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) } ca.value = DataValue(IntegerType::u32); - ca.value.write(0,4,v); + ca.value.write(0, 4, v); } else if(a.kind == Argument::PcJump) { uint32_t addr = pc + 4 + a.disp; ca.location = RelConstDomain().constant(addr); + + ca.value = DataValue(IntegerType::u32); + ca.value.write(0, 4, addr); } } diff --git a/lib/passes/print.cpp b/lib/passes/print.cpp index 2c60588..d8f9cc6 100644 --- a/lib/passes/print.cpp +++ b/lib/passes/print.cpp @@ -5,15 +5,16 @@ #include #include +#include + namespace FxOS { -PrintPass::PrintPass(Disassembly &disasm): - DisassemblyPass(disasm) +PrintPass::PrintPass(Disassembly &disasm, + std::vector const &symtables): + DisassemblyPass(disasm), 
m_symtables(symtables) { /* Default parameter set */ - hide_resolved_pcjump = false; - hide_resolved_pcrel = false; - hide_movpc_address = Hide_MovPC_Never; + /* All 0 */ } void PrintPass::run(void) @@ -43,59 +44,124 @@ void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci) for(size_t n = 0; n < i.args.size(); n++) { - auto &a = i.args[n]; - Location &l = ci.args[n].location; - std::optional v = ci.args[n].value; + Argument const &a = i.args[n]; + ConcreteInstructionArg const &arg = ci.args[n]; if(n) printf(", "); if(a.kind == Argument::PcJump) { - if(!l || !hide_resolved_pcjump) - printf("%s", a.str().c_str()); - if(l) - printf("<%s>", l.str().c_str()); + queue(a.str()); + pcjumploc(arg); + queue_flush(); } else if(a.kind == Argument::PcRel) { - pcrel(pc, a, l, v); + queue(a.str()); + pcrelloc(arg); + queue_flush(); } else { - printf("%s", a.str().c_str()); + queue(a.str()); + queue_flush(); } } printf("\n"); } -void PrintPass::pcrel(uint32_t pc, Argument const &a, Location const &l, - std::optional v) +std::optional PrintPass::symquery(Symbol::Type type, + uint32_t value) { - if(!l || !hide_resolved_pcrel) + for(int i = m_symtables.size() - 1; i >= 0; i--) { - printf("%s", a.str().c_str()); + auto maybe_str = m_symtables[i].query(type, value); + if(maybe_str) return maybe_str; } - if(!l || !RelConstDomain().is_constant(l)) return; - auto reg_code = MemoryRegion::region_for(pc); - auto reg_data = MemoryRegion::region_for(l.uval); - - bool hma = hide_movpc_address; - bool same_region = (reg_code && reg_code == reg_data); - - if(!v || hma == Hide_MovPC_Never || - (hma == Hide_MovPC_Region && !same_region)) - { - printf("<%s>", l.str().c_str()); - if(v) - printf("(%s)", v->str().c_str()); - } - else if(v) - { - printf("%s", v->str().c_str()); - } + return std::nullopt; } +void PrintPass::queue(std::string str, bool override) +{ + if(override && m_messages.size()) + m_messages.pop_back(); + + m_messages.push_back(str); +} + +void PrintPass::queue_flush() +{ + 
for(size_t i = 0; i < m_messages.size(); i++) + { + if(i != 0) printf(" "); + printf("%s", m_messages[i].c_str()); + } + + m_messages.clear(); +} + +void PrintPass::pcjumploc(ConcreteInstructionArg const &arg) +{ + if(!RelConstDomain().is_constant(arg.location)) return; + if(promote_pcjump_loc == Never) return; + + queue(format("<%s>", arg.location.str()), promote_pcjump_loc==Promote); + syscall(arg); +} + +void PrintPass::pcrelloc(ConcreteInstructionArg const &arg) +{ + if(!RelConstDomain().is_constant(arg.location)) return; + if(promote_pcrel_loc == Never) return; + + queue(format("<%s>", arg.location.str()), promote_pcrel_loc==Promote); + pcrelval(arg); +} + +void PrintPass::pcrelval(ConcreteInstructionArg const &arg) +{ + if(!arg.value || arg.value.type->kind() != DataType::Integer) return; + if(promote_pcrel_value == Never) return; + + queue(arg.value.str(), promote_pcrel_value==Promote); + syscall(arg); +} + +void PrintPass::syscall(ConcreteInstructionArg const &arg) +{ + if(!arg.value || arg.value.type->kind() != DataType::Integer) return; + + /* If this is not a syscall, try to display as a symbol instead */ + if(promote_syscall == Never || arg.syscall_id < 0) + { + symbol(arg); + return; + } + + queue(format("%%%03x", arg.syscall_id), promote_syscall==Promote); + syscallname(arg); +} + +void PrintPass::syscallname(ConcreteInstructionArg const &arg) +{ + if(arg.syscall_id < 0) return; + + auto maybe_name = symquery(Symbol::Syscall, arg.syscall_id); + if(!maybe_name) return; + + queue(*maybe_name, promote_syscallname==Promote); +} + +void PrintPass::symbol(ConcreteInstructionArg const &arg) +{ + if(!arg.value || arg.value.type->kind() != DataType::Integer) return; + + auto maybe_name = symquery(Symbol::Address, arg.value.uinteger()); + if(!maybe_name) return; + + queue(*maybe_name, promote_symbol==Promote); +} } /* namespace FxOS */ diff --git a/lib/passes/syscall.cpp b/lib/passes/syscall.cpp new file mode 100644 index 0000000..7d9da5c --- /dev/null +++ 
b/lib/passes/syscall.cpp @@ -0,0 +1,52 @@ +//--- +// fxos.passes.syscall: Detection and substitution of syscall addresses +//--- + +#include + +namespace FxOS { + +SyscallPass::SyscallPass(Disassembly &disasm, OS *os): + DisassemblyPass(disasm), m_os(os) +{ +} + +void SyscallPass::analyze(uint32_t pc, ConcreteInstruction &ci) +{ + /* Nothing to do if no syscall table is provided! */ + if(!m_os) return; + + Instruction const &i = ci.inst; + + for(size_t n = 0; n < i.args.size(); n++) + { + Argument const &a = i.args[n]; + ConcreteInstructionArg &ca = ci.args[n]; + + bool eligible = false; + uint32_t address; + + if(a.kind == Argument::PcRel && ca.value + && ca.value.type == IntegerType::u32) + { + eligible = true; + address = ca.value.read(0, 4); + } + if(a.kind == Argument::PcJump && ca.location + && RelConstDomain().is_constant(ca.location)) + { + eligible = true; + address = RelConstDomain().constant_value(ca.location); + } + + if(eligible) + { + int sid = m_os->find_syscall(address); + if(sid >= 0) ca.syscall_id = sid; + } + } + + enqueue_unseen_successors(pc, ci); +} + +} /* namespace FxOS */ diff --git a/lib/semantics.cpp b/lib/semantics.cpp index 60c10c5..bb3328e 100644 --- a/lib/semantics.cpp +++ b/lib/semantics.cpp @@ -124,6 +124,14 @@ void DataValue::write(size_t offset, size_t size, uint32_t contents) } } +uint32_t DataValue::uinteger() const +{ + if(!type || type->kind() != DataType::Integer) + throw std::logic_error("uinteger() on non-int DataValue"); + + return read(0, type->size()); +} + std::string DataValue::str() const noexcept { std::string result; diff --git a/lib/symbols.cpp b/lib/symbols.cpp new file mode 100644 index 0000000..576b93c --- /dev/null +++ b/lib/symbols.cpp @@ -0,0 +1,21 @@ +#include + +namespace FxOS { + +void SymbolTable::add(Symbol s) +{ + symbols.push_back(s); +} + +std::optional SymbolTable::query(Symbol::Type type, + uint32_t value) const +{ + for(auto &sym: symbols) + { + if(sym.type == type && sym.value == value) return 
sym.name; + } + + return std::nullopt; +} + +} /* namespace FxOS */ diff --git a/lib/target.cpp b/lib/target.cpp index 2644462..3156578 100644 --- a/lib/target.cpp +++ b/lib/target.cpp @@ -9,6 +9,11 @@ namespace FxOS { // Simulated memory access primitives //--- +bool AbstractMemory::covers(MemoryRegion const &region) const noexcept +{ + return covers(region.start, region.size()); +} + Addressable AbstractMemory::read_i8(uint32_t addr) const { int8_t *i8 = (int8_t *)translate(addr, 1); @@ -74,6 +79,11 @@ bool Binding::covers(uint32_t addr, int size) const noexcept return addr >= region.start && addr + size <= region.end; } +bool Binding::covers(MemoryRegion const &region) const noexcept +{ + return covers(region.start, region.size()); +} + char const *Binding::translate(uint32_t addr, int size) const { if(!covers(addr, size)) @@ -140,6 +150,11 @@ bool Target::covers(uint32_t addr, int size) const noexcept return false; } +bool Target::covers(MemoryRegion const &region) const noexcept +{ + return covers(region.start, region.size()); +} + char const *Target::translate(uint32_t addr, int size) const { for(auto it = m_bindings.crbegin(); it != m_bindings.crend(); it++)