diff --git a/CMakeLists.txt b/CMakeLists.txt index 092e48f..6f1ab39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,11 +42,11 @@ set(fxos_core_SOURCES lib/os.cpp lib/passes/cfg.cpp lib/passes/pcrel.cpp - lib/passes/print.cpp lib/passes/syscall.cpp lib/project.cpp lib/semantics.cpp lib/symbols.cpp + lib/view/assembly.cpp lib/vspace.cpp lib/ai/RelConst.cpp diff --git a/include/fxos/binary.h b/include/fxos/binary.h index 2e99216..b385156 100644 --- a/include/fxos/binary.h +++ b/include/fxos/binary.h @@ -64,8 +64,19 @@ struct Binary multiple objects with the same name, returns an arbitrary one. */ std::optional objectAddress(std::string const &name) const; + /* Return the address of an object defined at the specified address if + there is one, nullptr otherwise. If multiple objects are defined at the + specified address, an arbitrary one is returned. */ + BinaryObject *objectAt(u32 address); + BinaryObject const *objectAt(u32 address) const; + + /* Returns the list of all objects defined at the specified address. */ + std::vector objectsAt(u32 address); + std::vector objectsAt(u32 address) const; + /* Locate all objects that intersect an address. */ std::vector objectsCovering(u32 address); + std::vector objectsCovering(u32 address) const; private: VirtualSpace m_vspace; diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index 51f1e29..692f6de 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -23,10 +23,12 @@ #include #include #include +#include namespace FxOS { class VirtualSpace; +class Binary; /* Register an instruction. This is called by loader functions from the asm table lexer. [inst] must have its opcode field set. */ @@ -35,6 +37,9 @@ void register_instruction(AsmInstruction const &inst); /* Lex and register an assembly instruction table. */ int load_instructions(Buffer const &file); +/* Map of all decodable instructions. 
*/ +extern std::array, 65536> insmap; + //--- // Dynamic information on instructions //--- @@ -231,17 +236,17 @@ struct Disassembly class DisassemblyPass { public: - DisassemblyPass(Disassembly &disasm); + DisassemblyPass(Binary &binary); /* Underlying disassembly */ - Disassembly &m_disasm; + Binary &m_binary; }; /* A disassembly pass that observes each function independently */ class FunctionPass: public DisassemblyPass { public: - FunctionPass(Disassembly &disasm); + FunctionPass(Binary &binary); /* Analyze the whole disassembly */ bool analyzeAllFunctions(); @@ -267,7 +272,7 @@ private: class InstructionPass: public FunctionPass { public: - InstructionPass(Disassembly &disasm); + InstructionPass(Binary &binary); /* If set, this pass loads instructions from the disassembly automatically. This is useful for passes that explore new functions. By default, diff --git a/include/fxos/passes/cfg.h b/include/fxos/passes/cfg.h index f3a19c3..ba65fe4 100644 --- a/include/fxos/passes/cfg.h +++ b/include/fxos/passes/cfg.h @@ -53,7 +53,7 @@ namespace FxOS { class CfgPass: public InstructionPass { public: - CfgPass(Disassembly &disasm); + CfgPass(Binary &binary); bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override; /* Explore a new function at the specified address. 
This method creates the diff --git a/include/fxos/passes/pcrel.h b/include/fxos/passes/pcrel.h index 8ea0c88..1beb217 100644 --- a/include/fxos/passes/pcrel.h +++ b/include/fxos/passes/pcrel.h @@ -27,7 +27,7 @@ namespace FxOS { class PcrelPass: public InstructionPass { public: - PcrelPass(Disassembly &disasm); + PcrelPass(Binary &binary); bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override; }; diff --git a/include/fxos/passes/print.h b/include/fxos/passes/print.h deleted file mode 100644 index 33349fc..0000000 --- a/include/fxos/passes/print.h +++ /dev/null @@ -1,115 +0,0 @@ -//---------------------------------------------------------------------------// -// 1100101 |_ mov #0, r4 __ // -// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // -// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // -// |_ base# + offset |_| /_\_\___/__/ // -//---------------------------------------------------------------------------// -// fxos/passes/print: Disassembly printer -// -// This pass prints the program and adds annotations depending on a number of -// customizable boolean parameters. -// -// Data for an instruction, and arguments in particular, might have a large -// number of equivalent representations, depending on how much information was -// added during disassembly. -// -// The main mechanic of this pass is to define *promotions* which allow high- -// level information to be added or to replace low-level data. For instance, an -// @(disp,pc) argument could promote to a statically-computed address, which -// could promote to its known pointed value in the case of a read, which could -// itself promote to a symbol or syscall number. -// -// Each promotion opportunity has 3 possible settings: -// - Never: the higher-level information is not shown. -// - Append: the higher-level information is shown after the low-level one. -// - Promote: the higher-level information replaces the low-level one. 
-// -// For example, by default @(disp,pc) is set to Promote to statically-computed -// addresses, their values, and syscall numbers, but syscall names are only set -// to Append. Therefore, a mov.l @(disp,pc) which loads the address of syscall -// %ace on an fx-series model (which is memcp) might show as -// -// mov.l %ace memcmp, r3 -// -// where the first element has been promoted twice and the second appended. -//--- - -#ifndef FXOS_PASSES_PRINT_H -#define FXOS_PASSES_PRINT_H - -#include -#include - -namespace FxOS { - -class OS; - -class PrintPass: public InstructionPass -{ -public: - PrintPass(Disassembly &disasm); - bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override; - - //--- - // Print pass parameters - //--- - - /* Promotion parameters. Default is always to append. */ - enum Promotion { - /* Never promote */ - Never = 1, - /* Promote but keep the lower-level information */ - Append = 0, - /* Promote and hide the lower-level information */ - Promote = 2, - }; - - /** In the following, promote_x always means promote *to x* **/ - - /* In jumps, promote "pc+" to the target address */ - int promote_pcjump_loc; - /* In a PC-relative mov, promote "@(,pc)" to computed address */ - int promote_pcrel_loc; - /* In a PC-relative mov, promote address to pointed value */ - int promote_pcrel_value; - /* Promote an integer to a syscall number */ - int promote_syscall; - /* Promote a syscall number to a syscall name */ - int promote_syscallname; - /* Promote an integer to a symbol */ - int promote_symbol; - /* In a mova, promote "pc+" to the computed address */ - int promote_pcaddr_loc; - - /* TODO: More print pass parameters */ - -private: - /* Symbol tables to look up names */ - std::vector> m_symtables; - /* Query symbol tables, most recent first */ - std::optional symquery(Symbol::Type type, uint32_t value); - - /* OS for the target, to mark syscalls before instructions */ - OS *m_os; - - /* Last printed address (for ellipses) */ - uint32_t 
m_last_address; - - /** Internal promotion tree printers **/ - - void queue(std::string, bool = false); - void queue_flush(); - std::vector m_messages; - - void pcjumploc(Argument const &); - void pcrelloc(Argument const &); - void pcrelval(Argument const &); - void syscall(Argument const &); - void syscallname(Argument const &); - void symbol(Argument const &); - void pcaddrloc(Argument const &); -}; - -} /* namespace FxOS */ - -#endif /* FXOS_PASSES_PRINT_H */ diff --git a/include/fxos/passes/syscall.h b/include/fxos/passes/syscall.h index 881d969..97ebe49 100644 --- a/include/fxos/passes/syscall.h +++ b/include/fxos/passes/syscall.h @@ -22,7 +22,7 @@ namespace FxOS { class SyscallPass: public InstructionPass { public: - SyscallPass(Disassembly &disasm, OS *os); + SyscallPass(Binary &binary, OS *os); bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override; private: diff --git a/include/fxos/view/assembly.h b/include/fxos/view/assembly.h new file mode 100644 index 0000000..724001c --- /dev/null +++ b/include/fxos/view/assembly.h @@ -0,0 +1,105 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/view/assembly: Assembly code visualization +// +// This header provides a configurable assembly code pretty-printer. Options +// are provided to select between equivalent representations of arguments. +// +// Code is usually printed for a function. On-the-fly disassembly of non- +// functions is supported by either: +// - Building a function rooted at a specified address with a CFG analysis; +// - Building a function with a single block covering a specified interval. 
+// +// # Visualizing instructions +// +// The main mechanic is the _promotion_ of low-level information to a higher +// level. For instance, a @(disp,pc) argument can promote to its statically- +// determined target, which can promote to its pointed value for a ROM read, +// which might itself be an object address or a syscall number. +// +// Each promotion thus has 3 settings: +// - Never: the higher-level information is not shown. +// - Append: the higher-level information is shown after the low-level one. +// - Promote: the higher-level information replaces the low-level one. +// +// For example, by default @(disp,pc) promotes to the target address, accessed +// value, and syscall number, but syscall names are set to Append. Hence, a +// mov.l @(disp,pc) loading the address of syscall %ace on FX (which is memcmp) +// into r3 will show as "mov.l %ace memcmp, r3". +// +// # Visualizing blocks and functions +// TODO: CFG layout and rendering algorithms +// +// The rendering is split into three steps: +// 1. Generating text for each instruction or object +// 2. Grouping by basic block +// 3.
Laying out basic blocks as a graph +//--- + +#ifndef FXOS_VIEW_ASSEMBLY_H +#define FXOS_VIEW_ASSEMBLY_H + +#include +#include + +namespace FxOS { + +class OS; +struct Binary; +struct Function; +struct BasicBlock; +struct Instruction; + +struct ViewAssemblyOptions +{ + enum Promotion : u8 { + Never, /* Never promote */ + Append, /* Promote but keep the lower-level information */ + Promote, /* Promote and hide the lower-level information */ + }; + + struct + { + /* In jumps, promote "pc+" to the target address */ + Promotion PCJump_to_Location = Promote; + /* In a PC-relative mov, promote "@(,pc)" to computed address */ + Promotion PCRelative_to_Location = Promote; + /* In a mova, promote "pc+" to the computed address */ + Promotion PCAddr_to_Location = Promote; + /* In a read with a known location, promote address to pointed value */ + Promotion ReadLocation_to_Constant = Promote; + /* Promote an integer to a binary object's name */ + Promotion Constant_to_ObjectName = Promote; + /* Promote an integer to a syscall number (if no name is available) */ + Promotion Constant_to_SyscallNumber = Promote; + } promotions; + + /* Binary to get symbols from */ + Binary *binary = nullptr; + + /* TODO: More view assembly options, including CFG layout */ + /* TODO: View assembly options: syntax highlighting */ +}; + +void viewAssemblyInstruction( + Instruction const &inst, ViewAssemblyOptions *opts = nullptr); + +void viewAssemblyBasicBlock( + BasicBlock const &bb, ViewAssemblyOptions *opts = nullptr); + +void viewAssemblyFunction( + Function const &fun, ViewAssemblyOptions *opts = nullptr); + +/* Legacy functions. Automatically set b as the binary in the options.
*/ +void viewAssemblyLegacyRegion( + Binary &binary, MemoryRegion r, ViewAssemblyOptions *opts = nullptr); +void viewAssemblyLegacyAddress( + Binary &binary, u32 pc, ViewAssemblyOptions *opts = nullptr); + +} /* namespace FxOS */ + +#endif /* FXOS_VIEW_ASSEMBLY_H */ diff --git a/include/fxos/vspace.h b/include/fxos/vspace.h index e0cbbf0..facd1a0 100644 --- a/include/fxos/vspace.h +++ b/include/fxos/vspace.h @@ -127,11 +127,8 @@ public: char const *translate_dynamic(uint32_t addr, int *size) override; // TODO: Remove these - std::string mpu; SymbolTable symbols; - uint32_t cursor; Disassembly disasm; - OS *os_analysis(bool force = false); private: std::unique_ptr m_os; }; diff --git a/lib/binary.cpp b/lib/binary.cpp index 71c14a0..7dc02f8 100644 --- a/lib/binary.cpp +++ b/lib/binary.cpp @@ -64,6 +64,34 @@ std::optional Binary::objectAddress(std::string const &name) const return {}; } +BinaryObject *Binary::objectAt(u32 address) +{ + auto it = m_objects.find(address); + return (it == m_objects.end()) ? nullptr : it->second.get(); +} + +BinaryObject const *Binary::objectAt(u32 address) const +{ + auto it = m_objects.find(address); + return (it == m_objects.end()) ? 
nullptr : it->second.get(); +} + +std::vector Binary::objectsAt(u32 address) +{ + std::vector objects; + for(auto [it, end] = m_objects.equal_range(address); it != end; ++it) + objects.push_back(it->second.get()); + return objects; +} + +std::vector Binary::objectsAt(u32 address) const +{ + std::vector objects; + for(auto [it, end] = m_objects.equal_range(address); it != end; ++it) + objects.push_back(it->second.get()); + return objects; +} + std::vector Binary::objectsCovering(u32 address) { std::vector objects; @@ -76,6 +104,18 @@ std::vector Binary::objectsCovering(u32 address) return objects; } +std::vector Binary::objectsCovering(u32 address) const +{ + std::vector objects; + /* An object covers the range [obj_address, obj_address + size) */ + for(auto const &[obj_address, obj]: m_objects) { + if(obj_address <= address && obj_address + obj->size() > address) + objects.push_back(obj.get()); + } + + return objects; +} + //=== BinaryObject ===// bool BinaryObject::intersects(BinaryObject const &other) const diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index 406ba82..72a85b0 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -14,7 +15,7 @@ namespace FxOS { /* Instruction map */ -static std::array, 65536> insmap; +std::array, 65536> insmap; void register_instruction(AsmInstruction const &ins) { @@ -245,7 +246,7 @@ std::vector Disassembly::findClaimsOwnedBy(uint32_t address) // DisassemblyPass //--- -DisassemblyPass::DisassemblyPass(Disassembly &disasm): m_disasm {disasm} +DisassemblyPass::DisassemblyPass(Binary &binary): m_binary {binary} { } @@ -253,7 +254,7 @@ DisassemblyPass::DisassemblyPass(Disassembly &disasm): m_disasm {disasm} // FunctionPass //--- -FunctionPass::FunctionPass(Disassembly &disasm): DisassemblyPass(disasm) +FunctionPass::FunctionPass(Binary &binary): DisassemblyPass(binary) { } @@ -261,7 +262,8 @@ bool FunctionPass::analyzeAllFunctions() { bool ok = true; - for(auto &pair: m_disasm.functions) + // TODO: Use Binary's
functions + for(auto &pair: m_binary.vspace().disasm.functions) ok &= this->analyzeFunction(pair.second); return ok; @@ -269,7 +271,8 @@ bool FunctionPass::analyzeAllFunctions() bool FunctionPass::analyzeFunction(uint32_t pc) { - OldFunction *func = m_disasm.getFunctionAt(pc); + // TODO: Use Binary's functions + OldFunction *func = m_binary.vspace().disasm.getFunctionAt(pc); if(!func) { FxOS_log(ERR, "no function at 0x%08x", pc); return false; @@ -289,7 +292,8 @@ bool FunctionPass::analyzeFunctionRecursively(uint32_t pc) while(!m_queue.empty()) { uint32_t pc = m_queue.pop(); - OldFunction *next = m_disasm.getFunctionAt(pc); + // TODO: Use Binary's functions + OldFunction *next = m_binary.vspace().disasm.getFunctionAt(pc); if(this->analyzeFunction(*next)) this->enqueueSubfunctions(*next); else @@ -315,8 +319,8 @@ void FunctionPass::updateSubfunctions(OldFunction &func) // InstructionPass //--- -InstructionPass::InstructionPass(Disassembly &disasm): - FunctionPass(disasm), m_allowDiscovery {false} +InstructionPass::InstructionPass(Binary &binary): + FunctionPass(binary), m_allowDiscovery {false} { } @@ -329,7 +333,8 @@ bool InstructionPass::analyzeAllInstructions() { bool ok = true; - for(auto &pair: m_disasm.instructions) + // TODO: Use Binary's instructions + for(auto &pair: m_binary.vspace().disasm.instructions) ok &= this->analyzeInstruction(pair.first, pair.second); return ok; @@ -349,7 +354,9 @@ bool InstructionPass::analyzeAnonymousFunction(uint32_t pc) while(!m_queue.empty()) { uint32_t pc = m_queue.pop(); - OldInstruction *i = m_disasm.getInstructionAt(pc, m_allowDiscovery); + // TODO: Use Binary's instructions + OldInstruction *i + = m_binary.vspace().disasm.getInstructionAt(pc, m_allowDiscovery); if(i != nullptr && this->analyzeInstruction(pc, *i)) this->enqueueSuccessors(pc, *i); diff --git a/lib/passes/cfg.cpp b/lib/passes/cfg.cpp index 9b8d144..b590d28 100644 --- a/lib/passes/cfg.cpp +++ b/lib/passes/cfg.cpp @@ -8,12 +8,13 @@ #include #include #include 
+#include #include namespace FxOS { -CfgPass::CfgPass(Disassembly &disasm): - InstructionPass(disasm), m_claimedInstructions {}, m_pcrel {disasm} +CfgPass::CfgPass(Binary &binary): + InstructionPass(binary), m_claimedInstructions {}, m_pcrel {binary} { this->setAllowDiscovery(true); } @@ -45,7 +46,9 @@ bool CfgPass::analyzeInstruction(uint32_t pc, OldInstruction &i) jmptarget = (pc + 4) + args[0].disp; /* Make the target of the jump a leader */ - OldInstruction &target = *m_disasm.getInstructionAt(jmptarget, true); + // TODO: Use Binary instructions + OldInstruction &target + = *m_binary.vspace().disasm.getInstructionAt(jmptarget, true); target.leader = true; /* Check that it's not in a delay slot */ @@ -68,7 +71,9 @@ bool CfgPass::analyzeInstruction(uint32_t pc, OldInstruction &i) } /* If it has a delay slot, create it at the next instruction */ else if(i.inst->hasDelaySlot()) { - OldInstruction &slot = *m_disasm.getInstructionAt(pc + 2, true); + // TODO: Use Binary instructions + OldInstruction &slot + = *m_binary.vspace().disasm.getInstructionAt(pc + 2, true); if(slot.leader) throw std::logic_error(format( "0x%08x is a leader and also a delay" @@ -103,7 +108,8 @@ bool CfgPass::exploreFunction(uint32_t pc) m_lastFunction = pc; m_claimedInstructions.clear(); - OldFunction *func = m_disasm.getOrCreateFunctionAt(pc); + // TODO: Use Binary functions + OldFunction *func = m_binary.vspace().disasm.getOrCreateFunctionAt(pc); if(!this->analyzeFunction(pc)) return false; @@ -111,7 +117,9 @@ bool CfgPass::exploreFunction(uint32_t pc) /* Look for call targets */ for(uint32_t pc: m_claimedInstructions) { - OldInstruction const *ci = m_disasm.getInstructionAt(pc); + // TODO: Use Binary instructions + OldInstruction const *ci + = m_binary.vspace().disasm.getInstructionAt(pc); if(!ci) continue; AsmInstruction const &i = *ci->inst; diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index 5a3f77c..5eef900 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -6,11 
+6,11 @@ //---------------------------------------------------------------------------// #include -#include +#include namespace FxOS { -PcrelPass::PcrelPass(Disassembly &disasm): InstructionPass(disasm) +PcrelPass::PcrelPass(Binary &binary): InstructionPass(binary) { } @@ -31,7 +31,7 @@ bool PcrelPass::analyzeInstruction(uint32_t pc, OldInstruction &ci) /* Also compute the value. This is sign-extended from 16-bit with mov.w. There is no mov.b for this instruction. */ - VirtualSpace &space = m_disasm.vspace; + VirtualSpace &space = m_binary.vspace(); uint32_t v = -1; if(i->opsize == 2 && space.covers(addr, 2)) { diff --git a/lib/passes/print.cpp b/lib/passes/print.cpp deleted file mode 100644 index 1d58221..0000000 --- a/lib/passes/print.cpp +++ /dev/null @@ -1,205 +0,0 @@ -//---------------------------------------------------------------------------// -// 1100101 |_ mov #0, r4 __ // -// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // -// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // -// |_ base# + offset |_| /_\_\___/__/ // -//---------------------------------------------------------------------------// - -#include -#include -#include - -#include -#include - -namespace FxOS { - -PrintPass::PrintPass(Disassembly &disasm): - InstructionPass(disasm), m_symtables {}, m_last_address {0xffffffff} -{ - /* Default parameters: all 0 */ - - /* Use an OS observer to describe syscalls in header lines */ - m_os = disasm.vspace.os_analysis(); - - /* Use the symbol tables from the virtual space */ - m_symtables.push_back(disasm.vspace.symbols); -} - -bool PrintPass::analyzeInstruction(uint32_t pc, OldInstruction &i) -{ - /* Ellipsis if there is a gap since last instruction */ - - if(m_last_address + 1 != 0 && pc != m_last_address + 2) - printf(" ...\n"); - - /* Preliminary syscall number */ - - int syscall_id; - if(m_os && (syscall_id = m_os->find_syscall(pc)) >= 0) { - printf("\n<%%%04x", syscall_id); - auto maybe_str = symquery(Symbol::Syscall, syscall_id); - if(maybe_str) - printf(" %s", 
(*maybe_str).c_str()); - printf(">\n"); - } - - /* Raw data if instruction cannot be decoded */ - - printf(" %08x: %04x", pc, (i.inst ? i.inst->opcode : i.opcode)); - if(!i.inst) { - printf("\n"); - m_last_address = pc; - return true; - } - - /* Mnemonic */ - - static char const *suffixes[5] = {"", ".b", ".w", "", ".l"}; - char const *suffix = suffixes[(i.inst->opsize <= 4) ? i.inst->opsize : 0]; - - int spacing - = i.inst->arg_count ? 8 - strlen(i.inst->mnemonic) - strlen(suffix) : 0; - printf(" %s%s%*s", i.inst->mnemonic, suffix, spacing, ""); - - /* Arguments */ - - for(size_t n = 0; n < i.inst->arg_count; n++) { - AsmArgument const &arg = i.inst->args[n]; - Argument const &a = i.args[n]; - - if(n) - printf(", "); - - queue(arg.str()); - if(arg.kind == AsmArgument::PcJump) - pcjumploc(a); - else if(arg.kind == AsmArgument::PcRel) - pcrelloc(a); - else if(arg.kind == AsmArgument::PcAddr) - pcaddrloc(a); - queue_flush(); - } - - printf("\n"); - m_last_address = pc; - return true; -} - -std::optional PrintPass::symquery( - Symbol::Type type, uint32_t value) -{ - for(int i = m_symtables.size() - 1; i >= 0; i--) { - SymbolTable const &st = m_symtables[i]; - - auto maybe_str = st.query(type, value); - if(maybe_str) - return maybe_str; - } - - return std::nullopt; -} - -void PrintPass::queue(std::string str, bool override) -{ - if(override && m_messages.size()) - m_messages.pop_back(); - - m_messages.push_back(str); -} - -void PrintPass::queue_flush() -{ - for(size_t i = 0; i < m_messages.size(); i++) { - if(i != 0) - printf(" "); - printf("%s", m_messages[i].c_str()); - } - - m_messages.clear(); -} - -void PrintPass::pcjumploc(Argument const &a) -{ - if(!RelConstDomain().is_constant(a.location)) - return; - if(promote_pcjump_loc == Never) - return; - - queue(format("<%s>", a.location.str()), promote_pcjump_loc == Promote); - syscall(a); -} - -void PrintPass::pcrelloc(Argument const &a) -{ - if(!RelConstDomain().is_constant(a.location)) - return; - if(promote_pcrel_loc 
== Never) - return; - - queue(format("<%s>", a.location.str()), promote_pcrel_loc == Promote); - pcrelval(a); -} - -void PrintPass::pcrelval(Argument const &a) -{ - if(!a.value) - return; - if(promote_pcrel_value == Never) - return; - - queue(a.value.str(), promote_pcrel_value == Promote); - syscall(a); -} - -void PrintPass::syscall(Argument const &a) -{ - if(!a.value) - return; - - /* If this is not a syscall, try to display as a symbol instead */ - if(promote_syscall == Never || a.syscall_id < 0) { - symbol(a); - return; - } - - queue(format("%%%04x", a.syscall_id), promote_syscall == Promote); - syscallname(a); -} - -void PrintPass::syscallname(Argument const &a) -{ - if(a.syscall_id < 0) - return; - - auto maybe_name = symquery(Symbol::Syscall, a.syscall_id); - if(!maybe_name) - return; - - queue(*maybe_name, promote_syscallname == Promote); -} - -void PrintPass::symbol(Argument const &a) -{ - if(!a.value) - return; - - auto maybe_name - = symquery(Symbol::Address, RelConstDomain().constant_value(a.value)); - if(!maybe_name) - return; - - queue(*maybe_name, promote_symbol == Promote); -} - -void PrintPass::pcaddrloc(Argument const &a) -{ - if(!RelConstDomain().is_constant(a.location)) - return; - if(promote_pcaddr_loc == Never) - return; - - queue(format("<%s>", a.location.str()), promote_pcaddr_loc == Promote); -} - -} /* namespace FxOS */ diff --git a/lib/passes/syscall.cpp b/lib/passes/syscall.cpp index 6e266e6..7d50642 100644 --- a/lib/passes/syscall.cpp +++ b/lib/passes/syscall.cpp @@ -9,8 +9,8 @@ namespace FxOS { -SyscallPass::SyscallPass(Disassembly &disasm, OS *os): - InstructionPass(disasm), m_os {os} +SyscallPass::SyscallPass(Binary &binary, OS *os): + InstructionPass(binary), m_os {os} { } diff --git a/lib/view/assembly.cpp b/lib/view/assembly.cpp new file mode 100644 index 0000000..a8f4e08 --- /dev/null +++ b/lib/view/assembly.cpp @@ -0,0 +1,211 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov 
#0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace FxOS { + +/* Output for a single argument, which consists of one or more text segments + each with their own text style. */ +using ArgumentOutput = std::vector>; + +static inline bool output(ArgumentOutput &out, ViewAssemblyOptions::Promotion p, + fmt::text_style style, std::string str) +{ + if(p == ViewAssemblyOptions::Never) + return true; + if(p == ViewAssemblyOptions::Promote) + out.pop_back(); + + out.push_back({style, std::move(str)}); + return false; +} + +static void renderArgument(AsmArgument const &arg, Argument const &a, + ArgumentOutput &out, ViewAssemblyOptions const &opts) +{ + out.push_back({{}, arg.str()}); + + // clang-format off + enum { None, PCJump, PCRelative, PCAddr, Location, Constant, SyscallNumber, + ObjectName } + type = None; + // clang-format on + + if(arg.kind == AsmArgument::PcJump) + type = PCJump; + else if(arg.kind == AsmArgument::PcRel) + type = PCRelative; + else if(arg.kind == AsmArgument::PcAddr) + type = PCAddr; + + if(type == PCJump || type == PCRelative || type == PCAddr) { + auto p = (type == PCJump) ? opts.promotions.PCJump_to_Location + : (type == PCAddr) ? opts.promotions.PCAddr_to_Location + : opts.promotions.PCRelative_to_Location; + if(!RelConstDomain().is_constant(a.location)) + return; + if(output(out, p, {}, format("<%s>", a.location.str()))) + return; + type = (type == PCRelative) ? Location : Constant; + } + + if(type == Location) { + // TODO: Check that this is a read operation! + auto p = opts.promotions.ReadLocation_to_Constant; + if(!a.value || output(out, p, {}, a.value.str())) + return; + type = Constant; + } + + /* Promote to object name first if available... 
*/ + if(type == Constant && a.value && opts.binary) { + auto p = opts.promotions.Constant_to_ObjectName; + u32 address = RelConstDomain().constant_value(a.value); + BinaryObject *obj = opts.binary->objectAt(address); + + if(obj) { + if(output(out, p, {}, obj->name())) + return; + type = ObjectName; + } + } + /* ... or, as a default, a syscall number */ + if(type == Constant && a.value && a.syscall_id >= 0) { + auto p = opts.promotions.Constant_to_SyscallNumber; + if(output(out, p, {}, format("%%%04x", a.syscall_id))) + return; + type = SyscallNumber; + } +} + +//=== Legacy-style instruction printer ===// + +static void doOldInst(u32 pc, OldInstruction &i, + ViewAssemblyOptions const &opts, u32 &m_lastAddress) +{ + OS *os = opts.binary ? opts.binary->OSAnalysis() : nullptr; + ArgumentOutput argout; + + /* Ellipsis if there is a gap since last instruction */ + if(m_lastAddress + 1 != 0 && pc != m_lastAddress + 2) + printf(" ...\n"); + + /* Preliminary syscall number */ + int syscall_id; + if(os && (syscall_id = os->find_syscall(pc)) >= 0) { + printf("\n<%%%04x", syscall_id); + BinaryObject *obj = opts.binary ? opts.binary->objectAt(pc) : nullptr; + if(obj) + printf(" %s", obj->name().c_str()); + printf(">\n"); + } + + /* Raw data if instruction cannot be decoded */ + printf(" %08x: %04x", pc, (i.inst ? i.inst->opcode : i.opcode)); + if(!i.inst) { + printf("\n"); + m_lastAddress = pc; + return; + } + + /* Mnemonic */ + static char const *suffixes[5] = {"", ".b", ".w", "", ".l"}; + char const *suffix = suffixes[(i.inst->opsize <= 4) ? i.inst->opsize : 0]; + + int spacing + = i.inst->arg_count ? 
8 - strlen(i.inst->mnemonic) - strlen(suffix) : 0; + printf(" %s%s%*s", i.inst->mnemonic, suffix, spacing, ""); + + /* Arguments */ + for(size_t n = 0; n < i.inst->arg_count; n++) { + if(n) + printf(", "); + + renderArgument(i.inst->args[n], i.args[n], argout, opts); + + for(size_t i = 0; i < argout.size(); i++) { + if(i != 0) + printf(" "); + printf("%s", argout[i].second.c_str()); + } + + argout.clear(); + } + + printf("\n"); + m_lastAddress = pc; +} + +void viewAssemblyLegacyRegion( + Binary &binary, MemoryRegion r, ViewAssemblyOptions *opts_ptr) +{ + ViewAssemblyOptions opts; + if(opts_ptr) + opts = *opts_ptr; + opts.binary = &binary; + + u32 lastAddress = 0xffffffff; + + for(u32 pc = r.start & -2; pc <= r.end; pc += 2) { + OldInstruction *i = binary.vspace().disasm.getInstructionAt(pc, true); + if(i != nullptr) + doOldInst(pc, *i, opts, lastAddress); + } +} + +void viewAssemblyLegacyAddress( + Binary &binary, u32 pc, ViewAssemblyOptions *opts_ptr) +{ + ViewAssemblyOptions opts; + if(opts_ptr) + opts = *opts_ptr; + opts.binary = &binary; + + u32 lastAddress = 0xffffffff; + + Queue queue; + queue.enqueue(pc); + + while(!queue.empty()) { + u32 pc = queue.pop(); + OldInstruction *i = binary.vspace().disasm.getInstructionAt(pc, true); + if(i == nullptr) + continue; + + /* Enqueue successors */ + if(!i->terminal && !i->jump) + queue.enqueue(pc + 2); + if(i->jump || i->condjump) + queue.enqueue(i->jmptarget); + } + + /* Print explored instructions in increasing order of addresses */ + for(u32 pc: queue.seen) { + OldInstruction *i = binary.vspace().disasm.getInstructionAt(pc, false); + if(i) + doOldInst(pc, *i, opts, lastAddress); + } +} + +//=== Binary-API assembly printer ===// + +void viewAssemblyInstruction( + Instruction const &inst, ViewAssemblyOptions *opts); + +void viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts); + +void viewAssemblyFunction(Function const &fun, ViewAssemblyOptions *opts); + +} /* namespace FxOS */ diff --git 
a/lib/vspace.cpp b/lib/vspace.cpp index 3b179b3..0eaabc1 100644 --- a/lib/vspace.cpp +++ b/lib/vspace.cpp @@ -143,8 +143,7 @@ char const *Binding::translate_dynamic(uint32_t addr, int *size) //=== VirtualSpace ===// -VirtualSpace::VirtualSpace(): - bindings {}, mpu {}, cursor {0}, disasm {*this}, m_os {nullptr} +VirtualSpace::VirtualSpace(): bindings {}, disasm {*this} { } @@ -163,17 +162,6 @@ void VirtualSpace::deserialize(BSON const &b) this->bindings.push_back(Binding(b[i])); } -OS *VirtualSpace::os_analysis(bool force) -{ - if(!m_os || force) { - m_os = std::make_unique(*this); - /* We don't keep an OS analysis result that failed */ - if(m_os->type == OS::UNKNOWN) - m_os = nullptr; - } - return m_os.get(); -} - void VirtualSpace::bind_region(MemoryRegion const ®ion, Buffer const &buf) { this->bindings.emplace(this->bindings.begin(), region, buf); diff --git a/shell/a.cpp b/shell/a.cpp index 261fd31..516b610 100644 --- a/shell/a.cpp +++ b/shell/a.cpp @@ -19,7 +19,7 @@ //--- static void ad_disassemble_all( - VirtualSpace &space, std::vector const &addresses, bool force) + Binary &binary, std::vector const &addresses, bool force) { int successes = 0, errors = 0; Timer timer; @@ -27,7 +27,7 @@ static void ad_disassemble_all( /* Analyze the CFGs of all functions */ timer.start(); - CfgPass cfg_pass(space.disasm); + CfgPass cfg_pass(binary); /* We collect subfunction addresses while running the pass */ for(int i = 0; i < (int)addresses.size(); i++) { @@ -40,11 +40,8 @@ static void ad_disassemble_all( if(!force) return; } - else { - for(Claim const &c: cfg_pass.resultClaims()) - space.disasm.addExclusiveClaim(c); + else successes++; - } } timer.stop(); printf("\n"); @@ -52,9 +49,9 @@ static void ad_disassemble_all( printr("[syscall] Finding syscall references..."); timer.restart(); - OS *os = space.os_analysis(); + OS *os = binary.OSAnalysis(); if(os) { - SyscallPass syscall_pass(space.disasm, os); + SyscallPass syscall_pass(binary, os); 
if(!syscall_pass.analyzeAllInstructions()) { errors++; if(!force) @@ -90,7 +87,10 @@ static std::vector parse_ad(Session &session, Parser &parser) void _ad(Session &session, std::vector const &addresses) { - ad_disassemble_all(session.currentBinary()->vspace(), addresses, false); + Binary *b = session.currentBinary(); + if(!b) + return FxOS_log(ERR, "No current binary!\n"); + ad_disassemble_all(*b, addresses, false); } //-- @@ -106,11 +106,9 @@ void _ads(Session &session) { Binary *b = session.currentBinary(); if(!b) - return; + return FxOS_log(ERR, "No current binary!\n"); - VirtualSpace &space = b->vspace(); OS *os = b->OSAnalysis(); - if(!os) { printf("ads: OS analysis failed, cannot enumerate syscalls"); return; @@ -120,7 +118,7 @@ void _ads(Session &session) for(int i = 0; i < os->syscall_count(); i++) addresses.push_back(os->syscall(i)); - ad_disassemble_all(space, addresses, true); + ad_disassemble_all(*b, addresses, true); } //--- diff --git a/shell/d.cpp b/shell/d.cpp index f6bb674..b1a6406 100644 --- a/shell/d.cpp +++ b/shell/d.cpp @@ -8,13 +8,13 @@ #include #include #include -#include #include +#include #include #include -static void disassemble(Session &session, Disassembly &disasm, - std::vector const &passes, uint32_t address) +static void disassemble( + Binary &binary, std::vector const &passes, u32 address) { for(auto pass: passes) { Timer timer; @@ -23,32 +23,22 @@ static void disassemble(Session &session, Disassembly &disasm, bool ok; if(pass == "cfg") { - CfgPass p(disasm); + CfgPass p(binary); ok = p.analyzeAnonymousFunction(address); } else if(pass == "pcrel") { - PcrelPass p(disasm); + PcrelPass p(binary); ok = p.analyzeAllInstructions(); } else if(pass == "syscall") { - OS *os = session.currentBinary()->OSAnalysis(); + OS *os = binary.OSAnalysis(); if(os) { - SyscallPass p(disasm, os); + SyscallPass p(binary, os); ok = p.analyzeAllInstructions(); } } else if(pass == "print") { - PrintPass p(disasm); - - p.promote_pcjump_loc = 
PrintPass::Promote; - p.promote_pcrel_loc = PrintPass::Promote; - p.promote_pcrel_value = PrintPass::Promote; - p.promote_syscall = PrintPass::Promote; - p.promote_syscallname = PrintPass::Append; - p.promote_symbol = PrintPass::Append; - p.promote_pcaddr_loc = PrintPass::Promote; - - ok = p.analyzeAllInstructions(); + viewAssemblyLegacyAddress(binary, address); } else { FxOS_log(ERR, "unknown pass <%s>", pass); @@ -88,10 +78,9 @@ static _d_args parse_d(Session &session, Parser &parser) void _d(Session &session, std::variant location) { - if(!session.currentBinary()) - return; - FxOS::Disassembly disasm(session.currentBinary()->vspace()); - + Binary *b = session.currentBinary(); + if(!b) + return FxOS_log(ERR, "No current binary!\n"); if(std::holds_alternative(location)) { Range range = std::get(location); @@ -111,10 +100,9 @@ void _d(Session &session, std::variant location) /* Load the block into memory */ for(uint32_t pc = range.start; pc < range.end; pc += 2) - disasm.getInstructionAt(pc, true); + b->vspace().disasm.getInstructionAt(pc, true); - disassemble(session, disasm, - {"pcrel", /*"constprop",*/ "syscall", "print"}, -1); + disassemble(*b, {"pcrel", /*"constprop",*/ "syscall", "print"}, -1); } else { uint32_t address = std::get(location); @@ -126,8 +114,7 @@ void _d(Session &session, std::variant location) } /* cfg implicitly does pcrel */ - disassemble(session, disasm, - {"cfg", /*"constprop",*/ "syscall", "print"}, address); + disassemble(*b, {"cfg", /*"constprop",*/ "syscall", "print"}, address); } } @@ -148,10 +135,6 @@ Disassembles code starting at the specified address, exploring branches until function terminators, invalid instructions, or dynamically-computed jumps. The default address is $ (the cursor of the current virtual space). -This command does not extend the virtual space's main disassembly. It reads -analysis results from the virtual space, but doesn't add new information. Try -as? to disassemble in the space's main disassembly. 
- The following disassembler passes are run: cfg Explores the code reachable from the start address pcrel Computes PC-relative addresses (eg mov.l, mova, bf, bra...)