diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h
index fdba634..83f5d7f 100644
--- a/include/fxos/disassembly.h
+++ b/include/fxos/disassembly.h
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include <fxos/util/Queue.h>
 
 #include
 #include
@@ -34,6 +35,9 @@ void register_instruction(AsmInstruction const &inst);
 /* Lex and register an assembly instruction table. */
 int load_instructions(Buffer const &file);
 
+//---
+// Dynamic information on instructions
+//---
 
 /* An argument for a disassembled instruction. */
 struct Argument
@@ -98,6 +102,18 @@ struct Instruction
     uint32_t jmptarget;
 };
 
+//---
+// Dynamic information on functions
+//---
+
+struct Function
+{
+    uint32_t address;
+
+    /* List of subfunctions called. TODO: Not yet populated by anyone */
+    std::vector<uint32_t> callTargets;
+};
+
 /* Disassembly interface that automatically loads code from a target */
 struct Disassembly
 {
@@ -113,8 +129,14 @@ struct Disassembly
        loaded and initialized if it had not been read before. */
     Instruction &readins(uint32_t pc);
 
+    /* Find a function by address; returns nullptr if not yet defined */
+    Function *getFunctionAt(uint32_t pc);
+
     /* For other access patterns */
     std::map<uint32_t, Instruction> instructions;
+    /* List of functions being documented */
+    std::map<uint32_t, Function> functions;
+
     /* Underlying target */
     VirtualSpace &space;
 };
@@ -126,49 +148,61 @@ struct Disassembly
 class DisassemblyPass
 {
 public:
-    DisassemblyPass(Disassembly &disasm, std::string name="");
-
-    /* Analyze a single instruction, probably updating the annotations and
-       the state of the pass itself. */
-    virtual bool analyze(uint32_t pc, Instruction &inst) = 0;
-
-    /* Run the pass from the given entry point */
-    virtual bool run(uint32_t entry_pc);
-
-protected:
-    /* Add an instruction to the queue to analyze next */
-    void enqueue(uint32_t pc);
-    /* Add the next loaded instruction in address space */
-    void enqueue_next(uint32_t pc);
-    /* Enqueue the unseen successors of this instruction */
-    void enqueue_unseen_successors(uint32_t pc, Instruction &inst);
-    /* Enqueue all the success of this instruction */
-    void enqueue_all_successors(uint32_t pc, Instruction &inst);
+    DisassemblyPass(Disassembly &disasm);
 
     /* Underlying disassembly */
     Disassembly &m_disasm;
+};
+
+/* A disassembly pass that observes each function independently */
+class FunctionPass: public DisassemblyPass
+{
+public:
+    FunctionPass(Disassembly &disasm);
+
+    /* Analyze the whole disassembly */
+    bool analyzeAllFunctions();
+
+    /* Analyze a function */
+    virtual bool analyzeFunction(Function &func) = 0;
+    bool analyzeFunction(uint32_t pc);
+
+    /* Analyze a function and its subfunctions, recursively */
+    bool analyzeFunctionRecursively(Function &func);
+    bool analyzeFunctionRecursively(uint32_t pc);
+
+    /* Enqueue subfunctions of function (this is done automatically by
+       analyzeFunctionRecursively()) */
+    void enqueueSubfunctions(Function &func);
+    /* Same, even for functions that were already seen */
+    void updateSubfunctions(Function &func);
 
 private:
-    /* Blocks to visit next, ordered for uniqueness */
-    std::set<uint32_t> m_next;
-    std::priority_queue<uint32_t> m_queue;
-
-    /* Visited blocks */
-    std::set<uint32_t> m_seen;
-
-    /* Name of pass */
-    std::string m_name;
+    Queue<uint32_t> m_queue;
 };
 
 /* A disassembly pass that observes each instruction independently */
-class InstructionDisassemblyPass: public DisassemblyPass
+class InstructionPass: public DisassemblyPass
 {
 public:
-    InstructionDisassemblyPass(Disassembly &disasm, std::string name="");
+    InstructionPass(Disassembly &disasm);
 
-    /* Runs the pass from the first instruction currently loaded, all the
-       way down to the bottom, as if always using enqueue_next(). */
-    virtual bool run();
+    /* Analyze the whole disassembly in increasing address order */
+    bool analyzeAllInstructions();
+
+    /* Analyze a single instruction */
+    virtual bool analyzeInstruction(uint32_t pc, Instruction &ins) = 0;
+
+    /* Analyze a function by following its CFG */
+    bool analyzeFunction(uint32_t pc);
+
+    /* Enqueue successors (analyzeFunction() does this automatically) */
+    void enqueueSuccessors(uint32_t pc, Instruction &ins);
+    /* Same, even for instructions that were already seen */
+    void updateSuccessors(uint32_t pc, Instruction &ins);
+
+private:
+    Queue<uint32_t> m_queue;
 };
 
 } /* namespace FxOS */
diff --git a/include/fxos/util/Queue.h b/include/fxos/util/Queue.h
new file mode 100644
index 0000000..bf0088f
--- /dev/null
+++ b/include/fxos/util/Queue.h
@@ -0,0 +1,61 @@
+//---------------------------------------------------------------------------//
+// 1100101 |_ mov #0, r4         __                                          //
+//      11 |_ <0xb380 %5c4>     / _|_ _____ ___                              //
+//    0110 |_ 3.50 -> 3.60     |  _\ \ / _ (_-<                              //
+//         |_ base# + offset   |_| /_\_\___/__/                              //
+//---------------------------------------------------------------------------//
+// fxos/util/Queue: Simple queue that handles recursion
+//---
+
+#ifndef FXOS_UTIL_QUEUE_H
+#define FXOS_UTIL_QUEUE_H
+
+#include <queue>
+#include <set>
+
+/* FIFO worklist that remembers every object it was given, so that enqueue()
+   can skip objects that were already visited or are still pending. */
+template<typename T>
+struct Queue
+{
+    Queue() = default;
+
+    /* Whether there is nothing left to visit */
+    bool empty() const {
+        return pending.empty();
+    }
+    /* Remove and return the next object to visit; the queue must not be
+       empty. Returned by value: a reference to the front element would
+       dangle as soon as that element is popped. */
+    T pop() {
+        T object = pending.front();
+        pending.pop();
+        return object;
+    }
+    /* Forget all pending and visited objects */
+    void clear() {
+        /* std::queue has no clear(); replace it with an empty queue */
+        pending = std::queue<T>();
+        seen.clear();
+    }
+
+    /* Enqueue an object to visit later (if not already visited) */
+    void enqueue(T object) {
+        if(!seen.count(object)) {
+            pending.push(object);
+            seen.insert(object);
+        }
+    }
+    /* Enqueue an object even for later update, regardless of loops */
+    void update(T object) {
+        pending.push(object);
+        seen.insert(object);
+    }
+
+    /* Queue of objects to visit next */
+    std::queue<T> pending;
+    /* Objects already visited or currently pending */
+    std::set<T> seen;
+};
+
+#endif /* FXOS_UTIL_QUEUE_H */
diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp
index dcd8945..a807894 100644
--- a/lib/disassembly.cpp
+++ b/lib/disassembly.cpp
@@ -57,7 +57,7 @@ Instruction::Instruction(uint16_t opcode):
 //---
 
 Disassembly::Disassembly(VirtualSpace &_space):
-    instructions {}, space {_space}
+    instructions {}, functions {}, space {_space}
 {
 }
 
@@ -102,86 +102,149 @@ Instruction &Disassembly::readins(uint32_t pc)
     }
 }
 
+Function *Disassembly::getFunctionAt(uint32_t pc)
+{
+    auto it = this->functions.find(pc);
+
+    if(it == this->functions.end())
+        return nullptr;
+    else
+        return &it->second;
+}
+
 //---
-// Base pass
+// DisassemblyPass
 //---
 
-DisassemblyPass::DisassemblyPass(Disassembly &disasm, std::string name):
-    m_disasm {disasm}, m_name {name}
+DisassemblyPass::DisassemblyPass(Disassembly &disasm):
+    m_disasm {disasm}
 {
 }
 
-void DisassemblyPass::enqueue(uint32_t pc)
-{
-    if(m_next.count(pc))
-        return;
+//---
+// FunctionPass
+//---
 
-    m_next.insert(pc);
-    m_queue.push(pc);
+FunctionPass::FunctionPass(Disassembly &disasm):
+    DisassemblyPass(disasm)
+{
 }
 
-void DisassemblyPass::enqueue_next(uint32_t pc)
+bool FunctionPass::analyzeAllFunctions()
 {
-    /* TODO: DisassemblyPass::enqueue_next is inefficient */
-    do pc += 2;
-    while(!m_disasm.hasins(pc));
+    bool ok = true;
 
-    enqueue(pc);
+    for(auto &pair: m_disasm.functions)
+        ok &= this->analyzeFunction(pair.second);
+
+    return ok;
 }
 
-void DisassemblyPass::enqueue_unseen_successors(uint32_t pc, Instruction &i)
+bool FunctionPass::analyzeFunction(uint32_t pc)
 {
-    if(!i.terminal && !i.jump) {
-        if(!m_seen.count(pc + 2)) enqueue(pc + 2);
-    }
-    if(i.jump || i.condjump) {
-        if(!m_seen.count(i.jmptarget)) enqueue(i.jmptarget);
+    Function *func = m_disasm.getFunctionAt(pc);
+    if(!func) {
+        FxOS_log(ERR, "no function at %08x", pc);
+        return false;
     }
+    return this->analyzeFunction(*func);
 }
 
-void DisassemblyPass::enqueue_all_successors(uint32_t pc, Instruction &i)
+bool FunctionPass::analyzeFunctionRecursively(Function &func)
+{
+    return this->analyzeFunctionRecursively(func.address);
+}
+
+/* Analyze the function at pc, then the functions listed in callTargets,
+   transitively; addresses the queue has already seen are skipped. */
+bool FunctionPass::analyzeFunctionRecursively(uint32_t pc)
+{
+    bool ok = true;
+    m_queue.enqueue(pc);
+
+    while(!m_queue.empty()) {
+        uint32_t next_pc = m_queue.pop();
+        Function *next = m_disasm.getFunctionAt(next_pc);
+
+        /* getFunctionAt() returns nullptr when no function is defined */
+        if(!next) {
+            FxOS_log(ERR, "no function at %08x", next_pc);
+            ok = false;
+            continue;
+        }
+
+        ok &= this->analyzeFunction(*next);
+        this->enqueueSubfunctions(*next);
+    }
+
+    return ok;
+}
+
+void FunctionPass::enqueueSubfunctions(Function &func)
+{
+    for(uint32_t pc: func.callTargets)
+        m_queue.enqueue(pc);
+}
+
+void FunctionPass::updateSubfunctions(Function &func)
+{
+    for(uint32_t pc: func.callTargets)
+        m_queue.update(pc);
+}
+
+//---
+// InstructionPass
+//---
+
+InstructionPass::InstructionPass(Disassembly &disasm):
+    DisassemblyPass(disasm)
+{
+}
+
+bool InstructionPass::analyzeAllInstructions()
+{
+    bool ok = true;
+
+    for(auto &pair: m_disasm.instructions)
+        ok &= this->analyzeInstruction(pair.first, pair.second);
+
+    return ok;
+}
+
+bool InstructionPass::analyzeFunction(uint32_t pc)
+{
+    bool ok = true;
+    m_queue.enqueue(pc);
+
+    while(!m_queue.empty()) {
+        uint32_t pc = m_queue.pop();
+        if(!m_disasm.instructions.count(pc)) {
+            FxOS_log(ERR, "no instruction at %08x", pc);
+            continue;
+        }
+
+        Instruction &i = m_disasm.instructions.at(pc);
+        ok &= this->analyzeInstruction(pc, i);
+        this->enqueueSuccessors(pc, i);
+    }
+
+    return ok;
+}
+
+void InstructionPass::enqueueSuccessors(uint32_t pc, Instruction &i)
 {
     if(!i.terminal && !i.jump)
-        enqueue(pc + 2);
+        m_queue.enqueue(pc + 2);
     if(i.jump || i.condjump)
-        enqueue(i.jmptarget);
+        m_queue.enqueue(i.jmptarget);
 }
 
-bool DisassemblyPass::run(uint32_t entry_pc)
+void InstructionPass::updateSuccessors(uint32_t pc, Instruction &i)
 {
-    enqueue(entry_pc);
-
-    while(m_queue.size()) {
-        uint32_t pc = m_queue.top();
-
-        m_queue.pop();
-        m_next.erase(m_next.find(pc));
-
-        Instruction &ci = m_disasm.readins(pc);
-        if(!analyze(pc, ci))
-            return false;
-
-        m_seen.insert(pc);
-    }
-    return true;
-}
-
-//---
-// Base instruction-level pass
-//---
-
-InstructionDisassemblyPass::InstructionDisassemblyPass(Disassembly &disasm,
-        std::string name): DisassemblyPass(disasm, name)
-{
-}
-
-bool InstructionDisassemblyPass::run()
-{
-    for(auto &pair: m_disasm.instructions) {
-        if(!analyze(pair.first, pair.second))
-            return false;
-    }
-    return true;
+    if(!i.terminal && !i.jump)
+        m_queue.update(pc + 2);
+    if(i.jump || i.condjump)
+        m_queue.update(i.jmptarget);
 }
 
 } /* namespace FxOS */
diff --git a/shell/a.cpp b/shell/a.cpp
index 0ba334a..87448a1 100644
--- a/shell/a.cpp
+++ b/shell/a.cpp
@@ -237,14 +237,20 @@ static void ad_disassemble_all(VirtualSpace &space,
 
         if(pass == "cfg") {
             CfgPass p(space.disasm);
-            for(uint32_t addr: addresses) {
-                printr("[cfg] Disassembling %08x...", addr);
-                if(!p.run(addr)) {
-                    FxOS_log(ERR, "while processing %08x", addr);
+
+            /* We collect subfunction addresses while running the pass */
+            for(int i = 0; i < (int)addresses.size(); i++) {
+                uint32_t entry = addresses[i];
+                printr("[cfg %d/%zu] Disassembling %08x...",
+                    i+1, addresses.size(), entry);
+                if(!p.run(entry)) {
+                    FxOS_log(ERR, "while processing %08x", entry);
                     errors++;
                     if(!force) break;
                 }
                 else successes++;
+
+                /* TODO: Get subfunction addresses here */
             }
         }
         else if(pass == "pcrel") {