diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index c505dcd..8dd67e0 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -114,31 +114,42 @@ struct Function std::vector callTargets; }; +//--- +// Storage for disassembled data +//--- + /* Disassembly interface that automatically loads code from a target */ struct Disassembly { - Disassembly(VirtualSpace &space); + Disassembly(VirtualSpace &vspace); - /* Check whether an instruction has been visited so far */ - bool hasins(uint32_t pc); - /* Get the minimum and maximum loaded instruction addresses */ - uint32_t minpc(); - uint32_t maxpc(); + VirtualSpace &vspace; - /* Get the storage to any concrete instruction. The instruction will be - loaded and initialized if it had not been read before. */ - Instruction &readins(uint32_t pc); + // Instruction information + + std::map instructions; + + /* Check whether an instruction is loaded at PC */ + bool hasInstructionAt(uint32_t pc); + /* Find an instruction by address. If the instruction is not loaded, + returns nullptr, unless [allowDiscovery] is set, in which case it's + loaded normally. */ + Instruction *getInstructionAt(uint32_t pc, bool allowDiscovery=false); + + + // Function information + + std::map functions; + + /* Check whether a function is defined at PC */ + bool hasFunctionAt(uint32_t pc); /* Find a function by address; returns nullptr if not yet defined */ Function *getFunctionAt(uint32_t pc); - /* For other access patterns */ - std::map instructions; - /* List of functions being documented */ - std::map functions; - /* Underlying target */ - VirtualSpace &space; + // TODO: We don't want to deal with instructions loaded without a minimum + // amount of analysis; can we tie instruction loading to some analysis? }; //--- @@ -182,7 +193,7 @@ private: }; /* A disassembly pass that observes each instruction independently */ -class InstructionPass: public DisassemblyPass +class InstructionPass: public FunctionPass { public: InstructionPass(Disassembly &disasm); @@ -199,7 +210,11 @@ public: virtual bool analyzeInstruction(uint32_t pc, Instruction &ins) = 0; /* Analyze a function by following its CFG */ - bool analyzeFunction(uint32_t pc); + using FunctionPass::analyzeFunction; + bool analyzeFunction(Function &func) override; + + /* Analyze an anonymous function; just assume one starts at PC */ + bool analyzeAnonymousFunction(uint32_t pc); /* Enqueue successors (analyzeFunction() does this automatically) */ void enqueueSuccessors(uint32_t pc, Instruction &ins); diff --git a/include/fxos/passes/cfg.h b/include/fxos/passes/cfg.h index a77cf92..657778d 100644 --- a/include/fxos/passes/cfg.h +++ b/include/fxos/passes/cfg.h @@ -53,6 +53,10 @@ class CfgPass: public InstructionPass public: CfgPass(Disassembly &disasm); bool analyzeInstruction(uint32_t pc, Instruction &inst) override; + + /* Explore a new function at the specified address. This is a wrapper + around [analyzeFunction()] that creates the function. */ + bool exploreFunction(uint32_t pc); }; } /* namespace FxOS */ diff --git a/include/fxos/util/Timer.h b/include/fxos/util/Timer.h index ae3c278..faaa685 100644 --- a/include/fxos/util/Timer.h +++ b/include/fxos/util/Timer.h @@ -34,6 +34,10 @@ struct Timer void start(); void stop(); + /* Reset the total time; reset and start */ + void reset(); + void restart(); + /* String representation of time */ std::string format_time() const; diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index 7e13555..aa49ad5 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -53,53 +53,48 @@ Instruction::Instruction(uint16_t opcode): } //--- -// Disassembler interface +// Storage for disassembled data //--- -Disassembly::Disassembly(VirtualSpace &_space): - instructions {}, functions {}, space {_space} +Disassembly::Disassembly(VirtualSpace &_vspace): + vspace {_vspace}, instructions {}, functions {} { } -bool Disassembly::hasins(uint32_t pc) +bool Disassembly::hasInstructionAt(uint32_t pc) { return this->instructions.count(pc) > 0; } -uint32_t Disassembly::minpc() -{ - if(this->instructions.empty()) - return 0xffffffff; - return this->instructions.cbegin()->first; -} - -uint32_t Disassembly::maxpc() -{ - if(this->instructions.empty()) - return 0xffffffff; - return this->instructions.crbegin()->first; -} - -Instruction &Disassembly::readins(uint32_t pc) +Instruction *Disassembly::getInstructionAt(uint32_t pc, bool allowDiscovery) { if(pc & 1) { FxOS_log(ERR, "reading instruction for disassembly at %08x", pc); pc &= -2; } - try { - return this->instructions.at(pc); + if(this->hasInstructionAt(pc)) { + return &this->instructions.at(pc); } - catch(std::out_of_range &e) { - uint16_t opcode = this->space.read_u16(pc); - Instruction ci(opcode); + else if(allowDiscovery) { + uint16_t opcode = this->vspace.read_u16(pc); + Instruction i(opcode); if(insmap[opcode]) - ci = Instruction(&*insmap[opcode]); + i = Instruction(&*insmap[opcode]); - this->instructions.emplace(pc, ci); - return this->instructions.at(pc); + this->instructions.emplace(pc, i); + return &this->instructions.at(pc); } + else { + FxOS_log(ERR, "reading non-existing instruction at %08x", pc); + return nullptr; + } +} + +bool Disassembly::hasFunctionAt(uint32_t pc) +{ + return this->functions.count(pc) > 0; } Function *Disassembly::getFunctionAt(uint32_t pc) @@ -188,7 +183,7 @@ void FunctionPass::updateSubfunctions(Function &func) //--- InstructionPass::InstructionPass(Disassembly &disasm): - DisassemblyPass(disasm), m_allowDiscovery {false} + FunctionPass(disasm), m_allowDiscovery {false} { } @@ -207,28 +202,23 @@ bool InstructionPass::analyzeAllInstructions() return ok; } -bool InstructionPass::analyzeFunction(uint32_t pc) +bool InstructionPass::analyzeFunction(Function &func) +{ + /* We don't have any function-specific information to pass yet, so we can + fall back to the anonymous version */ + return this->analyzeAnonymousFunction(func.address); +} + +bool InstructionPass::analyzeAnonymousFunction(uint32_t pc) { bool ok = true; m_queue.enqueue(pc); while(!m_queue.empty()) { uint32_t pc = m_queue.pop(); - Instruction *i = nullptr; + Instruction *i = m_disasm.getInstructionAt(pc, m_allowDiscovery); - if(m_allowDiscovery) { - i = &m_disasm.readins(pc); - } - else { - if(!m_disasm.instructions.count(pc)) { - FxOS_log(ERR, "no instruction at %08x", pc); - continue; - } - - i = &m_disasm.instructions.at(pc); - } - - if(this->analyzeInstruction(pc, *i)) + if(i != nullptr && this->analyzeInstruction(pc, *i)) this->enqueueSuccessors(pc, *i); else ok = false; } diff --git a/lib/passes/cfg.cpp b/lib/passes/cfg.cpp index 371d631..23af3c6 100644 --- a/lib/passes/cfg.cpp +++ b/lib/passes/cfg.cpp @@ -43,7 +43,7 @@ bool CfgPass::analyzeInstruction(uint32_t pc, Instruction &i) jmptarget = (pc+4) + args[0].disp; /* Make the target of the jump a leader */ - Instruction &target = m_disasm.readins(jmptarget); + Instruction &target = *m_disasm.getInstructionAt(jmptarget, true); target.leader = true; /* Check that it's not in a delay slot */ @@ -64,7 +64,7 @@ bool CfgPass::analyzeInstruction(uint32_t pc, Instruction &i) } /* If it has a delay slot, create it at the next instruction */ else if(i.inst->isdelayed()) { - Instruction &slot = m_disasm.readins(pc+2); + Instruction &slot = *m_disasm.getInstructionAt(pc+2, true); if(slot.leader) throw std::logic_error(format("%08x is a leader and also a delay " "slot - this is unsupported by fxos and will produce garbage " @@ -91,4 +91,15 @@ bool CfgPass::analyzeInstruction(uint32_t pc, Instruction &i) return true; } +bool CfgPass::exploreFunction(uint32_t pc) +{ + if(!m_disasm.hasFunctionAt(pc)) { + // TODO: Have proper function creation methods in Disassembly + Function func = { .address=pc, .callTargets={} }; + m_disasm.functions[pc] = func; + } + + return this->analyzeFunction(pc); +} + } /* namespace FxOS */ diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index c7e5d28..81247b0 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -31,7 +31,7 @@ bool PcrelPass::analyzeInstruction(uint32_t pc, Instruction &ci) /* Also compute the value. This is sign-extended from 16-bit with mov.w. There is no mov.b for this instruction. */ - VirtualSpace &space = m_disasm.space; + VirtualSpace &space = m_disasm.vspace; uint32_t v = -1; if(i->opsize == 2 && space.covers(addr, 2)) { diff --git a/lib/passes/print.cpp b/lib/passes/print.cpp index 1b161e5..23c5a21 100644 --- a/lib/passes/print.cpp +++ b/lib/passes/print.cpp @@ -20,10 +20,10 @@ PrintPass::PrintPass(Disassembly &disasm): /* Default parameters: all 0 */ /* Use an OS observer to describe syscalls in header lines */ - m_os = disasm.space.os_analysis(); + m_os = disasm.vspace.os_analysis(); /* Use the symbol tables from the virtual space */ - m_symtables.push_back(disasm.space.symbols); + m_symtables.push_back(disasm.vspace.symbols); } bool PrintPass::analyzeInstruction(uint32_t pc, Instruction &i) diff --git a/lib/util/Timer.cpp b/lib/util/Timer.cpp index a634fe0..6cd1954 100644 --- a/lib/util/Timer.cpp +++ b/lib/util/Timer.cpp @@ -36,6 +36,17 @@ void Timer::stop(void) } } +void Timer::reset(void) +{ + this->time_ns = 0; +} + +void Timer::restart(void) +{ + this->reset(); + this->start(); +} + std::string Timer::format_time(uint64_t time_ns) { if(time_ns < 2000) return format("%lld ns", time_ns); diff --git a/lib/util/log.cpp b/lib/util/log.cpp index 8ecc943..8ba9412 100644 --- a/lib/util/log.cpp +++ b/lib/util/log.cpp @@ -8,7 +8,6 @@ #include #include #include -#include namespace FxOS { @@ -61,20 +60,22 @@ void logmsg(int level, char const *file, int line, char const *func, message.pop_back(); } + printf("\x1b[K"); + if(prefix) { if(level == LOG_LEVEL_LOG) - std::cerr << "[" << file << ":" << line << "@" << func << "] "; + fprintf(stderr, "\e[30;1m[%s:%d@%s]\e[0m ", file, line, func); if(level == LOG_LEVEL_WRN) - std::cerr << "warning: "; + fprintf(stderr, "warning: "); if(level == LOG_LEVEL_ERR) - std::cerr << "\x1b[31;1merror:\x1b[0m "; + fprintf(stderr, "\x1b[31;1merror:\x1b[0m "); } - else std::cerr << " "; + else fprintf(stderr, " "); - std::cerr << message; + fputs(message.c_str(), stderr); if(endline) { - std::cerr << '\n'; + fputc('\n', stderr); lastlevel = -1; } else { diff --git a/shell/a.cpp b/shell/a.cpp index dbea009..21281f9 100644 --- a/shell/a.cpp +++ b/shell/a.cpp @@ -228,62 +228,58 @@ void _af4(Session &session, uint32_t value, std::vector ®ions) static void ad_disassemble_all(VirtualSpace &space, std::vector const &addresses, bool force) { - std::vector passes = { "cfg", "pcrel", "syscall" }; int successes=0, errors=0; + Timer timer; - for(auto pass: passes) { - Timer timer; - timer.start(); + /* Analyze the CFGs of all functions */ - if(pass == "cfg") { - CfgPass p(space.disasm); + timer.start(); + CfgPass cfg_pass(space.disasm); - /* We collect subfunction addresses while running the pass */ - for(int i = 0; i < (int)addresses.size(); i++) { - uint32_t entry = addresses[i]; - printr("[cfg %d/%zu] Disassembling %08x...", - i+1, addresses.size(), entry); - if(!p.analyzeFunction(entry)) { - FxOS_log(ERR, "while processing %08x", entry); - errors++; - if(!force) break; - } - else successes++; - - /* TODO: Get subfunction addresses here */ - } + /* We collect subfunction addresses while running the pass */ + for(int i = 0; i < (int)addresses.size(); i++) { + uint32_t entry = addresses[i]; + printr("[cfg %d/%zu] Disassembling %08x...", + i+1, addresses.size(), entry); + if(!cfg_pass.exploreFunction(entry)) { + FxOS_log(ERR, "while processing %08x", entry); + errors++; + if(!force) return; } - else if(pass == "pcrel") { - printr("[pcrel] Resolving PC-relative addressing modes..."); - PcrelPass p(space.disasm); - if(!p.analyzeAllInstructions()) { - errors++; - if(!force) break; - } - } - else if(pass == "syscall") { - printr("[syscall] Finding syscall references..."); - OS *os = space.os_analysis(); - if(os) { - SyscallPass p(space.disasm, os); - if(!p.analyzeAllInstructions()) { - errors++; - if(!force) break; - } - } - } - else { - FxOS_log(ERR, "unknown pass <%s>", pass); - break; - } - printf("\n"); + else successes++; - timer.stop(); - FxOS_log(LOG, "Finished pass <%s> in %s", pass, timer.format_time()); - - if(errors && !force) - break; + /* TODO: Get subfunction addresses here */ } + timer.stop(); + printf("\n"); + FxOS_log(LOG, "Finished pass in %s", timer.format_time()); + + /* Annotate all decoded instructions with pcrel/syscall + TODO: analyze only the functions, if possible */ + printr("[pcrel] Resolving PC-relative addressing modes..."); + timer.restart(); + PcrelPass pcrel_pass(space.disasm); + if(!pcrel_pass.analyzeAllInstructions()) { + errors++; + if(!force) return; + } + timer.stop(); + printf("\n"); + FxOS_log(LOG, "Finished pass in %s", timer.format_time()); + + printr("[syscall] Finding syscall references..."); + timer.restart(); + OS *os = space.os_analysis(); + if(os) { + SyscallPass syscall_pass(space.disasm, os); + if(!syscall_pass.analyzeAllInstructions()) { + errors++; + if(!force) return; + } + } + timer.stop(); + printf("\n"); + FxOS_log(LOG, "Finished pass in %s", timer.format_time()); printf("Successfully analyzed %d functions (%d errors)\n", successes, errors); diff --git a/shell/d.cpp b/shell/d.cpp index d9c2cc1..a165cb5 100644 --- a/shell/d.cpp +++ b/shell/d.cpp @@ -26,7 +26,7 @@ static void disassemble(Session &session, Disassembly &disasm, if(pass == "cfg") { CfgPass p(disasm); - ok = p.analyzeFunction(address); + ok = p.analyzeAnonymousFunction(address); } else if(pass == "pcrel") { @@ -136,7 +136,7 @@ void _dr(Session &session, Range range) /* Load the block into memory */ for(uint32_t pc = range.start; pc < range.end; pc += 2) - disasm.readins(pc); + disasm.getInstructionAt(pc, true); disassemble(session, disasm, { "pcrel", /*"constprop",*/ "syscall", "print" }, -1); diff --git a/shell/v.cpp b/shell/v.cpp index 78f087b..78c116c 100644 --- a/shell/v.cpp +++ b/shell/v.cpp @@ -42,6 +42,8 @@ static void show_vspace(std::string name, VirtualSpace &s, Session &session) s.symbols.symbols.size()); fmt::print(" Main disassembly: {} instructions\n", s.disasm.instructions.size()); + fmt::print(" Functions: {}\n", + s.disasm.functions.size()); fmt::print(" Region--Start---------End---------File--------------------\n"); for(auto &b: s.bindings) {