diff --git a/.gitignore b/.gitignore index 17839b7..5963663 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ # Build files -/build +/build* /bin # Semantic exclude diff --git a/include/fxos/function.h b/include/fxos/function.h index 5f1d900..7bd9218 100644 --- a/include/fxos/function.h +++ b/include/fxos/function.h @@ -105,7 +105,12 @@ struct Function: public BinaryObject return m_analysisResult.get(); } - /* Construction functions to be used only by the analysis pass. */ + /* List all function calls. This iterates on all instructions in the + function to find calls. Requires analysis for useful targets. Returns a + series of pairs {instruction address, call target}. */ + std::vector<std::pair<u32, RelConst>> findFunctionCalls() const; + + /* Construction functions to be used only by the exploration pass. */ bool exploreFunctionAt(u32 address); BasicBlock &createBasicBlock(u32 address, bool isEntryBlock); void sortBasicBlocks(); diff --git a/lib/function.cpp b/lib/function.cpp index afb7689..751d406 100644 --- a/lib/function.cpp +++ b/lib/function.cpp @@ -20,7 +20,7 @@ Function::Function(Binary &binary, u32 address): { /* Size is not determined at first. */ - m_blocks.reserve(4); + m_blocks.reserve(16); /* Entry block index is not determined at first. */ m_entryBlockIndex = -1; @@ -69,6 +69,24 @@ BasicBlock const &Function::basicBlockByAddress(u32 pc) const __builtin_unreachable(); } +std::vector<std::pair<u32, RelConst>> Function::findFunctionCalls() const +{ + std::vector<std::pair<u32, RelConst>> calls; + + for(BasicBlock const &bb: *this) { + for(auto const &[ins, state]: bb.instructionsWithState()) { + if(!ins.opcode().isCall()) + continue; + + CpuRegister targetReg = ins.opcode().operand(0).base(); + RelConst target = state.getRegister(targetReg); + calls.emplace_back(ins.address(), target); + } + } + + return calls; +} + /* Create a new basic block and add it to the function. Invalidates previous pointers to other blocks! 
*/ BasicBlock &Function::createBasicBlock(u32 address, bool isEntryBlock) @@ -171,7 +189,11 @@ static Superblock exploreSuperblock(Function &function, u32 entry, bool *error) int vspaceLen; u8 const *vspaceData = (u8 *)vspace.translate_dynamic(entry, &vspaceLen); u32 vspaceEnd = entry + vspaceLen; - assert(vspaceData); + if(!vspaceData) { + FxOS_log(ERR, "superblock %08x out of vspace", entry); + *error = true; + return sb; + } while(!terminatorFound || inDelaySlot) { sb.addresses.push_back(pc); diff --git a/shell/a.cpp b/shell/a.cpp index a4d0fb9..c0834a9 100644 --- a/shell/a.cpp +++ b/shell/a.cpp @@ -21,6 +21,7 @@ struct _af_args { bool update = false; bool force = false; + bool recursive = false; std::string name = ""; std::vector<u32> addresses; bool consistency = false; @@ -33,6 +34,7 @@ static _af_args parse_af(Session &session, Parser &parser) parser.option("--force", [&args](Parser &) { args.force = true; }); parser.option("-n", [&args](Parser &p) { args.name = p.symbol(""); }); parser.option("-c", [&args](Parser &) { args.consistency = true; }); + parser.option("-r", [&args](Parser &) { args.recursive = true; }); parser.accept_options(); if(args.consistency && parser.at_end()) @@ -98,43 +100,83 @@ static void _af_consistency(Binary const &binary) } } -static void af_analyze(Binary &binary, _af_args const &args) +static void af_analyze(Session &session, Binary &binary, _af_args const &args) { - int successes = 0, skipped = 0, errors = 0; Timer timer; timer.start(); - auto const &addresses = args.addresses; + std::deque<u32> functionQueue; + std::set<u32> seen; - for(int i = 0; i < (int)addresses.size(); i++) { - u32 entry = addresses[i]; - printr("[%d/%zu] Analyzing 0x%08x...", i + 1, addresses.size(), entry); + for(u32 entry: args.addresses) { + functionQueue.push_back(entry); + seen.insert(entry); + } + + std::set<u32> TEST_knownErrors; + + int done = 0, successes = 0, skipped = 0, errors = 0; + int total = functionQueue.size(); + int unresolvedCalls = 0; + + for(; 
!functionQueue.empty(); done++) { + u32 entry = functionQueue.front(); + functionQueue.pop_front(); + printr("[%d/%d] Analyzing 0x%08x...", done + 1, total, entry); /* Check if there is already a function defined here */ Function *existing = binary.functionAt(entry); - if(!existing) { + if(existing) { + if(!existing->hasAnalysis()) { + existing->runAnalysis(); + successes++; + } + else { + skipped++; + } + } + else { auto f = std::make_unique<Function>(binary, entry); if(f->exploreFunctionAt(entry)) { f->updateFunctionSize(); f->runAnalysis(); binary.addObject(std::move(f)); + existing = binary.functionAt(entry); successes++; } else { FxOS_log(ERR, "... while analyzing 0x%08x", entry); + TEST_knownErrors.insert(entry); errors++; + continue; } } - else { - skipped++; - } - /* TODO: Queue subfunctions for recursive analysis */ + RelConstDomain RCD; + if(args.recursive) { + for(auto const &[pc, target]: existing->findFunctionCalls()) { + if(!RCD.is_constant(target)) { + unresolvedCalls++; + continue; + } + u32 targetAddress = RCD.constant_value(target); + if(!seen.contains(targetAddress)) { + functionQueue.push_back(targetAddress); + seen.insert(targetAddress); + total++; + } + } + } } timer.stop(); - printf("\nAnalyzed %d functions (+%d skipped, +%d errors) in %s\n", + printf("\nAnalyzed %d functions (+%d already done, +%d errors) in %s\n", successes, skipped, errors, timer.format_time().c_str()); + if(args.recursive) + printf("There were %d unresolved call sites.\n", unresolvedCalls); + + if(successes > 0) + session.project().setDirty(); // _af_consistency(binary); } @@ -144,7 +186,7 @@ void _af(Session &session, _af_args const &args) Binary *b = session.currentBinary(); if(!b) return FxOS_log(ERR, "No current binary!\n"); - af_analyze(*b, args); + af_analyze(session, *b, args); } //-- @@ -156,6 +198,7 @@ static _af_args parse_afs(Session &, Parser &parser) _af_args args; parser.option("-u", [&args](Parser &) { args.update = true; }); parser.option("--force", [&args](Parser 
&) { args.force = true; }); + parser.option("-r", [&args](Parser &) { args.recursive = true; }); parser.accept_options(); parser.end(); return args; @@ -181,7 +224,7 @@ void _afs(Session &session, _af_args &args) for(int i = 0; i < os->syscall_count(); i++) args.addresses.push_back(os->syscall(i)); - af_analyze(*b, args); + af_analyze(session, *b, args); } //--- @@ -294,13 +337,17 @@ static ShellCommand _af_cmd( _af(s, args); }, [](Session &s, Parser &p) { parse_af(s, p); }, "Analysis: Functions", R"( -af [-u|--force] [-n ] [...] +af [-r] [-u|--force] [-n ] [...] af -c Explore and disassemble functions starting at the specified addresses. For each explored function, a binary object of Function type is created, and the function is statically analyzed. +With -r (recursive), also analyze called functions, recursively. This might not +be exhaustive because function calls can't always be resolved to a statically- +known address. + By default, addresses where functions already exist are not reanalyzed. Specifying -u (update) causes all functions to be re-processed, while keeping user-specified information (name, prototype, etc). Specifying --force causes @@ -325,7 +372,7 @@ static ShellCommand _afs_cmd( }, [](Session &s, Parser &p) { parse_afs(s, p); }, "Analysis: Functions (Syscalls)", R"( -afs [-u|--force] +afs [-r] [-u|--force] Explore and disassemble syscalls. Like af, but automatically pulls function names and prototypes out of the predefined syscall table, when there is one.