diff --git a/CMakeLists.txt b/CMakeLists.txt index 28dff18..2e37abc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,6 @@ set(fxos_core_SOURCES lib/function.cpp lib/memory.cpp lib/os.cpp - lib/passes/cfg.cpp lib/passes/pcrel.cpp lib/passes/syscall.cpp lib/project.cpp diff --git a/include/fxos/binary.h b/include/fxos/binary.h index f6794da..6ec6f12 100644 --- a/include/fxos/binary.h +++ b/include/fxos/binary.h @@ -60,6 +60,9 @@ struct Binary return m_objects; } + /* Add an object to the binary. */ + void addObject(std::unique_ptr &&obj); + /* Return the address of an object by name, if it exists. If there are multiple objects with the same name, returns an arbitrary one. */ std::optional objectAddress(std::string const &name) const; @@ -78,6 +81,12 @@ struct Binary std::vector objectsCovering(u32 address); std::vector objectsCovering(u32 address) const; + /* Return one or all functions defined at a given address. */ + Function *functionAt(u32 address); + Function const *functionAt(u32 address) const; + std::vector functionsAt(u32 address); + std::vector functionsAt(u32 address) const; + private: VirtualSpace m_vspace; diff --git a/include/fxos/function.h b/include/fxos/function.h index 00ef895..3b869e1 100644 --- a/include/fxos/function.h +++ b/include/fxos/function.h @@ -78,14 +78,24 @@ struct Function: public BinaryObject return m_blocks.end(); } - /* Construction functions to be used only by the cfg pass. */ - void exploreFunctionAt(u32 address); + /* Version number of the analysis that was run on the function. Used to + avoid re-analyzing unless there are new features. */ + int analysisVersion() const + { + return m_analysisVersion; + } + + /* Construction functions to be used only by the analysis pass. */ + bool exploreFunctionAt(u32 address); BasicBlock &addBasicBlock(BasicBlock &&bb); void updateFunctionSize(); + void setAnalysisVersion(int version); private: /* List of basic blocks (entry block is always number 0) */ std::vector m_blocks; + /* Analysis version */ + int m_analysisVersion = 0; }; /* Basic block within a function. */ @@ -338,6 +348,10 @@ struct Instruction assert(insmap[m_opcode] && "use of Instruction with invalid opcode"); return *insmap[m_opcode]; } + bool hasValidOpcode() const + { + return insmap[m_opcode].has_value(); + } /* Instruction's size in bytes. */ uint size() const { diff --git a/include/fxos/passes/cfg.h b/include/fxos/passes/cfg.h deleted file mode 100644 index 8bab215..0000000 --- a/include/fxos/passes/cfg.h +++ /dev/null @@ -1,75 +0,0 @@ -//---------------------------------------------------------------------------// -// 1100101 |_ mov #0, r4 __ // -// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // -// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // -// |_ base# + offset |_| /_\_\___/__/ // -//---------------------------------------------------------------------------// -// fxos/passes/cfg: Control Flow Graph construction -// -// This pass explores functions by loading every instruction's potential -// successor into the diassembly store. It also sets the [jmptarget] field of -// the Instructions as it goes, allowing other passes to traverse the (somewhat -// implicit) CFG. -// -// This is the main exploration pass; other passes do not typically load new -// instructions from the underlying disassembly. Straightforward passes such as -// [print] iterate on instructions loaded by this pass. -// -// The main problem that this pass has to deal with is delay slots. These are -// pretty tricky to deal with; for instance, in -// -// bra pc+120 -// mov #1, r4 -// -// the CPU will run [mov #1, r4] while performing the branch to pc+120 in order -// to fill an otherwise-unfillable pipeline cycle. This is annoying for all -// kinds of reasons, and fxos handles this by acting as if the mov itself had -// pc+120 as an unconditional successor. -// -// This could be tricky for the abstract interpreter because the jump target -// has to be computed using the state at the jump instruction, not the one at -// the delay slot. Luckily all delayed jumps are no-ops in terms of state, so -// the confusion has no effect. -// -// Note that jumping into a delay slot will activate the jump in fxos, which is -// not the actual behavior of the processor. I don't believe any compiler does -// this kind of things (most are not inherently designed for delay slots -// anyway). If such an instance is found, fxos will throw an exception and give -// up to make sure no analysis pass returns invalid results. -// -// Take-home message: delay slots are a pain to analyze, so we get rid of them -// as soon as possible and proceed with normal semantics. -//--- - -#ifndef FXOS_PASSES_CFG_H -#define FXOS_PASSES_CFG_H - -#include -#include -#include - -namespace FxOS { - -class CfgPass: public InstructionPass -{ -public: - CfgPass(Binary &binary); - bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override; - - /* Explore a new function at the specified address. This method creates the - function if it doesn't exist yet, explores its CFG, and generates claims - over relevant parts of the binary. */ - bool exploreFunction(uint32_t pc); - -private: - /* Last explored function */ - uint32_t m_lastFunction; - /* Set of instructions in a function, used to generate new claims */ - std::set m_claimedInstructions; - /* pcrel pass used to find call to other functions */ - PcrelPass m_pcrel; -}; - -} /* namespace FxOS */ - -#endif /* FXOS_PASSES_CFG_H */ diff --git a/lib/binary.cpp b/lib/binary.cpp index 8954fb9..9ae63b2 100644 --- a/lib/binary.cpp +++ b/lib/binary.cpp @@ -59,6 +59,11 @@ OS *Binary::OSAnalysis(bool force) const return m_os.get(); } +void Binary::addObject(std::unique_ptr &&obj) +{ + m_objects.insert({obj->address(), std::move(obj)}); +}; + std::optional Binary::objectAddress(std::string const &name) const { for(auto const &[address, obj]: m_objects) { @@ -101,7 +106,7 @@ std::vector Binary::objectsCovering(u32 address) std::vector objects; for(auto const &[obj_address, obj]: m_objects) { - if(obj_address <= address && obj_address + obj->size() < address) + if(obj_address <= address && obj_address + obj->size() > address) objects.push_back(obj.get()); } @@ -113,13 +118,53 @@ std::vector Binary::objectsCovering(u32 address) const std::vector objects; for(auto const &[obj_address, obj]: m_objects) { - if(obj_address <= address && obj_address + obj->size() < address) + if(obj_address <= address && obj_address + obj->size() > address) objects.push_back(obj.get()); } return objects; } +Function *Binary::functionAt(u32 address) +{ + for(auto obj: objectsAt(address)) { + if(obj->isFunction()) + return &obj->getFunction(); + } + return nullptr; +} + +Function const *Binary::functionAt(u32 address) const +{ + for(auto obj: objectsAt(address)) { + if(obj->isFunction()) + return &obj->getFunction(); + } + return nullptr; +} + +std::vector Binary::functionsAt(u32 address) +{ + std::vector funcs; + for(auto obj: objectsAt(address)) { + if(obj->isFunction()) + funcs.push_back(&obj->getFunction()); + } + return funcs; +} + +std::vector Binary::functionsAt(u32 address) const +{ + std::vector funcs; + for(auto obj: objectsAt(address)) { + if(obj->isFunction()) + funcs.push_back(&obj->getFunction()); + } + return funcs; +} + + + //=== BinaryObject ===// bool BinaryObject::intersects(BinaryObject const &other) const diff --git a/lib/function.cpp b/lib/function.cpp index 0fec445..4f38126 100644 --- a/lib/function.cpp +++ b/lib/function.cpp @@ -45,6 +45,11 @@ void Function::updateFunctionSize() this->setSize(max_address - this->address()); } +void Function::setAnalysisVersion(int version) +{ + m_analysisVersion = version; +} + /* The first step in building function CFGs is delimiting the blocks. Starting from the entry point, we generate "superblocks" by reading instructions linearly until we find a terminator. @@ -75,10 +80,11 @@ struct Superblock }; // TODO: Unclear what the exit status of the superblock is in case of error -static Superblock exploreSuperblock(Function &function, u32 entry) +static Superblock exploreSuperblock(Function &function, u32 entry, bool *error) { Superblock sb; sb.leaders.insert(entry); + *error = false; VirtualSpace &vspace = function.parentBinary().vspace(); bool inDelaySlot = false; @@ -92,16 +98,24 @@ static Superblock exploreSuperblock(Function &function, u32 entry) // TODO: Handle 32-bit DSP instructions if(!vspace.covers(pc, 2)) { FxOS_log(ERR, "superblock %08x exits vspace at %08x", entry, pc); + *error = true; break; } u32 opcodeBits = vspace.read_u16(pc); Instruction ins(function, pc, opcodeBits); + if(!ins.hasValidOpcode()) { + FxOS_log(ERR, "invalid instruction %08x: %04x in superblock", pc, + opcodeBits); + *error = true; + break; + } AsmInstruction opcode = ins.opcode(); if(inDelaySlot && !opcode.isValidDelaySlot()) { FxOS_log(ERR, "superblock %08x has invalid delay slot at %08x", entry, pc); + *error = true; break; } @@ -121,6 +135,9 @@ static Superblock exploreSuperblock(Function &function, u32 entry) pc += 2; } + if(*error) + return sb; + if(sb.mayFallthrough) sb.fallthroughTarget = pc; return sb; @@ -140,7 +157,7 @@ static bool cutSuperblockAt(std::vector &blocks, u32 address) return false; } -void Function::exploreFunctionAt(u32 functionAddress) +bool Function::exploreFunctionAt(u32 functionAddress) { assert(!(functionAddress & 1) && "function starts at unaligned address"); @@ -158,7 +175,11 @@ void Function::exploreFunctionAt(u32 functionAddress) if(cutSuperblockAt(blocks, entry)) continue; - Superblock sb = exploreSuperblock(*this, entry); + bool error = false; + Superblock sb = exploreSuperblock(*this, entry, &error); + + if(error) + return false; /* Process static jump targets and fallthrough targets to queue new superblocks or cut existing ones */ @@ -199,6 +220,8 @@ void Function::exploreFunctionAt(u32 functionAddress) } // TODO: Set successors and predecessors + + return true; } //=== BasicBlock ===// diff --git a/lib/load-asm.l b/lib/load-asm.l index d7a3a91..6ef843e 100644 --- a/lib/load-asm.l +++ b/lib/load-asm.l @@ -7,7 +7,7 @@ //---------------------------------------------------------------------------// #include -#include +#include #include #include diff --git a/lib/passes/cfg.cpp b/lib/passes/cfg.cpp deleted file mode 100644 index 74873f6..0000000 --- a/lib/passes/cfg.cpp +++ /dev/null @@ -1,145 +0,0 @@ -//---------------------------------------------------------------------------// -// 1100101 |_ mov #0, r4 __ // -// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // -// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // -// |_ base# + offset |_| /_\_\___/__/ // -//---------------------------------------------------------------------------// - -#include -#include -#include -#include -#include - -namespace FxOS { - -CfgPass::CfgPass(Binary &binary): - InstructionPass(binary), m_claimedInstructions {}, m_pcrel {binary} -{ - this->setAllowDiscovery(true); -} - -bool CfgPass::analyzeInstruction(uint32_t pc, OldInstruction &i) -{ - /* Don't explore successors if the instruction cannot be decoded, not - even pc+2. This will prevent wild overshoot. */ - if(!i.inst) { - FxOS_log(ERR, "invalid instruction at 0x%08x: 0x%04x", pc, i.opcode); - return false; - } - - m_claimedInstructions.insert(pc); - - /* Compute the jump target for jump instructions. This is easy because - they are all trivially computable. (...If they are not we dub them - "terminal" to avoid the computation!) */ - uint32_t jmptarget = 0xffffffff; - - if(i.inst->isAnyStaticJump()) { - auto &args = i.inst->args; - - if(i.inst->arg_count != 1 || args[0].kind != AsmArgument::PcJump) { - FxOS_log(ERR, "invalid jump instruction at 0x%08x", pc); - return false; - } - - jmptarget = (pc + 4) + args[0].disp; - - /* Make the target of the jump a leader */ - // TODO: Use Binary instructions - OldInstruction &target - = *m_binary.vspace().disasm.getInstructionAt(jmptarget, true); - target.leader = true; - - /* Check that it's not in a delay slot */ - if(target.delayslot) - throw std::logic_error(format( - "0x%08x jumps into 0x%08x, which is " - "a delay slot - this is unsupported by fxos and will produce " - "garbage analysis! (x_x)", - pc, jmptarget)); - } - - /* If this instruction is in a delay slot, check its type. A valid - delay slot has no branching properties on its own, so nothing new to - set in the properties. */ - if(i.delayslot) { - if(!i.inst->isValidDelaySlot()) { - FxOS_log(ERR, "invalid delay slot at 0x%08x", pc); - return false; - } - } - /* If it has a delay slot, create it at the next instruction */ - else if(i.inst->hasDelaySlot()) { - // TODO: Use Binary instructions - OldInstruction &slot - = *m_binary.vspace().disasm.getInstructionAt(pc + 2, true); - if(slot.leader) - throw std::logic_error(format( - "0x%08x is a leader and also a delay" - " slot - this is unsupported by fxos and will produce garbage " - "analysis! (x_x)", - pc + 2)); - if(!slot.inst->isValidDelaySlot()) { - FxOS_log(ERR, "invalid delay slot at 0x%08x", pc + 2); - return false; - } - - slot.delayslot = true; - slot.terminal = i.inst->isReturn() || i.inst->isDynamicJump(); - slot.jump = i.inst->isUnconditionalJump(); - slot.condjump = i.inst->isConditionalJump(); - slot.jmptarget = jmptarget; - } - /* Otherwise, use standard properties */ - else if(!i.inst->hasDelaySlot()) { - i.terminal = i.inst->isReturn() || i.inst->isDynamicJump(); - i.jump = i.inst->isUnconditionalJump(); - i.condjump = i.inst->isConditionalJump(); - i.jmptarget = jmptarget; - } - - m_pcrel.analyzeInstruction(pc, i); - return true; -} - -bool CfgPass::exploreFunction(uint32_t pc) -{ - m_lastFunction = pc; - m_claimedInstructions.clear(); - - // TODO: Use Binary functions - OldFunction *func = m_binary.vspace().disasm.getOrCreateFunctionAt(pc); - if(!this->analyzeFunction(pc)) - return false; - - RelConstDomain RCD; - - /* Look for call targets */ - for(uint32_t pc: m_claimedInstructions) { - // TODO: Use Binary instructions - OldInstruction const *ci - = m_binary.vspace().disasm.getInstructionAt(pc); - if(!ci) - continue; - AsmInstruction const &i = *ci->inst; - - /* Find function call instructions */ - if(i.isReturn() || !i.isCall() || i.arg_count < 1) - continue; - - /* The target must be known */ - if(!RCD.is_constant(ci->args[0].location)) - continue; - - uint32_t target = RCD.constant_value(ci->args[0].location); - auto &v = func->callTargets; - - if(std::find(v.begin(), v.end(), target) == v.end()) - func->callTargets.push_back(target); - } - - return true; -} - -} /* namespace FxOS */ diff --git a/shell/a.cpp b/shell/a.cpp index 516b610..5ecb700 100644 --- a/shell/a.cpp +++ b/shell/a.cpp @@ -4,121 +4,125 @@ #include "errors.h" #include "util.h" -#include +#include #include #include #include -#include -#include -#include #include #include +// TODO: fxos: Proper definition of function analysis version +#define FXOS_FUNCTION_ANALYSIS_VERSION 1 + //--- -// ad +// af //--- -static void ad_disassemble_all( - Binary &binary, std::vector const &addresses, bool force) +struct _af_args { - int successes = 0, errors = 0; - Timer timer; + bool update = false; + bool force = false; + std::string name = ""; + std::vector addresses; +}; - /* Analyze the CFGs of all functions */ - - timer.start(); - CfgPass cfg_pass(binary); - - /* We collect subfunction addresses while running the pass */ - for(int i = 0; i < (int)addresses.size(); i++) { - uint32_t entry = addresses[i]; - printr("[cfg %d/%zu] Disassembling 0x%08x...", i + 1, addresses.size(), - entry); - if(!cfg_pass.exploreFunction(entry)) { - FxOS_log(ERR, "while processing 0x%08x", entry); - errors++; - if(!force) - return; - } - else - successes++; - } - timer.stop(); - printf("\n"); - FxOS_log(LOG, "Finished pass in %s", timer.format_time()); - - printr("[syscall] Finding syscall references..."); - timer.restart(); - OS *os = binary.OSAnalysis(); - if(os) { - SyscallPass syscall_pass(binary, os); - if(!syscall_pass.analyzeAllInstructions()) { - errors++; - if(!force) - return; - } - } - timer.stop(); - printf("\n"); - FxOS_log(LOG, "Finished pass in %s", timer.format_time()); - - printf( - "Successfully analyzed %d functions (%d errors)\n", successes, errors); - - /* TODO: Get subfunction addresses by abstract interpretation and keep - going recursively */ -} - -static std::vector parse_ad(Session &session, Parser &parser) +static _af_args parse_af(Session &session, Parser &parser) { - Binary *b = session.currentBinary(); - if(!b) - return std::vector(); + _af_args args; + parser.option("-u", [&args](Parser &) { args.update = true; }); + parser.option("--force", [&args](Parser &) { args.force = true; }); + parser.option("-n", [&args](Parser &p) { args.name = p.symbol(""); }); + parser.accept_options(); - std::vector addresses; do { - addresses.push_back(parser.expr(b)); + args.addresses.push_back(parser.expr(session.currentBinary())); } while(!parser.at_end()); parser.end(); - return addresses; + return args; } -void _ad(Session &session, std::vector const &addresses) +static void af_analyze(Binary &binary, _af_args const &args) +{ + int successes = 0, skipped = 0, errors = 0; + Timer timer; + timer.start(); + + auto const &addresses = args.addresses; + int const FAV = FXOS_FUNCTION_ANALYSIS_VERSION; + + for(int i = 0; i < (int)addresses.size(); i++) { + u32 entry = addresses[i]; + printr("[%d/%zu] Analyzing 0x%08x...", i + 1, addresses.size(), entry); + + /* Check if there is already a function defined here */ + Function *existing = binary.functionAt(entry); + + if(!existing || existing->analysisVersion() < FAV) { + auto f = std::make_unique(binary, entry); + if(f->exploreFunctionAt(entry)) { + f->updateFunctionSize(); + f->setAnalysisVersion(FAV); + binary.addObject(std::move(f)); + successes++; + } + else { + FxOS_log(ERR, "... while analyzing 0x%08x", entry); + errors++; + } + } + else { + skipped++; + } + + /* TODO: Queue subfunctions for recursive analysis */ + } + timer.stop(); + printf("\nAnalyzed %d functions (+%d skipped, +%d errors) in %s\n", + successes, skipped, errors, timer.format_time().c_str()); + + /* TODO: Check for overlapping functions etc */ +} + +void _af(Session &session, _af_args const &args) { Binary *b = session.currentBinary(); if(!b) return FxOS_log(ERR, "No current binary!\n"); - ad_disassemble_all(*b, addresses, false); + af_analyze(*b, args); } //-- -// ads +// afs //--- -static void parse_ads(Session &, Parser &parser) +static _af_args parse_afs(Session &, Parser &parser) { + _af_args args; + parser.option("-u", [&args](Parser &) { args.update = true; }); + parser.option("--force", [&args](Parser &) { args.force = true; }); + parser.accept_options(); parser.end(); + return args; } -void _ads(Session &session) +void _afs(Session &session, _af_args &args) { Binary *b = session.currentBinary(); if(!b) return FxOS_log(ERR, "No current binary!\n"); OS *os = b->OSAnalysis(); - if(!os) { - printf("ads: OS analysis failed, cannot enumerate syscalls"); - return; - } + if(!os) + return FxOS_log(ERR, "afs: No OS analysis, cannot enumerate syscalls"); + + // TODO: afs: Use syscall info - std::vector addresses; for(int i = 0; i < os->syscall_count(); i++) - addresses.push_back(os->syscall(i)); + args.addresses.push_back(os->syscall(i)); - ad_disassemble_all(*b, addresses, true); + af_analyze(*b, args); } //--- @@ -224,33 +228,41 @@ void _am(Session &session, std::string name) // Command definitions //--- -static ShellCommand _ad_cmd( - "ad", +static ShellCommand _af_cmd( + "af", [](Session &s, Parser &p) { - auto addresses = parse_ad(s, p); - _ad(s, addresses); + auto args = parse_af(s, p); + _af(s, args); }, - [](Session &s, Parser &p) { parse_ad(s, p); }, "Analysis: Disassemble", R"( -ad [...] + [](Session &s, Parser &p) { parse_af(s, p); }, "Analysis: Functions", R"( +af [-u|--force] [-n ] [...] -Disassemble the given set of addresses into the current virtual space's main -disassembly. The main disassembly is used for OS-wide features like cross- -reference search or call graphs. +Explore and disassemble functions starting at the specified addresses. For each +explored function, a binary object of Function type is created, and the +function is statically analyzed. + +By default, addresses where functions already exist are not reanalyzed. +Specifying -u (update) causes all functions to be re-processed, while keeping +user-specified information (name, prototype, etc). Specifying --force causes +all functions to be reanalyzed from scratch without keeping user-specified +information. + +When a single address is given, -n can specify the name of the function object +to be created. )"); -static ShellCommand _ads_cmd( - "ads", +static ShellCommand _afs_cmd( + "afs", [](Session &s, Parser &p) { - parse_ads(s, p); - _ads(s); + auto args = parse_afs(s, p); + _afs(s, args); }, - [](Session &s, Parser &p) { parse_ads(s, p); }, - "Analysis: Disassemble all Syscalls", R"( -ads + [](Session &s, Parser &p) { parse_afs(s, p); }, + "Analysis: Functions (Syscalls)", R"( +afs [-u|--force] -Disassembles all syscalls entries using ad, which stores the results in the -current virtual space's main disassembly. Unlike ad, this commands continues -even if some syscalls fail to disassemble. +Explore and disassemble syscalls. Like af, but automatically pulls function +names and prototypes out of the predefined syscall table, when there is one. )"); static ShellCommand _am_cmd( diff --git a/shell/d.cpp b/shell/d.cpp index 8ec4e80..5e7123e 100644 --- a/shell/d.cpp +++ b/shell/d.cpp @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -14,56 +13,6 @@ #include #include -static void disassemble( - Binary &binary, std::vector const &passes, u32 address) -{ - for(auto pass: passes) { - Timer timer; - timer.start(); - - bool ok; - - if(pass == "cfg") { - CfgPass p(binary); - ok = p.analyzeAnonymousFunction(address); - } - else if(pass == "pcrel") { - PcrelPass p(binary); - ok = p.analyzeAllInstructions(); - } - else if(pass == "syscall") { - OS *os = binary.OSAnalysis(); - if(os) { - SyscallPass p(binary, os); - ok = p.analyzeAllInstructions(); - } - } - else if(pass == "print" && address + 1) { - // viewAssemblyLegacyAddress(binary, address); - } - else { - FxOS_log(ERR, "unknown pass <%s>", pass); - ok = false; - } - - timer.stop(); - FxOS_log(LOG, "Finished pass <%s> in %s", pass, timer.format_time()); - - if(!ok) { - FxOS_log(ERR, "pass <%s> failed", pass); - break; - } - } - - if(address + 1) { - Function f(binary, address); - f.exploreFunctionAt(address); - ViewAssemblyOptions opts; - opts.binary = &binary; - viewAssemblyFunction(f, &opts); - } -} - //--- // d //--- @@ -111,7 +60,14 @@ void _d(Session &session, std::variant location) for(uint32_t pc = range.start; pc < range.end; pc += 2) b->vspace().disasm.getInstructionAt(pc, true); - disassemble(*b, {"pcrel", /*"constprop",*/ "syscall"}, -1); + PcrelPass p(*b); + p.analyzeAllInstructions(); + + OS *os = b->OSAnalysis(); + if(os) { + SyscallPass p(*b, os); + p.analyzeAllInstructions(); + } MemoryRegion r; r.start = range.start; @@ -127,8 +83,12 @@ void _d(Session &session, std::variant location) address++; } - /* cfg implicitly does pcrel */ - disassemble(*b, {"cfg", /*"constprop",*/ "syscall", "print"}, address); + Function f(*b, address); + if(f.exploreFunctionAt(address)) { + ViewAssemblyOptions opts; + opts.binary = b; + viewAssemblyFunction(f, &opts); + } } }