From 23275d99c728704e13f98dd847e646159aed5197 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Tue, 5 Apr 2022 19:35:52 +0100 Subject: [PATCH] add infrastructure for function claiming parts of the binary The CFG pass will soon be extended to actually emit the claims. --- include/fxos/disassembly.h | 64 +++++++++++++++++++++++++--- lib/disassembly.cpp | 85 +++++++++++++++++++++++++++++++++++++- shell/v.cpp | 17 +++++--- 3 files changed, 154 insertions(+), 12 deletions(-) diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index 8dd67e0..298207e 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -114,6 +114,44 @@ struct Function std::vector callTargets; }; +//--- +// Dynamic claims +// +// Claims are small bits of information associated with sections of the virtual +// space, indicating what the data is used for. Typically it's either owned by +// a function, auxiliary function data, or some actual storage. +//--- + +struct Claim +{ + enum { + Function = 1, + FunctionAuxiliary = 2, + Data = 3, + Zero = 4, + Special = 5, + }; + + /* Start address within the virtual space */ + uint32_t address; + /* Size (bytes) */ + uint16_t size; + /* Type (one of the enum values above) */ + int16_t type; + /* Address of the owning function, when relevant (eg. Function claims) */ + uint32_t owner; + + /* Utility to check for intersections */ + bool intersects(Claim const &other) const; + + /* String representation */ + std::string str() const; +}; + +constexpr bool operator < (Claim const &c1, Claim const &c2) { + return c1.address < c2.address; +} + //--- // Storage for disassembled data //--- @@ -148,6 +186,23 @@ struct Disassembly Function *getFunctionAt(uint32_t pc); + // Claim information + + std::set<Claim> claims; + + /* Access the claim that owns the address, if there is one */ + Claim const *getClaimAt(uint32_t address); + + /* Access the first claim that overlaps this region, if any */ + Claim const *findClaimConflict(uint32_t address, int size); + + /* Add a new exclusive claim. 
If there is any intersection with previous + claims, this fails. */ + bool addExclusiveClaim(Claim const &c); + + // TODO: Add non-exclusive claims/handle collisions + + // TODO: We don't want to deal with instructions loaded without a minimum // amount of analysis; can we tie instruction loading to some analysis? }; @@ -182,10 +237,9 @@ public: bool analyzeFunctionRecursively(Function &func); bool analyzeFunctionRecursively(uint32_t pc); - /* Enqueue subfunctions of function (this is done automatically by - analyzeFunctionRecursively()) */ + /* For custom analysis functions: enqueue subfunctions. The update variant + enqueues them even if they were already seen. */ void enqueueSubfunctions(Function &func); - /* Same, even for functions that were already seen */ void updateSubfunctions(Function &func); private: @@ -216,9 +270,9 @@ public: /* Analyze an anonymous function; just assume one starts at PC */ bool analyzeAnonymousFunction(uint32_t pc); - /* Enqueue successors (analyzeFunction() does this automatically) */ + /* For custom analysis functions: enqueue successors. The update variant + enqueues them even if they were already seen. 
*/ void enqueueSuccessors(uint32_t pc, Instruction &ins); - /* Same, even for instructions that were already seen */ void updateSuccessors(uint32_t pc, Instruction &ins); private: diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index aa49ad5..7343911 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -52,12 +52,46 @@ Instruction::Instruction(uint16_t opcode): { } +//--- +// Dynamic claims +//--- + +bool Claim::intersects(Claim const &other) const +{ + /* Compute the actual intersection (64-bit ends so they cannot wrap) */ + uint64_t inter_start = std::max(this->address, other.address); + uint64_t inter_end = std::min((uint64_t)this->address + this->size, + (uint64_t)other.address + other.size); + + return inter_start < inter_end; +} + +std::string Claim::str() const +{ + std::string details = format(" (claim %08x:%d)", address, size); + + switch(type) { + case Claim::Function: + return format("function %08x", owner) + details; + case Claim::FunctionAuxiliary: + return std::string("auxiliary data") + details; + case Claim::Data: + return std::string("data") + details; + case Claim::Zero: + return std::string("zero region") + details; + case Claim::Special: + return format("special region %08x", address) + details; + default: + return format("unknown claim type %d", type) + details; + } +} + //--- // Storage for disassembled data //--- Disassembly::Disassembly(VirtualSpace &_vspace): - vspace {_vspace}, instructions {}, functions {} + vspace {_vspace}, instructions {}, functions {}, claims {} { } @@ -107,6 +141,55 @@ Function *Disassembly::getFunctionAt(uint32_t pc) return &it->second; } +Claim const *Disassembly::findClaimConflict(uint32_t address, int size) +{ + /* Lookup key; only the address takes part in the ordering of claims */ + Claim key = { .address = address, .size = 0, .type = 0, .owner = 0 }; + /* End of the region, computed on 64 bits: the sum may not wrap around + the address space, and the size is not truncated to 16 bits */ + uint64_t end = (uint64_t)address + (size > 0 ? size : 0); + + /* Find the first claim whose start is [> address] */ + auto it = this->claims.upper_bound(key); + /* Backtrack to find the last claim whose start is [<= address] */ + if(it != this->claims.begin()) + it--; + + while(it != this->claims.end()) { + /* We 
completely passed address+size, no conflict found */ + if(it->address >= end) + return nullptr; + + /* There is an intersection */ + uint64_t claim_end = (uint64_t)it->address + it->size; + if(std::max(it->address, address) < std::min(claim_end, end)) + return &*it; + + it++; + } + + return nullptr; +} + +Claim const *Disassembly::getClaimAt(uint32_t address) +{ + return findClaimConflict(address, 1); +} + +bool Disassembly::addExclusiveClaim(Claim const &c) +{ + Claim const *conflict = this->findClaimConflict(c.address, c.size); + if(conflict) { + FxOS_log(ERR, "exclusive claim for %s conflicts with %s", + c.str(), conflict->str()); + return false; + } + + this->claims.insert(c); + return true; +} + + //--- // DisassemblyPass //--- diff --git a/shell/v.cpp b/shell/v.cpp index 78c116c..2b91f29 100644 --- a/shell/v.cpp +++ b/shell/v.cpp @@ -30,22 +30,27 @@ static void show_vspace(std::string name, VirtualSpace &s, Session &session) { bool is_current = (&s == session.current_space); + int total_claim_size = 0; + for(Claim const &c: s.disasm.claims) + total_claim_size += c.size; + if(is_current) fmt::print("* "); fmt::print(theme(11), "{}\n", name); - if(s.bindings.size() == 0) { - fmt::print(" (no bindings)\n"); - return; - } - fmt::print(" Symbol table: {} symbols\n", s.symbols.symbols.size()); fmt::print(" Main disassembly: {} instructions\n", s.disasm.instructions.size()); fmt::print(" Functions: {}\n", s.disasm.functions.size()); + fmt::print(" Claims: {} (totalling {} bytes)\n", + s.disasm.claims.size(), total_claim_size); - fmt::print(" Region--Start---------End---------File--------------------\n"); + fmt::print(" Region--Start---------End---------File------------------\n"); + if(s.bindings.size() == 0) { + fmt::print(" (no bindings)\n"); + return; + } for(auto &b: s.bindings) { MemoryRegion const *ref = MemoryRegion::region_for(b.region); fmt::print(" {:<7s} 0x{:08x} .. 0x{:08x}", (ref ? ref->name : ""),