Add infrastructure for functions to claim parts of the binary

The CFG pass will soon be extended to actually emit the claims.
This commit is contained in:
Lephenixnoir 2022-04-05 19:35:52 +01:00
parent 8b1105fb03
commit 23275d99c7
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
3 changed files with 154 additions and 12 deletions

View File

@ -114,6 +114,44 @@ struct Function
std::vector<uint32_t> callTargets;
};
//---
// Dynamic claims
//
// Claims are small bits of information associated with sections of the virtual
// space, indicating what the data is used for. Typically it's either owned by
// a function, auxiliary function data, or some actual storage.
//---
/* A claim over the byte range [address, address+size) of the virtual space,
   tagged with the kind of content found there. Claims are ordered by start
   address only (see operator < below the struct). */
struct Claim
{
/* Kinds of claims, stored in the [type] field. The descriptions below follow
   the labels that str() prints for each kind. */
enum {
/* Region owned by a function; [owner] is the function's address */
Function = 1,
/* Auxiliary data of a function */
FunctionAuxiliary = 2,
/* Plain data */
Data = 3,
/* "Zero region" -- presumably zero-filled memory; TODO confirm */
Zero = 4,
/* "Special region" -- exact meaning not visible here; TODO confirm */
Special = 5,
};
/* Start address within the virtual space */
uint32_t address;
/* Size (bytes); 16-bit, so a single claim covers at most 65535 bytes */
uint16_t size;
/* Type; one of the enum values above (str() tolerates other values) */
int16_t type;
/* Function address, when relevant (eg. function) */
uint32_t owner;
/* Utility to check for intersections: true if this claim and [other] have
   at least one byte in common */
bool intersects(Claim const &other) const;
/* String representation: a kind-specific label followed by the claimed
   address and size */
std::string str() const;
};
/* Strict ordering of claims by start address. Size, type and owner are not
   compared, so two claims starting at the same address are equivalent as far
   as ordered containers are concerned. */
constexpr bool operator < (Claim const &lhs, Claim const &rhs) {
    return rhs.address > lhs.address;
}
//---
// Storage for disassembled data
//---
@ -148,6 +186,23 @@ struct Disassembly
Function *getFunctionAt(uint32_t pc);
// Claim information
/* All recorded claims, sorted by start address (Claim's operator <) */
std::set<Claim> claims;
/* Access the claim that owns the address, if there is one; returns nullptr
   otherwise. Equivalent to findClaimConflict(address, 1). */
Claim const *getClaimAt(uint32_t address);
/* Access the first claim (in address order) that overlaps the region of
   [size] bytes starting at [address], if any; returns nullptr otherwise. */
Claim const *findClaimConflict(uint32_t address, int size);
/* Add a new exclusive claim. If there is any intersection with previous
   claims, this fails: an error is logged, the claim is not added, and false
   is returned. Returns true otherwise. */
bool addExclusiveClaim(Claim const &c);
// TODO: Add non-exclusive claims/handle collisions
// TODO: We don't want to deal with instructions loaded without a minimum
// amount of analysis; can we tie instruction loading to some analysis?
};
@ -182,10 +237,9 @@ public:
bool analyzeFunctionRecursively(Function &func);
bool analyzeFunctionRecursively(uint32_t pc);
/* For custom analysis functions: enqueue the subfunctions of a function.
   analyzeFunctionRecursively() already does this automatically. */
void enqueueSubfunctions(Function &func);
/* Update variant: same, but subfunctions are enqueued even if they were
   already seen. */
void updateSubfunctions(Function &func);
private:
@ -216,9 +270,9 @@ public:
/* Analyze an anonymous function; just assume one starts at PC */
bool analyzeAnonymousFunction(uint32_t pc);
/* For custom analysis functions: enqueue successors. analyzeFunction()
   already does this automatically. */
void enqueueSuccessors(uint32_t pc, Instruction &ins);
/* Update variant: same, but successors are enqueued even if they were
   already seen. */
void updateSuccessors(uint32_t pc, Instruction &ins);
private:

View File

@ -52,12 +52,46 @@ Instruction::Instruction(uint16_t opcode):
{
}
//---
// Dynamic claims
//---
/* Check whether this claim and [other] have at least one byte in common.
   Both claims are treated as half-open ranges [address, address+size), so an
   empty claim (size 0) never intersects anything. */
bool Claim::intersects(Claim const &other) const
{
    /* End bounds are computed on 64 bits: address+size can exceed 2^32 for a
       claim at the very top of the address space, and a 32-bit sum would wrap
       around to a small value and hide a real overlap. */
    uint64_t const this_end =
        static_cast<uint64_t>(this->address) + this->size;
    uint64_t const other_end =
        static_cast<uint64_t>(other.address) + other.size;

    /* The intersection is [max of starts, min of ends); it is non-empty
       exactly when its start lies strictly before its end. */
    uint64_t const inter_start = std::max(this->address, other.address);
    uint64_t const inter_end = std::min(this_end, other_end);
    return inter_start < inter_end;
}
/* Human-readable description of the claim: a label that depends on the claim
   type, followed by the claimed address and size. Unknown types are printed
   numerically rather than rejected. */
std::string Claim::str() const
{
    std::string label;

    if(type == Claim::Function)
        label = format("function %08x", owner);
    else if(type == Claim::FunctionAuxiliary)
        label = "auxiliary data";
    else if(type == Claim::Data)
        label = "data";
    else if(type == Claim::Zero)
        label = "zero region";
    else if(type == Claim::Special)
        label = format("special region %08x", address);
    else
        label = format("<type %d>", type);

    /* Common suffix identifying the claimed range */
    return label + format(" (claim %08x:%d)", address, size);
}
//---
// Storage for disassembled data
//---
/* Create a disassembly bound to the virtual space [_vspace]; the instruction,
   function and claim containers all start out empty. */
Disassembly::Disassembly(VirtualSpace &_vspace):
vspace {_vspace}, instructions {}, functions {}
vspace {_vspace}, instructions {}, functions {}, claims {}
{
}
@ -107,6 +141,55 @@ Function *Disassembly::getFunctionAt(uint32_t pc)
return &it->second;
}
/* Find the first claim (in address order) that overlaps the region
   [address, address+size). Returns nullptr if no claim overlaps it, or if the
   region is empty (size <= 0). Claims of size 0 never count as overlapping.

   The overlap test is done here with 64-bit bounds rather than through a
   temporary Claim: Claim::size only holds 16 bits, so a region larger than
   65535 bytes cannot be represented as a Claim, and address+size may exceed
   the 32-bit range. */
Claim const *Disassembly::findClaimConflict(uint32_t address, int size)
{
    /* An empty (or nonsensical negative) region overlaps nothing */
    if(size <= 0)
        return nullptr;

    uint64_t const region_end =
        static_cast<uint64_t>(address) + static_cast<uint64_t>(size);

    /* Search key; the set is ordered by start address only */
    Claim const key = {
        .address = address,
        .size = 0,
        .type = 0,
        .owner = 0,
    };

    /* Find the first claim whose start is [> address] */
    auto it = this->claims.upper_bound(key);
    /* Backtrack to the last claim whose start is [<= address]. Only that one
       is inspected among earlier claims, which is sufficient as long as the
       stored claims do not overlap each other. */
    if(it != this->claims.begin())
        --it;

    for(; it != this->claims.end(); ++it) {
        /* We completely passed address+size, no conflict found */
        if(it->address >= region_end)
            return nullptr;

        /* This claim starts before the end of the region; it overlaps if it
           is non-empty and also ends after the start of the region. */
        uint64_t const claim_end =
            static_cast<uint64_t>(it->address) + it->size;
        if(it->size != 0 && claim_end > address)
            return &*it;
    }

    return nullptr;
}
/* Look up the claim covering [address], or nullptr if there is none. A single
   address is simply queried as a region of one byte. */
Claim const *Disassembly::getClaimAt(uint32_t address)
{
    int const single_byte = 1;
    return this->findClaimConflict(address, single_byte);
}
/* Record [c] as an exclusive claim. Returns true if the claim was stored.
   Returns false, after logging an error, if it overlaps an existing claim or
   if a claim with the same start address is already stored. */
bool Disassembly::addExclusiveClaim(Claim const &c)
{
    if(Claim const *conflict = this->findClaimConflict(c.address, c.size)) {
        FxOS_log(ERR, "exclusive claim for %s conflicts with %s",
            c.str(), conflict->str());
        return false;
    }

    /* The set orders claims by start address only, so the insertion is
       rejected when a claim already starts at c.address. This can happen
       without an overlap being detected above when one of the two claims is
       empty; report it instead of silently dropping the new claim. */
    auto const [existing, inserted] = this->claims.insert(c);
    if(!inserted) {
        FxOS_log(ERR, "exclusive claim for %s conflicts with %s",
            c.str(), existing->str());
    }
    return inserted;
}
//---
// DisassemblyPass
//---

View File

@ -30,22 +30,27 @@ static void show_vspace(std::string name, VirtualSpace &s, Session &session)
{
bool is_current = (&s == session.current_space);
int total_claim_size = 0;
for(Claim const &c: s.disasm.claims)
total_claim_size += c.size;
if(is_current) fmt::print("* ");
fmt::print(theme(11), "{}\n", name);
if(s.bindings.size() == 0) {
fmt::print(" (no bindings)\n");
return;
}
fmt::print(" Symbol table: {} symbols\n",
s.symbols.symbols.size());
fmt::print(" Main disassembly: {} instructions\n",
s.disasm.instructions.size());
fmt::print(" Functions: {}\n",
s.disasm.functions.size());
fmt::print(" Claims: {} (totalling {} bytes)\n",
s.disasm.claims.size(), total_claim_size);
fmt::print(" Region--Start---------End---------File--------------------\n");
fmt::print(" Region--Start---------End---------File------------------\n");
if(s.bindings.size() == 0) {
fmt::print(" (no bindings)\n");
return;
}
for(auto &b: s.bindings) {
MemoryRegion const *ref = MemoryRegion::region_for(b.region);
fmt::print(" {:<7s} 0x{:08x} .. 0x{:08x}", (ref ? ref->name : ""),