fxos/lib/disassembly.cpp

380 lines
9.0 KiB
C++

//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
#include <fxos/disassembly.h>
#include <fxos/vspace.h>
#include <fxos/util/log.h>
#include <optional>
#include <array>
namespace FxOS {
/* Instruction map */
static std::array<std::optional<AsmInstruction>, 65536> insmap;
void register_instruction(AsmInstruction const &ins)
{
uint16_t opcode = ins.opcode;
if(insmap[opcode])
FxOS_log(ERR, "opcode collision between a %s and a %s at %04x",
insmap[opcode]->mnemonic, ins.mnemonic, opcode);
else
insmap[opcode] = ins;
}
//---
// Concrete (instantiated) arguments and instructions
//---
Argument::Argument()
{
location = RelConstDomain().bottom();
value = location;
syscall_id = -1;
}
Instruction::Instruction(AsmInstruction const *inst):
inst {inst}, args {}, opcode {inst->opcode}, leader {false},
delayslot {false}, terminal {false}, jump {false}, condjump {false},
jmptarget {0xffffffff}
{
}
Instruction::Instruction(uint16_t opcode):
inst {nullptr}, args {}, opcode {opcode}, leader {false}, delayslot {false},
terminal {false}, jump {false}, condjump {false}, jmptarget {0xffffffff}
{
}
//---
// Function information
//---
Function::Function(uint32_t pc): address {pc}
{
}
//---
// Dynamic claims
//---
bool Claim::intersects(Claim const &other) const
{
/* Compute the actual intersection */
uint32_t inter_start = std::max(this->address, other.address);
uint32_t inter_end
= std::min(this->address + this->size, other.address + other.size);
return inter_start < inter_end;
}
bool Claim::operator==(Claim const &other) const
{
return this->address == other.address && this->size == other.size
&& this->type == other.type && this->owner == other.owner;
}
std::string Claim::str() const
{
std::string details = format(" (claim 0x%08x:%d)", address, size);
switch(type) {
case Claim::Function:
return format("function at 0x%08x", owner) + details;
case Claim::FunctionAuxiliary:
return std::string("auxiliary data") + details;
case Claim::Data:
return std::string("data") + details;
case Claim::Zero:
return std::string("zero region") + details;
case Claim::Special:
return format("special region at 0x%08x", address) + details;
default:
return format("<type %d>", type) + details;
}
}
//---
// Storage for disassembled data
//---
Disassembly::Disassembly(VirtualSpace &_vspace):
vspace {_vspace}, instructions {}, functions {}, claims {}
{
}
bool Disassembly::hasInstructionAt(uint32_t pc)
{
return this->instructions.count(pc) > 0;
}
Instruction *Disassembly::getInstructionAt(uint32_t pc, bool allowDiscovery)
{
if(pc & 1) {
FxOS_log(ERR, "reading instruction for disassembly at 0x%08x", pc);
pc &= -2;
}
if(this->hasInstructionAt(pc)) {
return &this->instructions.at(pc);
}
else if(allowDiscovery) {
uint16_t opcode = this->vspace.read_u16(pc);
Instruction i(opcode);
if(insmap[opcode])
i = Instruction(&*insmap[opcode]);
this->instructions.emplace(pc, i);
return &this->instructions.at(pc);
}
else {
FxOS_log(ERR, "reading non-existing instruction at 0x%08x", pc);
return nullptr;
}
}
bool Disassembly::hasFunctionAt(uint32_t pc)
{
return this->functions.count(pc) > 0;
}
Function *Disassembly::getFunctionAt(uint32_t pc)
{
auto it = this->functions.find(pc);
if(it == this->functions.end())
return nullptr;
else
return &it->second;
}
Function *Disassembly::getOrCreateFunctionAt(uint32_t pc)
{
if(!this->hasFunctionAt(pc)) {
Function f(pc);
this->functions.insert({pc, f});
}
return this->getFunctionAt(pc);
}
std::vector<Claim const *> Disassembly::findClaimConflicts(
uint32_t address, int size, int max)
{
Claim fake_claim = {
.address = address,
.size = (uint16_t)size,
.type = 0,
.owner = 0,
};
std::vector<Claim const *> conflicts;
/* Find the first claim whose start is [> address] */
auto it = this->claims.upper_bound(fake_claim);
/* Backtrack to find the last claim whose start is [<= address] */
if(it != this->claims.begin())
it--;
while(it != this->claims.end()
&& (max < 0 || conflicts.size() < (size_t)max)) {
/* We completely passed address+size, no conflict found */
if(it->address >= address + size)
break;
/* There is an intersection */
if(it->intersects(fake_claim))
conflicts.push_back(&*it);
it++;
}
return conflicts;
}
Claim const *Disassembly::findClaimConflict(uint32_t address, int size)
{
auto claims = findClaimConflicts(address, size, 1);
return claims.size() > 0 ? claims[0] : nullptr;
}
Claim const *Disassembly::getClaimAt(uint32_t address)
{
return findClaimConflict(address, 1);
}
bool Disassembly::addExclusiveClaim(Claim const &c)
{
auto conflicts = this->findClaimConflicts(c.address, c.size);
bool exclusive = true;
for(auto conflict: conflicts) {
/* Allow declaring the same claim twice */
if(*conflict == c)
continue;
FxOS_log(ERR, "exclusive claim for %s conflicts with %s", c.str(),
conflicts[0]->str());
exclusive = false;
}
if(exclusive)
this->claims.insert(c);
return exclusive;
}
std::vector<Claim const *> Disassembly::findClaimsOwnedBy(uint32_t address)
{
std::vector<Claim const *> claims;
/* Since we don't order by owner we have to tank the linear search */
for(auto const &c: this->claims) {
if(c.owner == address)
claims.push_back(&c);
}
return claims;
}
//---
// DisassemblyPass
//---
DisassemblyPass::DisassemblyPass(Disassembly &disasm): m_disasm {disasm}
{
}
//---
// FunctionPass
//---
FunctionPass::FunctionPass(Disassembly &disasm): DisassemblyPass(disasm)
{
}
bool FunctionPass::analyzeAllFunctions()
{
bool ok = true;
for(auto &pair: m_disasm.functions)
ok &= this->analyzeFunction(pair.second);
return ok;
}
bool FunctionPass::analyzeFunction(uint32_t pc)
{
Function *func = m_disasm.getFunctionAt(pc);
if(!func) {
FxOS_log(ERR, "no function at 0x%08x", pc);
return false;
}
return this->analyzeFunction(*func);
}
bool FunctionPass::analyzeFunctionRecursively(Function &func)
{
return this->analyzeFunctionRecursively(func.address);
}
bool FunctionPass::analyzeFunctionRecursively(uint32_t pc)
{
bool ok = true;
m_queue.enqueue(pc);
while(!m_queue.empty()) {
uint32_t pc = m_queue.pop();
Function *next = m_disasm.getFunctionAt(pc);
if(this->analyzeFunction(*next))
this->enqueueSubfunctions(*next);
else
ok = false;
}
return ok;
}
void FunctionPass::enqueueSubfunctions(Function &func)
{
for(uint32_t pc: func.callTargets)
m_queue.enqueue(pc);
}
void FunctionPass::updateSubfunctions(Function &func)
{
for(uint32_t pc: func.callTargets)
m_queue.update(pc);
}
//---
// InstructionPass
//---
InstructionPass::InstructionPass(Disassembly &disasm):
FunctionPass(disasm), m_allowDiscovery {false}
{
}
void InstructionPass::setAllowDiscovery(bool allowDiscovery)
{
m_allowDiscovery = allowDiscovery;
}
bool InstructionPass::analyzeAllInstructions()
{
bool ok = true;
for(auto &pair: m_disasm.instructions)
ok &= this->analyzeInstruction(pair.first, pair.second);
return ok;
}
bool InstructionPass::analyzeFunction(Function &func)
{
/* We don't have any function-specific information to pass yet, so we can
fall back to the anonymous version */
return this->analyzeAnonymousFunction(func.address);
}
bool InstructionPass::analyzeAnonymousFunction(uint32_t pc)
{
bool ok = true;
m_queue.enqueue(pc);
while(!m_queue.empty()) {
uint32_t pc = m_queue.pop();
Instruction *i = m_disasm.getInstructionAt(pc, m_allowDiscovery);
if(i != nullptr && this->analyzeInstruction(pc, *i))
this->enqueueSuccessors(pc, *i);
else
ok = false;
}
return ok;
}
void InstructionPass::enqueueSuccessors(uint32_t pc, Instruction &i)
{
if(!i.terminal && !i.jump)
m_queue.enqueue(pc + 2);
if(i.jump || i.condjump)
m_queue.enqueue(i.jmptarget);
}
void InstructionPass::updateSuccessors(uint32_t pc, Instruction &i)
{
if(!i.terminal && !i.jump)
m_queue.update(pc + 2);
if(i.jump || i.condjump)
m_queue.update(i.jmptarget);
}
} /* namespace FxOS */