add infrastructure for function passes (WIP)
This commit is contained in:
parent
0028a3ad45
commit
45888eb01c
|
@ -18,6 +18,7 @@
|
|||
#include <fxos/lang.h>
|
||||
#include <fxos/semantics.h>
|
||||
#include <fxos/util/Buffer.h>
|
||||
#include <fxos/util/Queue.h>
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
|
@ -34,6 +35,9 @@ void register_instruction(AsmInstruction const &inst);
|
|||
/* Lex and register an assembly instruction table. */
|
||||
int load_instructions(Buffer const &file);
|
||||
|
||||
//---
|
||||
// Dynamic information on instructions
|
||||
//---
|
||||
|
||||
/* An argument for a disassembled instruction. */
|
||||
struct Argument
|
||||
|
@ -98,6 +102,18 @@ struct Instruction
|
|||
uint32_t jmptarget;
|
||||
};
|
||||
|
||||
//---
|
||||
// Dynamic information on functions
|
||||
//---
|
||||
|
||||
/* Dynamic information about one function of the disassembled program. */
struct Function
{
    /* Address of the function. Defaults to 0 so that a default-constructed
       Function never carries an indeterminate value. */
    uint32_t address = 0;

    /* List of subfunctions called. TODO: Not yet populated by anyone */
    std::vector<uint32_t> callTargets;
};
|
||||
|
||||
/* Disassembly interface that automatically loads code from a target */
|
||||
struct Disassembly
|
||||
{
|
||||
|
@ -113,8 +129,14 @@ struct Disassembly
|
|||
loaded and initialized if it had not been read before. */
|
||||
Instruction &readins(uint32_t pc);
|
||||
|
||||
/* Find a function by address; returns nullptr if not yet defined */
|
||||
Function *getFunctionAt(uint32_t pc);
|
||||
|
||||
/* For other access patterns */
|
||||
std::map<uint32_t, Instruction> instructions;
|
||||
/* List of functions being documented */
|
||||
std::map<uint32_t, Function> functions;
|
||||
|
||||
/* Underlying target */
|
||||
VirtualSpace &space;
|
||||
};
|
||||
|
@ -126,49 +148,61 @@ struct Disassembly
|
|||
class DisassemblyPass
|
||||
{
|
||||
public:
|
||||
DisassemblyPass(Disassembly &disasm, std::string name="");
|
||||
|
||||
/* Analyze a single instruction, probably updating the annotations and
|
||||
the state of the pass itself. */
|
||||
virtual bool analyze(uint32_t pc, Instruction &inst) = 0;
|
||||
|
||||
/* Run the pass from the given entry point */
|
||||
virtual bool run(uint32_t entry_pc);
|
||||
|
||||
protected:
|
||||
/* Add an instruction to the queue to analyze next */
|
||||
void enqueue(uint32_t pc);
|
||||
/* Add the next loaded instruction in address space */
|
||||
void enqueue_next(uint32_t pc);
|
||||
/* Enqueue the unseen successors of this instruction */
|
||||
void enqueue_unseen_successors(uint32_t pc, Instruction &inst);
|
||||
/* Enqueue all the successors of this instruction */
|
||||
void enqueue_all_successors(uint32_t pc, Instruction &inst);
|
||||
DisassemblyPass(Disassembly &disasm);
|
||||
|
||||
/* Underlying disassembly */
|
||||
Disassembly &m_disasm;
|
||||
};
|
||||
|
||||
/* A disassembly pass that observes each function independently */
|
||||
class FunctionPass: public DisassemblyPass
|
||||
{
|
||||
public:
|
||||
FunctionPass(Disassembly &disasm);
|
||||
|
||||
/* Analyze the whole disassembly */
|
||||
bool analyzeAllFunctions();
|
||||
|
||||
/* Analyze a function */
|
||||
virtual bool analyzeFunction(Function &func) = 0;
|
||||
bool analyzeFunction(uint32_t pc);
|
||||
|
||||
/* Analyze a function and its subfunctions, recursively */
|
||||
bool analyzeFunctionRecursively(Function &func);
|
||||
bool analyzeFunctionRecursively(uint32_t pc);
|
||||
|
||||
/* Enqueue subfunctions of function (this is done automatically by
|
||||
analyzeFunctionRecursively()) */
|
||||
void enqueueSubfunctions(Function &func);
|
||||
/* Same, even for functions that were already seen */
|
||||
void updateSubfunctions(Function &func);
|
||||
|
||||
private:
|
||||
/* Blocks to visit next, ordered for uniqueness */
|
||||
std::set<uint32_t> m_next;
|
||||
std::priority_queue<uint32_t> m_queue;
|
||||
|
||||
/* Visited blocks */
|
||||
std::set<uint32_t> m_seen;
|
||||
|
||||
/* Name of pass */
|
||||
std::string m_name;
|
||||
Queue<uint32_t> m_queue;
|
||||
};
|
||||
|
||||
/* A disassembly pass that observes each instruction independently */
|
||||
class InstructionDisassemblyPass: public DisassemblyPass
|
||||
class InstructionPass: public DisassemblyPass
|
||||
{
|
||||
public:
|
||||
InstructionDisassemblyPass(Disassembly &disasm, std::string name="");
|
||||
InstructionPass(Disassembly &disasm);
|
||||
|
||||
/* Runs the pass from the first instruction currently loaded, all the
|
||||
way down to the bottom, as if always using enqueue_next(). */
|
||||
virtual bool run();
|
||||
/* Analyze the whole disassembly in increasing address order */
|
||||
bool analyzeAllInstructions();
|
||||
|
||||
/* Analyze a single instruction */
|
||||
virtual bool analyzeInstruction(uint32_t pc, Instruction &ins) = 0;
|
||||
|
||||
/* Analyze a function by following its CFG */
|
||||
bool analyzeFunction(uint32_t pc);
|
||||
|
||||
/* Enqueue successors (analyzeFunction() does this automatically) */
|
||||
void enqueueSuccessors(uint32_t pc, Instruction &ins);
|
||||
/* Same, even for instructions that were already seen */
|
||||
void updateSuccessors(uint32_t pc, Instruction &ins);
|
||||
|
||||
private:
|
||||
Queue<uint32_t> m_queue;
|
||||
};
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
//---------------------------------------------------------------------------//
|
||||
// 1100101 |_ mov #0, r4 __ //
|
||||
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
|
||||
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
|
||||
// |_ base# + offset |_| /_\_\___/__/ //
|
||||
//---------------------------------------------------------------------------//
|
||||
// fxos/util/Queue: Simple queue that handles recursivity
|
||||
//---
|
||||
|
||||
#ifndef FXOS_UTIL_QUEUE_H
|
||||
#define FXOS_UTIL_QUEUE_H
|
||||
|
||||
#include <queue>
|
||||
#include <set>
|
||||
|
||||
/* FIFO work queue that remembers every object it has been given. This lets
   traversals of cyclic graphs terminate: enqueue() ignores objects that were
   already queued once, while update() forces another visit. */
template<typename T>
struct Queue
{
    /* Starts with nothing pending and nothing seen */
    Queue() = default;

    /* Whether there is nothing left to visit */
    bool empty() const {
        return pending.empty();
    }
    /* Remove and return the oldest pending object; the queue must not be
       empty. The object is returned by value because the stored element is
       destroyed by the removal, so a reference to it would dangle. */
    T pop() {
        T object = pending.front();
        pending.pop();
        return object;
    }
    /* Forget both the pending objects and the visit history */
    void clear() {
        /* std::queue has no clear(); assign an empty queue instead */
        pending = std::queue<T>();
        seen.clear();
    }

    /* Enqueue an object to visit later (if not already visited) */
    void enqueue(T object) {
        /* insert() reports whether the object was new to the set */
        if(seen.insert(object).second)
            pending.push(object);
    }
    /* Enqueue an object for a later visit even if it was already seen */
    void update(T object) {
        pending.push(object);
        seen.insert(object);
    }

    /* Queue of objects to visit next */
    std::queue<T> pending;
    /* Objects already visited or currently pending */
    std::set<T> seen;
};
|
||||
|
||||
#endif /* FXOS_UTIL_QUEUE_H */
|
|
@ -57,7 +57,7 @@ Instruction::Instruction(uint16_t opcode):
|
|||
//---
|
||||
|
||||
Disassembly::Disassembly(VirtualSpace &_space):
|
||||
instructions {}, space {_space}
|
||||
instructions {}, functions {}, space {_space}
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -102,86 +102,139 @@ Instruction &Disassembly::readins(uint32_t pc)
|
|||
}
|
||||
}
|
||||
|
||||
/* Look up the function registered at address [pc] in the function map.
   Returns a pointer to the stored entry, or nullptr when no function is
   registered at that address. */
Function *Disassembly::getFunctionAt(uint32_t pc)
{
    auto const entry = this->functions.find(pc);
    bool const found = (entry != this->functions.end());

    return found ? &entry->second : nullptr;
}
|
||||
|
||||
//---
|
||||
// Base pass
|
||||
// DisassemblyPass
|
||||
//---
|
||||
|
||||
DisassemblyPass::DisassemblyPass(Disassembly &disasm, std::string name):
|
||||
m_disasm {disasm}, m_name {name}
|
||||
DisassemblyPass::DisassemblyPass(Disassembly &disasm):
|
||||
m_disasm {disasm}
|
||||
{
|
||||
}
|
||||
|
||||
void DisassemblyPass::enqueue(uint32_t pc)
|
||||
{
|
||||
if(m_next.count(pc))
|
||||
return;
|
||||
//---
|
||||
// FunctionPass
|
||||
//---
|
||||
|
||||
m_next.insert(pc);
|
||||
m_queue.push(pc);
|
||||
FunctionPass::FunctionPass(Disassembly &disasm):
|
||||
DisassemblyPass(disasm)
|
||||
{
|
||||
}
|
||||
|
||||
void DisassemblyPass::enqueue_next(uint32_t pc)
|
||||
bool FunctionPass::analyzeAllFunctions()
|
||||
{
|
||||
/* TODO: DisassemblyPass::enqueue_next is inefficient */
|
||||
do pc += 2;
|
||||
while(!m_disasm.hasins(pc));
|
||||
bool ok = true;
|
||||
|
||||
enqueue(pc);
|
||||
for(auto &pair: m_disasm.functions)
|
||||
ok &= this->analyzeFunction(pair.second);
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
void DisassemblyPass::enqueue_unseen_successors(uint32_t pc, Instruction &i)
|
||||
bool FunctionPass::analyzeFunction(uint32_t pc)
|
||||
{
|
||||
if(!i.terminal && !i.jump) {
|
||||
if(!m_seen.count(pc + 2)) enqueue(pc + 2);
|
||||
}
|
||||
if(i.jump || i.condjump) {
|
||||
if(!m_seen.count(i.jmptarget)) enqueue(i.jmptarget);
|
||||
Function *func = m_disasm.getFunctionAt(pc);
|
||||
if(!func) {
|
||||
FxOS_log(ERR, "no function at %08x", pc);
|
||||
return false;
|
||||
}
|
||||
return this->analyzeFunction(*func);
|
||||
}
|
||||
|
||||
void DisassemblyPass::enqueue_all_successors(uint32_t pc, Instruction &i)
|
||||
/* Convenience overload: run the recursive analysis starting from the address
   recorded in [func]. */
bool FunctionPass::analyzeFunctionRecursively(Function &func)
{
    uint32_t const entry = func.address;
    return this->analyzeFunctionRecursively(entry);
}
|
||||
|
||||
bool FunctionPass::analyzeFunctionRecursively(uint32_t pc)
|
||||
{
|
||||
bool ok = true;
|
||||
m_queue.enqueue(pc);
|
||||
|
||||
while(!m_queue.empty()) {
|
||||
uint32_t pc = m_queue.pop();
|
||||
Function *next = m_disasm.getFunctionAt(pc);
|
||||
ok &= this->analyzeFunction(*next);
|
||||
this->enqueueSubfunctions(*next);
|
||||
}
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* Queue every call target of [func] for a later visit; targets the queue has
   already seen are skipped by Queue::enqueue(). */
void FunctionPass::enqueueSubfunctions(Function &func)
{
    for(auto it = func.callTargets.begin(); it != func.callTargets.end(); ++it) {
        uint32_t target = *it;
        m_queue.enqueue(target);
    }
}
|
||||
|
||||
/* Queue every call target of [func] for a visit, including targets that the
   queue has already seen (Queue::update() does no filtering). */
void FunctionPass::updateSubfunctions(Function &func)
{
    for(auto it = func.callTargets.begin(); it != func.callTargets.end(); ++it) {
        uint32_t target = *it;
        m_queue.update(target);
    }
}
|
||||
|
||||
//---
|
||||
// InstructionPass
|
||||
//---
|
||||
|
||||
/* Build an instruction-level pass over [disasm]; the disassembly reference
   is handed to the DisassemblyPass base. */
InstructionPass::InstructionPass(Disassembly &disasm)
    : DisassemblyPass(disasm)
{}
|
||||
|
||||
bool InstructionPass::analyzeAllInstructions()
|
||||
{
|
||||
bool ok = true;
|
||||
|
||||
for(auto &pair: m_disasm.instructions)
|
||||
ok &= this->analyzeInstruction(pair.first, pair.second);
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
bool InstructionPass::analyzeFunction(uint32_t pc)
|
||||
{
|
||||
bool ok = true;
|
||||
m_queue.enqueue(pc);
|
||||
|
||||
while(!m_queue.empty()) {
|
||||
uint32_t pc = m_queue.pop();
|
||||
if(!m_disasm.instructions.count(pc)) {
|
||||
FxOS_log(ERR, "no instruction at %08x", pc);
|
||||
continue;
|
||||
}
|
||||
|
||||
Instruction &i = m_disasm.instructions.at(pc);
|
||||
ok &= this->analyzeInstruction(pc, i);
|
||||
this->enqueueSuccessors(pc, i);
|
||||
}
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
void InstructionPass::enqueueSuccessors(uint32_t pc, Instruction &i)
|
||||
{
|
||||
if(!i.terminal && !i.jump)
|
||||
enqueue(pc + 2);
|
||||
m_queue.enqueue(pc + 2);
|
||||
if(i.jump || i.condjump)
|
||||
enqueue(i.jmptarget);
|
||||
m_queue.enqueue(i.jmptarget);
|
||||
}
|
||||
|
||||
bool DisassemblyPass::run(uint32_t entry_pc)
|
||||
void InstructionPass::updateSuccessors(uint32_t pc, Instruction &i)
|
||||
{
|
||||
enqueue(entry_pc);
|
||||
|
||||
while(m_queue.size()) {
|
||||
uint32_t pc = m_queue.top();
|
||||
|
||||
m_queue.pop();
|
||||
m_next.erase(m_next.find(pc));
|
||||
|
||||
Instruction &ci = m_disasm.readins(pc);
|
||||
if(!analyze(pc, ci))
|
||||
return false;
|
||||
|
||||
m_seen.insert(pc);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
//---
|
||||
// Base instruction-level pass
|
||||
//---
|
||||
|
||||
InstructionDisassemblyPass::InstructionDisassemblyPass(Disassembly &disasm,
|
||||
std::string name): DisassemblyPass(disasm, name)
|
||||
{
|
||||
}
|
||||
|
||||
bool InstructionDisassemblyPass::run()
|
||||
{
|
||||
for(auto &pair: m_disasm.instructions) {
|
||||
if(!analyze(pair.first, pair.second))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
if(!i.terminal && !i.jump)
|
||||
m_queue.update(pc + 2);
|
||||
if(i.jump || i.condjump)
|
||||
m_queue.update(i.jmptarget);
|
||||
}
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
|
14
shell/a.cpp
14
shell/a.cpp
|
@ -237,14 +237,20 @@ static void ad_disassemble_all(VirtualSpace &space,
|
|||
|
||||
if(pass == "cfg") {
|
||||
CfgPass p(space.disasm);
|
||||
for(uint32_t addr: addresses) {
|
||||
printr("[cfg] Disassembling %08x...", addr);
|
||||
if(!p.run(addr)) {
|
||||
FxOS_log(ERR, "while processing %08x", addr);
|
||||
|
||||
/* We collect subfunction addresses while running the pass */
|
||||
for(int i = 0; i < (int)addresses.size(); i++) {
|
||||
uint32_t entry = addresses[i];
|
||||
printr("[cfg %d/%zu] Disassembling %08x...",
|
||||
i+1, addresses.size(), entry);
|
||||
if(!p.run(entry)) {
|
||||
FxOS_log(ERR, "while processing %08x", entry);
|
||||
errors++;
|
||||
if(!force) break;
|
||||
}
|
||||
else successes++;
|
||||
|
||||
/* TODO: Get subfunction addresses here */
|
||||
}
|
||||
}
|
||||
else if(pass == "pcrel") {
|
||||
|
|
Loading…
Reference in New Issue