add infrastructure for function passes (WIP)

This commit is contained in:
Lephenixnoir 2022-04-04 22:53:05 +01:00
parent 0028a3ad45
commit 45888eb01c
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
4 changed files with 240 additions and 94 deletions

View File

@ -18,6 +18,7 @@
#include <fxos/lang.h>
#include <fxos/semantics.h>
#include <fxos/util/Buffer.h>
#include <fxos/util/Queue.h>
#include <set>
#include <map>
@ -34,6 +35,9 @@ void register_instruction(AsmInstruction const &inst);
/* Lex and register an assembly instruction table. */
int load_instructions(Buffer const &file);
//---
// Dynamic information on instructions
//---
/* An argument for a disassembled instruction. */
struct Argument
@ -98,6 +102,18 @@ struct Instruction
uint32_t jmptarget;
};
//---
// Dynamic information on functions
//---
/* Per-function record stored in Disassembly::functions. */
struct Function
{
    /* Address of the function (the same kind of value that is passed as
       `pc` to Disassembly::getFunctionAt()) */
    uint32_t address;

    /* Addresses of the subfunctions called by this function.
       TODO: Not yet populated by anyone */
    std::vector<uint32_t> callTargets;
};
/* Disassembly interface that automatically loads code from a target */
struct Disassembly
{
@ -113,8 +129,14 @@ struct Disassembly
loaded and initialized if it had not been read before. */
Instruction &readins(uint32_t pc);
/* Find a function by address; returns nullptr if not yet defined */
Function *getFunctionAt(uint32_t pc);
/* For other access patterns */
std::map<uint32_t, Instruction> instructions;
/* List of functions being documented */
std::map<uint32_t, Function> functions;
/* Underlying target */
VirtualSpace &space;
};
@ -126,49 +148,61 @@ struct Disassembly
class DisassemblyPass
{
public:
DisassemblyPass(Disassembly &disasm, std::string name="");
/* Analyze a single instruction, probably updating the annotations and
the state of the pass itself. */
virtual bool analyze(uint32_t pc, Instruction &inst) = 0;
/* Run the pass from the given entry point */
virtual bool run(uint32_t entry_pc);
protected:
/* Add an instruction to the queue to analyze next */
void enqueue(uint32_t pc);
/* Add the next loaded instruction in address space */
void enqueue_next(uint32_t pc);
/* Enqueue the unseen successors of this instruction */
void enqueue_unseen_successors(uint32_t pc, Instruction &inst);
/* Enqueue all the successors of this instruction */
void enqueue_all_successors(uint32_t pc, Instruction &inst);
DisassemblyPass(Disassembly &disasm);
/* Underlying disassembly */
Disassembly &m_disasm;
};
/* A disassembly pass that observes each function independently */
class FunctionPass: public DisassemblyPass
{
public:
FunctionPass(Disassembly &disasm);
/* Analyze the whole disassembly */
bool analyzeAllFunctions();
/* Analyze a function */
virtual bool analyzeFunction(Function &func) = 0;
bool analyzeFunction(uint32_t pc);
/* Analyze a function and its subfunctions, recursively */
bool analyzeFunctionRecursively(Function &func);
bool analyzeFunctionRecursively(uint32_t pc);
/* Enqueue subfunctions of function (this is done automatically by
analyzeFunctionRecursively()) */
void enqueueSubfunctions(Function &func);
/* Same, even for functions that were already seen */
void updateSubfunctions(Function &func);
private:
/* Blocks to visit next, ordered for uniqueness */
std::set<uint32_t> m_next;
std::priority_queue<uint32_t> m_queue;
/* Visited blocks */
std::set<uint32_t> m_seen;
/* Name of pass */
std::string m_name;
Queue<uint32_t> m_queue;
};
/* A disassembly pass that observes each instruction independently */
class InstructionDisassemblyPass: public DisassemblyPass
class InstructionPass: public DisassemblyPass
{
public:
InstructionDisassemblyPass(Disassembly &disasm, std::string name="");
InstructionPass(Disassembly &disasm);
/* Runs the pass from the first instruction currently loaded, all the
way down to the bottom, as if always using enqueue_next(). */
virtual bool run();
/* Analyze the whole disassembly in increasing address order */
bool analyzeAllInstructions();
/* Analyze a single instruction */
virtual bool analyzeInstruction(uint32_t pc, Instruction &ins) = 0;
/* Analyze a function by following its CFG */
bool analyzeFunction(uint32_t pc);
/* Enqueue successors (analyzeFunction() does this automatically) */
void enqueueSuccessors(uint32_t pc, Instruction &ins);
/* Same, even for instructions that were already seen */
void updateSuccessors(uint32_t pc, Instruction &ins);
private:
Queue<uint32_t> m_queue;
};
} /* namespace FxOS */

53
include/fxos/util/Queue.h Normal file
View File

@ -0,0 +1,53 @@
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/util/Queue: Simple queue that handles recursion
//---
#ifndef FXOS_UTIL_QUEUE_H
#define FXOS_UTIL_QUEUE_H
#include <queue>
#include <set>
/* FIFO work queue that remembers every object it has ever been given, so
   that traversals over structures with loops (or recursion) can avoid
   visiting the same object twice.

   T must be copyable and ordered by operator< (it is stored in a std::set). */
template<typename T>
struct Queue
{
    /* Start with nothing pending and nothing seen. Defaulted in-class: a
       declaration without a definition would fail to link for every
       instantiation of the template. */
    Queue() = default;

    /* Whether there is nothing left to visit */
    bool empty() const {
        return pending.empty();
    }

    /* Remove and return the next object to visit. The queue must not be
       empty. The object is returned by value; a reference to front() would
       dangle as soon as the element is popped. */
    T pop() {
        T object = pending.front();
        pending.pop();
        return object;
    }

    /* Forget all pending and seen objects */
    void clear() {
        /* std::queue has no clear(); replace it with an empty queue */
        pending = std::queue<T>();
        seen.clear();
    }

    /* Enqueue an object to visit later (if not already visited) */
    void enqueue(T object) {
        if(!seen.count(object)) {
            pending.push(object);
            seen.insert(object);
        }
    }

    /* Enqueue an object unconditionally, even if it was already seen, so
       that it gets visited again (eg. to update previous results) */
    void update(T object) {
        pending.push(object);
        seen.insert(object);
    }

    /* Queue of objects to visit next */
    std::queue<T> pending;
    /* Objects already visited or currently pending */
    std::set<T> seen;
};
#endif /* FXOS_UTIL_QUEUE_H */

View File

@ -57,7 +57,7 @@ Instruction::Instruction(uint16_t opcode):
//---
Disassembly::Disassembly(VirtualSpace &_space):
instructions {}, space {_space}
instructions {}, functions {}, space {_space}
{
}
@ -102,86 +102,139 @@ Instruction &Disassembly::readins(uint32_t pc)
}
}
/* Look up the function registered at address pc. Returns a pointer into the
   `functions` map, or nullptr if no function is defined at that address. */
Function *Disassembly::getFunctionAt(uint32_t pc)
{
    auto const found = this->functions.find(pc);
    return (found != this->functions.end()) ? &found->second : nullptr;
}
//---
// Base pass
// DisassemblyPass
//---
DisassemblyPass::DisassemblyPass(Disassembly &disasm, std::string name):
m_disasm {disasm}, m_name {name}
DisassemblyPass::DisassemblyPass(Disassembly &disasm):
m_disasm {disasm}
{
}
void DisassemblyPass::enqueue(uint32_t pc)
{
if(m_next.count(pc))
return;
//---
// FunctionPass
//---
m_next.insert(pc);
m_queue.push(pc);
FunctionPass::FunctionPass(Disassembly &disasm):
DisassemblyPass(disasm)
{
}
void DisassemblyPass::enqueue_next(uint32_t pc)
bool FunctionPass::analyzeAllFunctions()
{
/* TODO: DisassemblyPass::enqueue_next is inefficient */
do pc += 2;
while(!m_disasm.hasins(pc));
bool ok = true;
enqueue(pc);
for(auto &pair: m_disasm.functions)
ok &= this->analyzeFunction(pair.second);
return ok;
}
void DisassemblyPass::enqueue_unseen_successors(uint32_t pc, Instruction &i)
bool FunctionPass::analyzeFunction(uint32_t pc)
{
if(!i.terminal && !i.jump) {
if(!m_seen.count(pc + 2)) enqueue(pc + 2);
}
if(i.jump || i.condjump) {
if(!m_seen.count(i.jmptarget)) enqueue(i.jmptarget);
Function *func = m_disasm.getFunctionAt(pc);
if(!func) {
FxOS_log(ERR, "no function at %08x", pc);
return false;
}
return this->analyzeFunction(*func);
}
void DisassemblyPass::enqueue_all_successors(uint32_t pc, Instruction &i)
/* Convenience overload: run the recursive analysis starting from the
   address recorded in func. */
bool FunctionPass::analyzeFunctionRecursively(Function &func)
{
    uint32_t const entry = func.address;
    return this->analyzeFunctionRecursively(entry);
}
bool FunctionPass::analyzeFunctionRecursively(uint32_t pc)
{
bool ok = true;
m_queue.enqueue(pc);
while(!m_queue.empty()) {
uint32_t pc = m_queue.pop();
Function *next = m_disasm.getFunctionAt(pc);
ok &= this->analyzeFunction(*next);
this->enqueueSubfunctions(*next);
}
return ok;
}
/* Queue every call target of func for later analysis; targets that the
   queue has already seen are skipped by Queue::enqueue(). */
void FunctionPass::enqueueSubfunctions(Function &func)
{
    for(uint32_t const target: func.callTargets) {
        m_queue.enqueue(target);
    }
}
/* Queue every call target of func again, including targets that the queue
   has already seen (Queue::update() does not filter them out). */
void FunctionPass::updateSubfunctions(Function &func)
{
    for(uint32_t const target: func.callTargets) {
        m_queue.update(target);
    }
}
//---
// InstructionPass
//---
InstructionPass::InstructionPass(Disassembly &disasm):
DisassemblyPass(disasm)
{
}
/* Call analyzeInstruction() on every entry of the disassembly's instruction
   map, in the map's iteration order (increasing address). Every instruction
   is analyzed even after a failure; returns false if any call failed. */
bool InstructionPass::analyzeAllInstructions()
{
    bool success = true;

    for(auto &entry: m_disasm.instructions) {
        if(!this->analyzeInstruction(entry.first, entry.second))
            success = false;
    }

    return success;
}
bool InstructionPass::analyzeFunction(uint32_t pc)
{
bool ok = true;
m_queue.enqueue(pc);
while(!m_queue.empty()) {
uint32_t pc = m_queue.pop();
if(!m_disasm.instructions.count(pc)) {
FxOS_log(ERR, "no instruction at %08x", pc);
continue;
}
Instruction &i = m_disasm.instructions.at(pc);
ok &= this->analyzeInstruction(pc, i);
this->enqueueSuccessors(pc, i);
}
return ok;
}
void InstructionPass::enqueueSuccessors(uint32_t pc, Instruction &i)
{
if(!i.terminal && !i.jump)
enqueue(pc + 2);
m_queue.enqueue(pc + 2);
if(i.jump || i.condjump)
enqueue(i.jmptarget);
m_queue.enqueue(i.jmptarget);
}
bool DisassemblyPass::run(uint32_t entry_pc)
void InstructionPass::updateSuccessors(uint32_t pc, Instruction &i)
{
enqueue(entry_pc);
while(m_queue.size()) {
uint32_t pc = m_queue.top();
m_queue.pop();
m_next.erase(m_next.find(pc));
Instruction &ci = m_disasm.readins(pc);
if(!analyze(pc, ci))
return false;
m_seen.insert(pc);
}
return true;
}
//---
// Base instruction-level pass
//---
InstructionDisassemblyPass::InstructionDisassemblyPass(Disassembly &disasm,
std::string name): DisassemblyPass(disasm, name)
{
}
bool InstructionDisassemblyPass::run()
{
for(auto &pair: m_disasm.instructions) {
if(!analyze(pair.first, pair.second))
return false;
}
return true;
if(!i.terminal && !i.jump)
m_queue.update(pc + 2);
if(i.jump || i.condjump)
m_queue.update(i.jmptarget);
}
} /* namespace FxOS */

View File

@ -237,14 +237,20 @@ static void ad_disassemble_all(VirtualSpace &space,
if(pass == "cfg") {
CfgPass p(space.disasm);
for(uint32_t addr: addresses) {
printr("[cfg] Disassembling %08x...", addr);
if(!p.run(addr)) {
FxOS_log(ERR, "while processing %08x", addr);
/* We collect subfunction addresses while running the pass */
for(int i = 0; i < (int)addresses.size(); i++) {
uint32_t entry = addresses[i];
printr("[cfg %d/%zu] Disassembling %08x...",
i+1, addresses.size(), entry);
if(!p.run(entry)) {
FxOS_log(ERR, "while processing %08x", entry);
errors++;
if(!force) break;
}
else successes++;
/* TODO: Get subfunction addresses here */
}
}
else if(pass == "pcrel") {