_ads: register functions during discovery

This commit is contained in:
Lephenixnoir 2022-04-05 14:02:06 +01:00
parent 613e5b07f4
commit 6ae1a88bf7
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
12 changed files with 157 additions and 123 deletions

View File

@ -114,31 +114,42 @@ struct Function
std::vector<uint32_t> callTargets;
};
//---
// Storage for disassembled data
//---
/* Disassembly interface that automatically loads code from a target */
struct Disassembly
{
Disassembly(VirtualSpace &space);
Disassembly(VirtualSpace &vspace);
/* Check whether an instruction has been visited so far */
bool hasins(uint32_t pc);
/* Get the minimum and maximum loaded instruction addresses */
uint32_t minpc();
uint32_t maxpc();
VirtualSpace &vspace;
/* Get the storage to any concrete instruction. The instruction will be
loaded and initialized if it had not been read before. */
Instruction &readins(uint32_t pc);
// Instruction information
std::map<uint32_t, Instruction> instructions;
/* Check whether an instruction is loaded at PC */
bool hasInstructionAt(uint32_t pc);
/* Find an instruction by address. If the instruction is not loaded, an
error is logged and nullptr is returned, unless [allowDiscovery] is set,
in which case it is read from the virtual space, stored, and returned. */
Instruction *getInstructionAt(uint32_t pc, bool allowDiscovery=false);
// Function information
std::map<uint32_t, Function> functions;
/* Check whether a function is defined at PC */
bool hasFunctionAt(uint32_t pc);
/* Find a function by address; returns nullptr if not yet defined */
Function *getFunctionAt(uint32_t pc);
/* For other access patterns */
std::map<uint32_t, Instruction> instructions;
/* List of functions being documented */
std::map<uint32_t, Function> functions;
/* Underlying target */
VirtualSpace &space;
// TODO: We don't want to deal with instructions loaded without a minimum
// amount of analysis; can we tie instruction loading to some analysis?
};
//---
@ -182,7 +193,7 @@ private:
};
/* A disassembly pass that observes each instruction independently */
class InstructionPass: public DisassemblyPass
class InstructionPass: public FunctionPass
{
public:
InstructionPass(Disassembly &disasm);
@ -199,7 +210,11 @@ public:
virtual bool analyzeInstruction(uint32_t pc, Instruction &ins) = 0;
/* Analyze a function by following its CFG */
bool analyzeFunction(uint32_t pc);
using FunctionPass::analyzeFunction;
bool analyzeFunction(Function &func) override;
/* Analyze an anonymous function; just assume one starts at PC */
bool analyzeAnonymousFunction(uint32_t pc);
/* Enqueue successors (analyzeFunction() does this automatically) */
void enqueueSuccessors(uint32_t pc, Instruction &ins);

View File

@ -53,6 +53,10 @@ class CfgPass: public InstructionPass
public:
CfgPass(Disassembly &disasm);
bool analyzeInstruction(uint32_t pc, Instruction &inst) override;
/* Explore the function at the specified address. This is a wrapper around
[analyzeFunction()] that first creates the function if none is defined there. */
bool exploreFunction(uint32_t pc);
};
} /* namespace FxOS */

View File

@ -34,6 +34,10 @@ struct Timer
void start();
void stop();
/* reset() clears the total time; restart() resets and then starts again */
void reset();
void restart();
/* String representation of time */
std::string format_time() const;

View File

@ -53,53 +53,48 @@ Instruction::Instruction(uint16_t opcode):
}
//---
// Disassembler interface
// Storage for disassembled data
//---
Disassembly::Disassembly(VirtualSpace &_space):
instructions {}, functions {}, space {_space}
Disassembly::Disassembly(VirtualSpace &_vspace):
vspace {_vspace}, instructions {}, functions {}
{
}
bool Disassembly::hasins(uint32_t pc)
/* Tell whether an instruction is currently stored at address [pc]. This only
   queries the instruction map; nothing is loaded by this call. */
bool Disassembly::hasInstructionAt(uint32_t pc)
{
    auto const found = this->instructions.find(pc);
    return found != this->instructions.end();
}
/* Lowest address with a stored instruction, or 0xffffffff when no instruction
   is stored. The map is ordered by address, so this is its first key. */
uint32_t Disassembly::minpc()
{
    auto const first = this->instructions.cbegin();
    if(first == this->instructions.cend())
        return 0xffffffff;

    return first->first;
}
/* Highest address with a stored instruction, or 0xffffffff when no
   instruction is stored. The map is ordered by address, so this is its last
   key. */
uint32_t Disassembly::maxpc()
{
    auto const last = this->instructions.crbegin();
    if(last == this->instructions.crend())
        return 0xffffffff;

    return last->first;
}
Instruction &Disassembly::readins(uint32_t pc)
Instruction *Disassembly::getInstructionAt(uint32_t pc, bool allowDiscovery)
{
if(pc & 1) {
FxOS_log(ERR, "reading instruction for disassembly at %08x", pc);
pc &= -2;
}
try {
return this->instructions.at(pc);
if(this->hasInstructionAt(pc)) {
return &this->instructions.at(pc);
}
catch(std::out_of_range &e) {
uint16_t opcode = this->space.read_u16(pc);
Instruction ci(opcode);
else if(allowDiscovery) {
uint16_t opcode = this->vspace.read_u16(pc);
Instruction i(opcode);
if(insmap[opcode])
ci = Instruction(&*insmap[opcode]);
i = Instruction(&*insmap[opcode]);
this->instructions.emplace(pc, ci);
return this->instructions.at(pc);
this->instructions.emplace(pc, i);
return &this->instructions.at(pc);
}
else {
FxOS_log(ERR, "reading non-existing instruction at %08x", pc);
return nullptr;
}
}
/* Tell whether a function is currently registered at address [pc]. This only
   queries the function map. */
bool Disassembly::hasFunctionAt(uint32_t pc)
{
    auto const found = this->functions.find(pc);
    return found != this->functions.end();
}
Function *Disassembly::getFunctionAt(uint32_t pc)
@ -188,7 +183,7 @@ void FunctionPass::updateSubfunctions(Function &func)
//---
InstructionPass::InstructionPass(Disassembly &disasm):
DisassemblyPass(disasm), m_allowDiscovery {false}
FunctionPass(disasm), m_allowDiscovery {false}
{
}
@ -207,28 +202,23 @@ bool InstructionPass::analyzeAllInstructions()
return ok;
}
bool InstructionPass::analyzeFunction(uint32_t pc)
/* Analyze a registered function. Only its address is used: there is no
   function-specific information to pass along yet, so the work is delegated
   to the anonymous variant. Returns that variant's result. */
bool InstructionPass::analyzeFunction(Function &func)
{
    uint32_t const entry = func.address;
    return analyzeAnonymousFunction(entry);
}
bool InstructionPass::analyzeAnonymousFunction(uint32_t pc)
{
bool ok = true;
m_queue.enqueue(pc);
while(!m_queue.empty()) {
uint32_t pc = m_queue.pop();
Instruction *i = nullptr;
Instruction *i = m_disasm.getInstructionAt(pc, m_allowDiscovery);
if(m_allowDiscovery) {
i = &m_disasm.readins(pc);
}
else {
if(!m_disasm.instructions.count(pc)) {
FxOS_log(ERR, "no instruction at %08x", pc);
continue;
}
i = &m_disasm.instructions.at(pc);
}
if(this->analyzeInstruction(pc, *i))
if(i != nullptr && this->analyzeInstruction(pc, *i))
this->enqueueSuccessors(pc, *i);
else ok = false;
}

View File

@ -43,7 +43,7 @@ bool CfgPass::analyzeInstruction(uint32_t pc, Instruction &i)
jmptarget = (pc+4) + args[0].disp;
/* Make the target of the jump a leader */
Instruction &target = m_disasm.readins(jmptarget);
Instruction &target = *m_disasm.getInstructionAt(jmptarget, true);
target.leader = true;
/* Check that it's not in a delay slot */
@ -64,7 +64,7 @@ bool CfgPass::analyzeInstruction(uint32_t pc, Instruction &i)
}
/* If it has a delay slot, create it at the next instruction */
else if(i.inst->isdelayed()) {
Instruction &slot = m_disasm.readins(pc+2);
Instruction &slot = *m_disasm.getInstructionAt(pc+2, true);
if(slot.leader)
throw std::logic_error(format("%08x is a leader and also a delay "
"slot - this is unsupported by fxos and will produce garbage "
@ -91,4 +91,15 @@ bool CfgPass::analyzeInstruction(uint32_t pc, Instruction &i)
return true;
}
bool CfgPass::exploreFunction(uint32_t pc)
{
if(!m_disasm.hasFunctionAt(pc)) {
// TODO: Have proper function creation methods in Disassembly
Function func = { .address=pc, .callTargets={} };
m_disasm.functions[pc] = func;
}
return this->analyzeFunction(pc);
}
} /* namespace FxOS */

View File

@ -31,7 +31,7 @@ bool PcrelPass::analyzeInstruction(uint32_t pc, Instruction &ci)
/* Also compute the value. This is sign-extended from 16-bit with
mov.w. There is no mov.b for this instruction. */
VirtualSpace &space = m_disasm.space;
VirtualSpace &space = m_disasm.vspace;
uint32_t v = -1;
if(i->opsize == 2 && space.covers(addr, 2)) {

View File

@ -20,10 +20,10 @@ PrintPass::PrintPass(Disassembly &disasm):
/* Default parameters: all 0 */
/* Use an OS observer to describe syscalls in header lines */
m_os = disasm.space.os_analysis();
m_os = disasm.vspace.os_analysis();
/* Use the symbol tables from the virtual space */
m_symtables.push_back(disasm.space.symbols);
m_symtables.push_back(disasm.vspace.symbols);
}
bool PrintPass::analyzeInstruction(uint32_t pc, Instruction &i)

View File

@ -36,6 +36,17 @@ void Timer::stop(void)
}
}
void Timer::reset(void)
{
this->time_ns = 0;
}
void Timer::restart(void)
{
this->reset();
this->start();
}
std::string Timer::format_time(uint64_t time_ns)
{
if(time_ns < 2000) return format("%lld ns", time_ns);

View File

@ -8,7 +8,6 @@
#include <fxos/util/log.h>
#include <cstdlib>
#include <cstring>
#include <iostream>
namespace FxOS {
@ -61,20 +60,22 @@ void logmsg(int level, char const *file, int line, char const *func,
message.pop_back();
}
printf("\x1b[K");
if(prefix) {
if(level == LOG_LEVEL_LOG)
std::cerr << "[" << file << ":" << line << "@" << func << "] ";
fprintf(stderr, "\e[30;1m[%s:%d@%s]\e[0m ", file, line, func);
if(level == LOG_LEVEL_WRN)
std::cerr << "warning: ";
fprintf(stderr, "warning: ");
if(level == LOG_LEVEL_ERR)
std::cerr << "\x1b[31;1merror:\x1b[0m ";
fprintf(stderr, "\x1b[31;1merror:\x1b[0m ");
}
else std::cerr << " ";
else fprintf(stderr, " ");
std::cerr << message;
fputs(message.c_str(), stderr);
if(endline) {
std::cerr << '\n';
fputc('\n', stderr);
lastlevel = -1;
}
else {

View File

@ -228,62 +228,58 @@ void _af4(Session &session, uint32_t value, std::vector<MemoryRegion> &regions)
static void ad_disassemble_all(VirtualSpace &space,
std::vector<uint32_t> const &addresses, bool force)
{
std::vector<std::string> passes = { "cfg", "pcrel", "syscall" };
int successes=0, errors=0;
Timer timer;
for(auto pass: passes) {
Timer timer;
timer.start();
/* Analyze the CFGs of all functions */
if(pass == "cfg") {
CfgPass p(space.disasm);
timer.start();
CfgPass cfg_pass(space.disasm);
/* We collect subfunction addresses while running the pass */
for(int i = 0; i < (int)addresses.size(); i++) {
uint32_t entry = addresses[i];
printr("[cfg %d/%zu] Disassembling %08x...",
i+1, addresses.size(), entry);
if(!p.analyzeFunction(entry)) {
FxOS_log(ERR, "while processing %08x", entry);
errors++;
if(!force) break;
}
else successes++;
/* TODO: Get subfunction addresses here */
}
/* We collect subfunction addresses while running the pass */
for(int i = 0; i < (int)addresses.size(); i++) {
uint32_t entry = addresses[i];
printr("[cfg %d/%zu] Disassembling %08x...",
i+1, addresses.size(), entry);
if(!cfg_pass.exploreFunction(entry)) {
FxOS_log(ERR, "while processing %08x", entry);
errors++;
if(!force) return;
}
else if(pass == "pcrel") {
printr("[pcrel] Resolving PC-relative addressing modes...");
PcrelPass p(space.disasm);
if(!p.analyzeAllInstructions()) {
errors++;
if(!force) break;
}
}
else if(pass == "syscall") {
printr("[syscall] Finding syscall references...");
OS *os = space.os_analysis();
if(os) {
SyscallPass p(space.disasm, os);
if(!p.analyzeAllInstructions()) {
errors++;
if(!force) break;
}
}
}
else {
FxOS_log(ERR, "unknown pass <%s>", pass);
break;
}
printf("\n");
else successes++;
timer.stop();
FxOS_log(LOG, "Finished pass <%s> in %s", pass, timer.format_time());
if(errors && !force)
break;
/* TODO: Get subfunction addresses here */
}
timer.stop();
printf("\n");
FxOS_log(LOG, "Finished pass <cfg> in %s", timer.format_time());
/* Annotate all decoded instructions with pcrel/syscall
TODO: analyze only the functions, if possible */
printr("[pcrel] Resolving PC-relative addressing modes...");
timer.restart();
PcrelPass pcrel_pass(space.disasm);
if(!pcrel_pass.analyzeAllInstructions()) {
errors++;
if(!force) return;
}
timer.stop();
printf("\n");
FxOS_log(LOG, "Finished pass <pcrel> in %s", timer.format_time());
printr("[syscall] Finding syscall references...");
timer.restart();
OS *os = space.os_analysis();
if(os) {
SyscallPass syscall_pass(space.disasm, os);
if(!syscall_pass.analyzeAllInstructions()) {
errors++;
if(!force) return;
}
}
timer.stop();
printf("\n");
FxOS_log(LOG, "Finished pass <syscall> in %s", timer.format_time());
printf("Successfully analyzed %d functions (%d errors)\n",
successes, errors);

View File

@ -26,7 +26,7 @@ static void disassemble(Session &session, Disassembly &disasm,
if(pass == "cfg")
{
CfgPass p(disasm);
ok = p.analyzeFunction(address);
ok = p.analyzeAnonymousFunction(address);
}
else if(pass == "pcrel")
{
@ -136,7 +136,7 @@ void _dr(Session &session, Range range)
/* Load the block into memory */
for(uint32_t pc = range.start; pc < range.end; pc += 2)
disasm.readins(pc);
disasm.getInstructionAt(pc, true);
disassemble(session, disasm, { "pcrel", /*"constprop",*/ "syscall",
"print" }, -1);

View File

@ -42,6 +42,8 @@ static void show_vspace(std::string name, VirtualSpace &s, Session &session)
s.symbols.symbols.size());
fmt::print(" Main disassembly: {} instructions\n",
s.disasm.instructions.size());
fmt::print(" Functions: {}\n",
s.disasm.functions.size());
fmt::print(" Region--Start---------End---------File--------------------\n");
for(auto &b: s.bindings) {