fxos: replace function analysis with new objects

This affects ad and ads, which are now called af (Analysis: Functions)
and afs, and produce binary objects instead of entries in the soon-to-
be-removed disassembly.
This commit is contained in:
Lephenixnoir 2023-11-05 19:26:04 +01:00
parent 7f2fc40ac6
commit d65515b9ad
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
10 changed files with 217 additions and 375 deletions

View File

@ -55,7 +55,6 @@ set(fxos_core_SOURCES
lib/function.cpp
lib/memory.cpp
lib/os.cpp
lib/passes/cfg.cpp
lib/passes/pcrel.cpp
lib/passes/syscall.cpp
lib/project.cpp

View File

@ -60,6 +60,9 @@ struct Binary
return m_objects;
}
/* Add an object to the binary. */
void addObject(std::unique_ptr<BinaryObject> &&obj);
/* Return the address of an object by name, if it exists. If there are
multiple objects with the same name, returns an arbitrary one. */
std::optional<u32> objectAddress(std::string const &name) const;
@ -78,6 +81,12 @@ struct Binary
std::vector<BinaryObject *> objectsCovering(u32 address);
std::vector<BinaryObject const *> objectsCovering(u32 address) const;
/* Return one or all functions defined at a given address. */
Function *functionAt(u32 address);
Function const *functionAt(u32 address) const;
std::vector<Function *> functionsAt(u32 address);
std::vector<Function const *> functionsAt(u32 address) const;
private:
VirtualSpace m_vspace;

View File

@ -78,14 +78,24 @@ struct Function: public BinaryObject
return m_blocks.end();
}
/* Construction functions to be used only by the cfg pass. */
void exploreFunctionAt(u32 address);
/* Version number of the analysis that was run on the function. Used to
   avoid re-analyzing unless there are new features. The value is whatever
   was last passed to setAnalysisVersion(); it starts at 0 for a function
   on which no version has been recorded yet. */
int analysisVersion() const
{
return m_analysisVersion;
}
/* Construction functions to be used only by the analysis pass. */
bool exploreFunctionAt(u32 address);
BasicBlock &addBasicBlock(BasicBlock &&bb);
void updateFunctionSize();
void setAnalysisVersion(int version);
private:
/* List of basic blocks (entry block is always number 0) */
std::vector<BasicBlock> m_blocks;
/* Analysis version */
int m_analysisVersion = 0;
};
/* Basic block within a function. */
@ -338,6 +348,10 @@ struct Instruction
assert(insmap[m_opcode] && "use of Instruction with invalid opcode");
return *insmap[m_opcode];
}
/* Whether the opcode of this instruction has an entry in [insmap], i.e.
   whether opcode() can be called without tripping its assertion. */
bool hasValidOpcode() const
{
    return static_cast<bool>(insmap[m_opcode]);
}
/* Instruction's size in bytes. */
uint size() const
{

View File

@ -1,75 +0,0 @@
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/passes/cfg: Control Flow Graph construction
//
// This pass explores functions by loading every instruction's potential
// successor into the disassembly store. It also sets the [jmptarget] field of
// the Instructions as it goes, allowing other passes to traverse the (somewhat
// implicit) CFG.
//
// This is the main exploration pass; other passes do not typically load new
// instructions from the underlying disassembly. Straightforward passes such as
// [print] iterate on instructions loaded by this pass.
//
// The main problem that this pass has to deal with is delay slots. These are
// pretty tricky to deal with; for instance, in
//
// bra pc+120
// mov #1, r4
//
// the CPU will run [mov #1, r4] while performing the branch to pc+120 in order
// to fill an otherwise-unfillable pipeline cycle. This is annoying for all
// kinds of reasons, and fxos handles this by acting as if the mov itself had
// pc+120 as an unconditional successor.
//
// This could be tricky for the abstract interpreter because the jump target
// has to be computed using the state at the jump instruction, not the one at
// the delay slot. Luckily all delayed jumps are no-ops in terms of state, so
// the confusion has no effect.
//
// Note that jumping into a delay slot will activate the jump in fxos, which is
// not the actual behavior of the processor. I don't believe any compiler does
// this kind of thing (most are not inherently designed for delay slots
// anyway). If such an instance is found, fxos will throw an exception and give
// up to make sure no analysis pass returns invalid results.
//
// Take-home message: delay slots are a pain to analyze, so we get rid of them
// as soon as possible and proceed with normal semantics.
//---
#ifndef FXOS_PASSES_CFG_H
#define FXOS_PASSES_CFG_H
#include <fxos/disassembly.h>
#include <fxos/passes/pcrel.h>
#include <set>
namespace FxOS {
/* Instruction pass that explores functions and records, for each instruction,
   the branching properties needed to traverse the CFG. It also runs the pcrel
   pass on every instruction it analyzes. */
class CfgPass: public InstructionPass
{
public:
/* Create the pass for the given binary. */
CfgPass(Binary &binary);
/* Analyze the instruction at [pc]; returns false if it is invalid. */
bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override;
/* Explore a new function at the specified address. This method creates the
function if it doesn't exist yet, explores its CFG, and generates claims
over relevant parts of the binary. */
bool exploreFunction(uint32_t pc);
private:
/* Last explored function */
uint32_t m_lastFunction;
/* Set of instructions in a function, used to generate new claims */
std::set<uint32_t> m_claimedInstructions;
/* pcrel pass used to find call to other functions */
PcrelPass m_pcrel;
};
} /* namespace FxOS */
#endif /* FXOS_PASSES_CFG_H */

View File

@ -59,6 +59,11 @@ OS *Binary::OSAnalysis(bool force) const
return m_os.get();
}
/* Register [obj] in the binary. Ownership of the object is transferred to the
   binary's object table, where it is indexed by the address it reports at
   insertion time. [obj] must not be null. */
void Binary::addObject(std::unique_ptr<BinaryObject> &&obj)
{
    /* Read the key before the pointer is moved into the container */
    auto const address = obj->address();
    m_objects.insert({address, std::move(obj)});
}
std::optional<u32> Binary::objectAddress(std::string const &name) const
{
for(auto const &[address, obj]: m_objects) {
@ -101,7 +106,7 @@ std::vector<BinaryObject *> Binary::objectsCovering(u32 address)
std::vector<BinaryObject *> objects;
for(auto const &[obj_address, obj]: m_objects) {
if(obj_address <= address && obj_address + obj->size() < address)
if(obj_address <= address && obj_address + obj->size() > address)
objects.push_back(obj.get());
}
@ -113,13 +118,53 @@ std::vector<BinaryObject const *> Binary::objectsCovering(u32 address) const
std::vector<BinaryObject const *> objects;
for(auto const &[obj_address, obj]: m_objects) {
if(obj_address <= address && obj_address + obj->size() < address)
if(obj_address <= address && obj_address + obj->size() > address)
objects.push_back(obj.get());
}
return objects;
}
/* Return the first function among the objects that objectsAt() reports for
   [address], or nullptr if none of them is a function. */
Function *Binary::functionAt(u32 address)
{
    for(auto candidate: objectsAt(address)) {
        if(!candidate->isFunction())
            continue;
        return &candidate->getFunction();
    }

    return nullptr;
}
/* Const variant: return the first function among the objects that objectsAt()
   reports for [address], or nullptr if none of them is a function. */
Function const *Binary::functionAt(u32 address) const
{
    for(auto candidate: objectsAt(address)) {
        if(!candidate->isFunction())
            continue;
        return &candidate->getFunction();
    }

    return nullptr;
}
/* Collect every function among the objects that objectsAt() reports for
   [address], in the order objectsAt() returns them. The result is empty when
   no object there is a function. */
std::vector<Function *> Binary::functionsAt(u32 address)
{
    std::vector<Function *> matches;

    for(auto candidate: objectsAt(address)) {
        if(!candidate->isFunction())
            continue;
        matches.push_back(&candidate->getFunction());
    }

    return matches;
}
/* Const variant: collect every function among the objects that objectsAt()
   reports for [address], in the order objectsAt() returns them. */
std::vector<Function const *> Binary::functionsAt(u32 address) const
{
    std::vector<Function const *> matches;

    for(auto candidate: objectsAt(address)) {
        if(!candidate->isFunction())
            continue;
        matches.push_back(&candidate->getFunction());
    }

    return matches;
}
//=== BinaryObject ===//
bool BinaryObject::intersects(BinaryObject const &other) const

View File

@ -45,6 +45,11 @@ void Function::updateFunctionSize()
this->setSize(max_address - this->address());
}
void Function::setAnalysisVersion(int version)
{
m_analysisVersion = version;
}
/* The first step in building function CFGs is delimiting the blocks. Starting
from the entry point, we generate "superblocks" by reading instructions
linearly until we find a terminator.
@ -75,10 +80,11 @@ struct Superblock
};
// TODO: Unclear what the exit status of the superblock is in case of error
static Superblock exploreSuperblock(Function &function, u32 entry)
static Superblock exploreSuperblock(Function &function, u32 entry, bool *error)
{
Superblock sb;
sb.leaders.insert(entry);
*error = false;
VirtualSpace &vspace = function.parentBinary().vspace();
bool inDelaySlot = false;
@ -92,16 +98,24 @@ static Superblock exploreSuperblock(Function &function, u32 entry)
// TODO: Handle 32-bit DSP instructions
if(!vspace.covers(pc, 2)) {
FxOS_log(ERR, "superblock %08x exits vspace at %08x", entry, pc);
*error = true;
break;
}
u32 opcodeBits = vspace.read_u16(pc);
Instruction ins(function, pc, opcodeBits);
if(!ins.hasValidOpcode()) {
FxOS_log(ERR, "invalid instruction %08x: %04x in superblock", pc,
opcodeBits);
*error = true;
break;
}
AsmInstruction opcode = ins.opcode();
if(inDelaySlot && !opcode.isValidDelaySlot()) {
FxOS_log(ERR, "superblock %08x has invalid delay slot at %08x",
entry, pc);
*error = true;
break;
}
@ -121,6 +135,9 @@ static Superblock exploreSuperblock(Function &function, u32 entry)
pc += 2;
}
if(*error)
return sb;
if(sb.mayFallthrough)
sb.fallthroughTarget = pc;
return sb;
@ -140,7 +157,7 @@ static bool cutSuperblockAt(std::vector<Superblock> &blocks, u32 address)
return false;
}
void Function::exploreFunctionAt(u32 functionAddress)
bool Function::exploreFunctionAt(u32 functionAddress)
{
assert(!(functionAddress & 1) && "function starts at unaligned address");
@ -158,7 +175,11 @@ void Function::exploreFunctionAt(u32 functionAddress)
if(cutSuperblockAt(blocks, entry))
continue;
Superblock sb = exploreSuperblock(*this, entry);
bool error = false;
Superblock sb = exploreSuperblock(*this, entry, &error);
if(error)
return false;
/* Process static jump targets and fallthrough targets to queue new
superblocks or cut existing ones */
@ -199,6 +220,8 @@ void Function::exploreFunctionAt(u32 functionAddress)
}
// TODO: Set successors and predecessors
return true;
}
//=== BasicBlock ===//

View File

@ -7,7 +7,7 @@
//---------------------------------------------------------------------------//
#include <fxos/lang.h>
#include <fxos/disassembly.h>
#include <fxos/function.h>
#include <fxos/util/format.h>
#include <fxos/util/log.h>

View File

@ -1,145 +0,0 @@
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
#include <fxos/passes/cfg.h>
#include <fxos/disassembly.h>
#include <fxos/util/log.h>
#include <fxos/binary.h>
#include <cassert>
namespace FxOS {
/* Build the pass for [binary], along with the pcrel pass it relies on. The
   set of claimed instructions starts out empty, and discovery of new
   instructions is enabled. */
CfgPass::CfgPass(Binary &binary): InstructionPass(binary), m_pcrel {binary}
{
    setAllowDiscovery(true);
}
/* Analyze a single instruction during CFG exploration: check that it decodes,
   compute its static jump target if it has one, set the branching properties
   (on the delay slot when the instruction has one, on the instruction itself
   otherwise), then run the pcrel pass on it.

   Returns false if the instruction cannot be decoded, if a static jump has
   unexpected arguments, or if a delay slot is invalid. Throws
   std::logic_error when a jump targets a delay slot or when a delay slot is
   also a leader, since neither situation is supported. */
bool CfgPass::analyzeInstruction(uint32_t pc, OldInstruction &i)
{
    /* Don't explore successors if the instruction cannot be decoded, not
       even pc+2. This will prevent wild overshoot. */
    if(!i.inst) {
        FxOS_log(ERR, "invalid instruction at 0x%08x: 0x%04x", pc, i.opcode);
        return false;
    }

    m_claimedInstructions.insert(pc);

    /* Compute the jump target for jump instructions. This is easy because
       they are all trivially computable. (...If they are not we dub them
       "terminal" to avoid the computation!) */
    uint32_t jmptarget = 0xffffffff;

    if(i.inst->isAnyStaticJump()) {
        auto &args = i.inst->args;

        if(i.inst->arg_count != 1 || args[0].kind != AsmArgument::PcJump) {
            FxOS_log(ERR, "invalid jump instruction at 0x%08x", pc);
            return false;
        }

        jmptarget = (pc + 4) + args[0].disp;

        /* Make the target of the jump a leader */
        // TODO: Use Binary instructions
        OldInstruction &target
            = *m_binary.vspace().disasm.getInstructionAt(jmptarget, true);
        target.leader = true;

        /* Check that it's not in a delay slot */
        if(target.delayslot)
            throw std::logic_error(format(
                "0x%08x jumps into 0x%08x, which is "
                "a delay slot - this is unsupported by fxos and will produce "
                "garbage analysis! (x_x)",
                pc, jmptarget));
    }

    /* If this instruction is in a delay slot, check its type. A valid
       delay slot has no branching properties on its own, so nothing new to
       set in the properties. */
    if(i.delayslot) {
        if(!i.inst->isValidDelaySlot()) {
            FxOS_log(ERR, "invalid delay slot at 0x%08x", pc);
            return false;
        }
    }
    /* If it has a delay slot, create it at the next instruction */
    else if(i.inst->hasDelaySlot()) {
        // TODO: Use Binary instructions
        OldInstruction &slot
            = *m_binary.vspace().disasm.getInstructionAt(pc + 2, true);

        if(slot.leader)
            throw std::logic_error(format(
                "0x%08x is a leader and also a delay"
                " slot - this is unsupported by fxos and will produce garbage "
                "analysis! (x_x)",
                pc + 2));

        /* The slot itself may fail to decode, in which case it has no [inst]
           to query; report it as an invalid delay slot instead of
           dereferencing a null pointer. */
        if(!slot.inst || !slot.inst->isValidDelaySlot()) {
            FxOS_log(ERR, "invalid delay slot at 0x%08x", pc + 2);
            return false;
        }

        /* The branching properties of the jump are carried by its slot */
        slot.delayslot = true;
        slot.terminal = i.inst->isReturn() || i.inst->isDynamicJump();
        slot.jump = i.inst->isUnconditionalJump();
        slot.condjump = i.inst->isConditionalJump();
        slot.jmptarget = jmptarget;
    }
    /* Otherwise, use standard properties */
    else {
        i.terminal = i.inst->isReturn() || i.inst->isDynamicJump();
        i.jump = i.inst->isUnconditionalJump();
        i.condjump = i.inst->isConditionalJump();
        i.jmptarget = jmptarget;
    }

    m_pcrel.analyzeInstruction(pc, i);
    return true;
}
bool CfgPass::exploreFunction(uint32_t pc)
{
m_lastFunction = pc;
m_claimedInstructions.clear();
// TODO: Use Binary functions
OldFunction *func = m_binary.vspace().disasm.getOrCreateFunctionAt(pc);
if(!this->analyzeFunction(pc))
return false;
RelConstDomain RCD;
/* Look for call targets */
for(uint32_t pc: m_claimedInstructions) {
// TODO: Use Binary instructions
OldInstruction const *ci
= m_binary.vspace().disasm.getInstructionAt(pc);
if(!ci)
continue;
AsmInstruction const &i = *ci->inst;
/* Find function call instructions */
if(i.isReturn() || !i.isCall() || i.arg_count < 1)
continue;
/* The target must be known */
if(!RCD.is_constant(ci->args[0].location))
continue;
uint32_t target = RCD.constant_value(ci->args[0].location);
auto &v = func->callTargets;
if(std::find(v.begin(), v.end(), target) == v.end())
func->callTargets.push_back(target);
}
return true;
}
} /* namespace FxOS */

View File

@ -4,121 +4,125 @@
#include "errors.h"
#include "util.h"
#include <fxos/disassembly.h>
#include <fxos/function.h>
#include <fxos/vspace.h>
#include <fxos/util/Timer.h>
#include <fxos/util/log.h>
#include <fxos/passes/cfg.h>
#include <fxos/passes/pcrel.h>
#include <fxos/passes/syscall.h>
#include <fmt/core.h>
#include <endian.h>
// TODO: fxos: Proper definition of function analysis version
#define FXOS_FUNCTION_ANALYSIS_VERSION 1
//---
// ad
// af
//---
static void ad_disassemble_all(
Binary &binary, std::vector<uint32_t> const &addresses, bool force)
struct _af_args
{
int successes = 0, errors = 0;
Timer timer;
bool update = false;
bool force = false;
std::string name = "";
std::vector<u32> addresses;
};
/* Analyze the CFGs of all functions */
timer.start();
CfgPass cfg_pass(binary);
/* We collect subfunction addresses while running the pass */
for(int i = 0; i < (int)addresses.size(); i++) {
uint32_t entry = addresses[i];
printr("[cfg %d/%zu] Disassembling 0x%08x...", i + 1, addresses.size(),
entry);
if(!cfg_pass.exploreFunction(entry)) {
FxOS_log(ERR, "while processing 0x%08x", entry);
errors++;
if(!force)
return;
}
else
successes++;
}
timer.stop();
printf("\n");
FxOS_log(LOG, "Finished pass <cfg> in %s", timer.format_time());
printr("[syscall] Finding syscall references...");
timer.restart();
OS *os = binary.OSAnalysis();
if(os) {
SyscallPass syscall_pass(binary, os);
if(!syscall_pass.analyzeAllInstructions()) {
errors++;
if(!force)
return;
}
}
timer.stop();
printf("\n");
FxOS_log(LOG, "Finished pass <syscall> in %s", timer.format_time());
printf(
"Successfully analyzed %d functions (%d errors)\n", successes, errors);
/* TODO: Get subfunction addresses by abstract interpretation and keep
going recursively */
}
static std::vector<uint32_t> parse_ad(Session &session, Parser &parser)
static _af_args parse_af(Session &session, Parser &parser)
{
Binary *b = session.currentBinary();
if(!b)
return std::vector<uint32_t>();
_af_args args;
parser.option("-u", [&args](Parser &) { args.update = true; });
parser.option("--force", [&args](Parser &) { args.force = true; });
parser.option("-n", [&args](Parser &p) { args.name = p.symbol(""); });
parser.accept_options();
std::vector<uint32_t> addresses;
do {
addresses.push_back(parser.expr(b));
args.addresses.push_back(parser.expr(session.currentBinary()));
}
while(!parser.at_end());
parser.end();
return addresses;
return args;
}
void _ad(Session &session, std::vector<uint32_t> const &addresses)
/* Analyze the functions at every address in [args.addresses] and register the
   resulting Function objects in [binary]. An address is skipped when a
   function already exists there with an analysis version at least equal to
   FXOS_FUNCTION_ANALYSIS_VERSION. A summary with timing is printed at the end.

   NOTE(review): args.update, args.force and args.name are not consulted
   here. Also, when an outdated function exists at an address, the new object
   is added without the old one being removed in this function - confirm how
   Binary::addObject handles an address that is already populated. */
static void af_analyze(Binary &binary, _af_args const &args)
{
    int const currentVersion = FXOS_FUNCTION_ANALYSIS_VERSION;
    auto const &entries = args.addresses;
    int analyzed = 0, upToDate = 0, failed = 0;

    Timer timer;
    timer.start();

    int position = 0;
    for(u32 entry: entries) {
        position++;
        printr("[%d/%zu] Analyzing 0x%08x...", position, entries.size(),
            entry);

        /* Leave alone functions that already have an up-to-date analysis */
        Function *existing = binary.functionAt(entry);
        if(existing && existing->analysisVersion() >= currentVersion) {
            upToDate++;
            continue;
        }

        auto f = std::make_unique<Function>(binary, entry);
        if(!f->exploreFunctionAt(entry)) {
            FxOS_log(ERR, "... while analyzing 0x%08x", entry);
            failed++;
            continue;
        }

        f->updateFunctionSize();
        f->setAnalysisVersion(currentVersion);
        binary.addObject(std::move(f));
        analyzed++;

        /* TODO: Queue subfunctions for recursive analysis */
    }

    timer.stop();

    printf("\nAnalyzed %d functions (+%d skipped, +%d errors) in %s\n",
        analyzed, upToDate, failed, timer.format_time().c_str());

    /* TODO: Check for overlapping functions etc */
}
void _af(Session &session, _af_args const &args)
{
Binary *b = session.currentBinary();
if(!b)
return FxOS_log(ERR, "No current binary!\n");
ad_disassemble_all(*b, addresses, false);
af_analyze(*b, args);
}
//--
// ads
// afs
//---
static void parse_ads(Session &, Parser &parser)
/* Parse the arguments of the afs command: the optional -u and --force flags,
   and nothing else. The address list of the result is left empty.
   The option callbacks capture the result by reference; presumably the parser
   only invokes them during accept_options() - TODO confirm. */
static _af_args parse_afs(Session &, Parser &parser)
{
    _af_args parsed;

    parser.option("-u", [&parsed](Parser &) { parsed.update = true; });
    parser.option("--force", [&parsed](Parser &) { parsed.force = true; });
    parser.accept_options();
    parser.end();

    return parsed;
}
void _ads(Session &session)
void _afs(Session &session, _af_args &args)
{
Binary *b = session.currentBinary();
if(!b)
return FxOS_log(ERR, "No current binary!\n");
OS *os = b->OSAnalysis();
if(!os) {
printf("ads: OS analysis failed, cannot enumerate syscalls");
return;
}
if(!os)
return FxOS_log(ERR, "afs: No OS analysis, cannot enumerate syscalls");
// TODO: afs: Use syscall info
std::vector<uint32_t> addresses;
for(int i = 0; i < os->syscall_count(); i++)
addresses.push_back(os->syscall(i));
args.addresses.push_back(os->syscall(i));
ad_disassemble_all(*b, addresses, true);
af_analyze(*b, args);
}
//---
@ -224,33 +228,41 @@ void _am(Session &session, std::string name)
// Command definitions
//---
static ShellCommand _ad_cmd(
"ad",
static ShellCommand _af_cmd(
"af",
[](Session &s, Parser &p) {
auto addresses = parse_ad(s, p);
_ad(s, addresses);
auto args = parse_af(s, p);
_af(s, args);
},
[](Session &s, Parser &p) { parse_ad(s, p); }, "Analysis: Disassemble", R"(
ad [<addresses>...]
[](Session &s, Parser &p) { parse_af(s, p); }, "Analysis: Functions", R"(
af [-u|--force] [-n <name>] [<addresses>...]
Disassemble the given set of addresses into the current virtual space's main
disassembly. The main disassembly is used for OS-wide features like cross-
reference search or call graphs.
Explore and disassemble functions starting at the specified addresses. For each
explored function, a binary object of Function type is created, and the
function is statically analyzed.
By default, addresses where functions already exist are not reanalyzed.
Specifying -u (update) causes all functions to be re-processed, while keeping
user-specified information (name, prototype, etc). Specifying --force causes
all functions to be reanalyzed from scratch without keeping user-specified
information.
When a single address is given, -n can specify the name of the function object
to be created.
)");
static ShellCommand _ads_cmd(
"ads",
static ShellCommand _afs_cmd(
"afs",
[](Session &s, Parser &p) {
parse_ads(s, p);
_ads(s);
auto args = parse_afs(s, p);
_afs(s, args);
},
[](Session &s, Parser &p) { parse_ads(s, p); },
"Analysis: Disassemble all Syscalls", R"(
ads
[](Session &s, Parser &p) { parse_afs(s, p); },
"Analysis: Functions (Syscalls)", R"(
afs [-u|--force]
Disassembles all syscalls entries using ad, which stores the results in the
current virtual space's main disassembly. Unlike ad, this commands continues
even if some syscalls fail to disassemble.
Explore and disassemble syscalls. Like af, but automatically pulls function
names and prototypes out of the predefined syscall table, when there is one.
)");
static ShellCommand _am_cmd(

View File

@ -6,7 +6,6 @@
#include <fmt/core.h>
#include <fxos/disassembly.h>
#include <fxos/passes/cfg.h>
#include <fxos/passes/pcrel.h>
#include <fxos/passes/syscall.h>
#include <fxos/view/assembly.h>
@ -14,56 +13,6 @@
#include <fxos/util/Timer.h>
#include <fxos/util/log.h>
/* Run the named passes in order on [binary], logging the time taken by each
   one, and stop at the first pass that fails. [address] is the function to
   explore for the "cfg" pass; a value of -1 (all bits set) means that there
   is no function to explore or print. After the passes, if an address was
   given, the function there is explored again and printed. */
static void disassemble(
    Binary &binary, std::vector<std::string> const &passes, u32 address)
{
    for(auto const &pass: passes) {
        Timer timer;
        timer.start();

        /* Passes that end up with nothing to run (no OS for "syscall", or
           "print") count as successful; previously [ok] was read
           uninitialized on these paths. */
        bool ok = true;

        if(pass == "cfg") {
            CfgPass p(binary);
            ok = p.analyzeAnonymousFunction(address);
        }
        else if(pass == "pcrel") {
            PcrelPass p(binary);
            ok = p.analyzeAllInstructions();
        }
        else if(pass == "syscall") {
            OS *os = binary.OSAnalysis();
            if(os) {
                SyscallPass p(binary, os);
                ok = p.analyzeAllInstructions();
            }
        }
        else if(pass == "print" && address + 1) {
            // viewAssemblyLegacyAddress(binary, address);
        }
        else {
            FxOS_log(ERR, "unknown pass <%s>", pass);
            ok = false;
        }

        timer.stop();
        FxOS_log(LOG, "Finished pass <%s> in %s", pass, timer.format_time());

        if(!ok) {
            FxOS_log(ERR, "pass <%s> failed", pass);
            break;
        }
    }

    /* address + 1 is zero exactly when address is -1, i.e. "no function" */
    if(address + 1) {
        Function f(binary, address);
        f.exploreFunctionAt(address);

        ViewAssemblyOptions opts;
        opts.binary = &binary;
        viewAssemblyFunction(f, &opts);
    }
}
//---
// d
//---
@ -111,7 +60,14 @@ void _d(Session &session, std::variant<long, Range> location)
for(uint32_t pc = range.start; pc < range.end; pc += 2)
b->vspace().disasm.getInstructionAt(pc, true);
disassemble(*b, {"pcrel", /*"constprop",*/ "syscall"}, -1);
PcrelPass p(*b);
p.analyzeAllInstructions();
OS *os = b->OSAnalysis();
if(os) {
SyscallPass p(*b, os);
p.analyzeAllInstructions();
}
MemoryRegion r;
r.start = range.start;
@ -127,8 +83,12 @@ void _d(Session &session, std::variant<long, Range> location)
address++;
}
/* cfg implicitly does pcrel */
disassemble(*b, {"cfg", /*"constprop",*/ "syscall", "print"}, address);
Function f(*b, address);
if(f.exploreFunctionAt(address)) {
ViewAssemblyOptions opts;
opts.binary = b;
viewAssemblyFunction(f, &opts);
}
}
}