af, afs: add recursive function search option (-r)

This commit is contained in:
Lephenixnoir 2024-01-11 20:02:37 +01:00
parent 7a656c3b3e
commit 51ee6fd6ff
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
4 changed files with 94 additions and 20 deletions

2
.gitignore vendored
View File

@ -1,5 +1,5 @@
# Build files
/build
/build*
/bin
# Semantic exclude

View File

@ -105,7 +105,12 @@ struct Function: public BinaryObject
return m_analysisResult.get();
}
/* Construction functions to be used only by the analysis pass. */
/* List all function calls. This iterates on all instructions in the
function to find calls. Requires analysis for useful targets. Returns a
series of pairs {instruction address, call target}. */
std::vector<std::pair<u32, RelConst>> findFunctionCalls() const;
/* Construction functions to be used only by the exploration pass. */
bool exploreFunctionAt(u32 address);
BasicBlock &createBasicBlock(u32 address, bool isEntryBlock);
void sortBasicBlocks();

View File

@ -20,7 +20,7 @@ Function::Function(Binary &binary, u32 address):
{
/* Size is not determined at first. */
m_blocks.reserve(4);
m_blocks.reserve(16);
/* Entry block index is not determined at first. */
m_entryBlockIndex = -1;
@ -69,6 +69,24 @@ BasicBlock const &Function::basicBlockByAddress(u32 pc) const
__builtin_unreachable();
}
/* Walk every basic block of the function and gather its call instructions.
   For each call, the register used as base of the opcode's first operand is
   looked up in the program state attached to that instruction; the resulting
   value is recorded next to the address of the call instruction. */
std::vector<std::pair<u32, RelConst>> Function::findFunctionCalls() const
{
    std::vector<std::pair<u32, RelConst>> callSites;

    for(BasicBlock const &block: *this) {
        for(auto const &[insn, progState]: block.instructionsWithState()) {
            auto const &op = insn.opcode();

            /* Only call instructions contribute an entry */
            if(op.isCall()) {
                CpuRegister baseReg = op.operand(0).base();
                RelConst value = progState.getRegister(baseReg);
                callSites.emplace_back(insn.address(), value);
            }
        }
    }

    return callSites;
}
/* Create a new basic block and add it to the function. Invalidates previous
pointers to other blocks! */
BasicBlock &Function::createBasicBlock(u32 address, bool isEntryBlock)
@ -171,7 +189,11 @@ static Superblock exploreSuperblock(Function &function, u32 entry, bool *error)
int vspaceLen;
u8 const *vspaceData = (u8 *)vspace.translate_dynamic(entry, &vspaceLen);
u32 vspaceEnd = entry + vspaceLen;
assert(vspaceData);
if(!vspaceData) {
FxOS_log(ERR, "superblock %08x out of vspace", entry);
*error = true;
return sb;
}
while(!terminatorFound || inDelaySlot) {
sb.addresses.push_back(pc);

View File

@ -21,6 +21,7 @@ struct _af_args
{
bool update = false;
bool force = false;
bool recursive = false;
std::string name = "";
std::vector<u32> addresses;
bool consistency = false;
@ -33,6 +34,7 @@ static _af_args parse_af(Session &session, Parser &parser)
parser.option("--force", [&args](Parser &) { args.force = true; });
parser.option("-n", [&args](Parser &p) { args.name = p.symbol(""); });
parser.option("-c", [&args](Parser &) { args.consistency = true; });
parser.option("-r", [&args](Parser &) { args.recursive = true; });
parser.accept_options();
if(args.consistency && parser.at_end())
@ -98,43 +100,83 @@ static void _af_consistency(Binary const &binary)
}
}
/* Explore and analyze the functions whose entry points are listed in
   args.addresses. Entry points are processed in FIFO order from a work
   queue. When args.recursive is set, every call found in a processed
   function whose target is a constant address is appended to the queue, so
   callees are processed as well. A progress line is printed for each entry
   point and a summary is printed at the end. If at least one function was
   analyzed, session.project().setDirty() is called. */
static void af_analyze(Binary &binary, _af_args const &args)
static void af_analyze(Session &session, Binary &binary, _af_args const &args)
{
int successes = 0, skipped = 0, errors = 0;
Timer timer;
timer.start();
auto const &addresses = args.addresses;
/* Work queue of entry points still to process, and the set of all addresses
   queued so far. The set is consulted before queueing a call target so that
   a target is never queued again. */
std::deque<u32> functionQueue;
std::set<u32> seen;
for(int i = 0; i < (int)addresses.size(); i++) {
u32 entry = addresses[i];
printr("[%d/%zu] Analyzing 0x%08x...", i + 1, addresses.size(), entry);
/* Seed the queue with the addresses requested by the caller */
for(u32 entry: args.addresses) {
functionQueue.push_back(entry);
seen.insert(entry);
}
/* Entry points whose exploration failed. This set is only written to in
   this function, never read. NOTE(review): looks like a debugging leftover
   -- confirm before relying on it. */
std::set<u32> TEST_knownErrors;
/* done: entry points processed so far; total: entry points queued so far
   (grows as call targets are discovered), both used for the progress line.
   unresolvedCalls: calls whose target is not a constant. */
int done = 0, successes = 0, skipped = 0, errors = 0;
int total = functionQueue.size();
int unresolvedCalls = 0;
for(; !functionQueue.empty(); done++) {
u32 entry = functionQueue.front();
functionQueue.pop_front();
printr("[%d/%d] Analyzing 0x%08x...", done + 1, total, entry);
/* Check if there is already a function defined here */
Function *existing = binary.functionAt(entry);
if(!existing) {
/* A function already exists: run the analysis only if it has none yet,
   otherwise count it as skipped */
if(existing) {
if(!existing->hasAnalysis()) {
existing->runAnalysis();
successes++;
}
else {
skipped++;
}
}
/* No function here yet: explore a new one and add it to the binary */
else {
auto f = std::make_unique<Function>(binary, entry);
if(f->exploreFunctionAt(entry)) {
f->updateFunctionSize();
f->runAnalysis();
binary.addObject(std::move(f));
/* f has been moved from; look the function up again to get a usable
   pointer. NOTE(review): the result is dereferenced below without a null
   check -- presumably addObject() makes it visible to functionAt(). */
existing = binary.functionAt(entry);
successes++;
}
else {
FxOS_log(ERR, "... while analyzing 0x%08x", entry);
TEST_knownErrors.insert(entry);
errors++;
/* Exploration failed, so there is no function to search for calls */
continue;
}
}
else {
skipped++;
}
/* TODO: Queue subfunctions for recursive analysis */
RelConstDomain RCD;
if(args.recursive) {
for(auto const &[pc, target]: existing->findFunctionCalls()) {
/* Targets that are not constants cannot be queued; just count them */
if(!RCD.is_constant(target)) {
unresolvedCalls++;
continue;
}
u32 targetAddress = RCD.constant_value(target);
/* Queue the call target unless it has been queued before */
if(!seen.contains(targetAddress)) {
functionQueue.push_back(targetAddress);
seen.insert(targetAddress);
total++;
}
}
}
}
timer.stop();
printf("\nAnalyzed %d functions (+%d skipped, +%d errors) in %s\n",
printf("\nAnalyzed %d functions (+%d already done, +%d errors) in %s\n",
successes, skipped, errors, timer.format_time().c_str());
if(args.recursive)
printf("There were %d unresolved call sites.\n", unresolvedCalls);
/* Flag the project as modified only when something was actually analyzed */
if(successes > 0)
session.project().setDirty();
// _af_consistency(binary);
}
@ -144,7 +186,7 @@ void _af(Session &session, _af_args const &args)
Binary *b = session.currentBinary();
if(!b)
return FxOS_log(ERR, "No current binary!\n");
af_analyze(*b, args);
af_analyze(session, *b, args);
}
//--
@ -156,6 +198,7 @@ static _af_args parse_afs(Session &, Parser &parser)
_af_args args;
parser.option("-u", [&args](Parser &) { args.update = true; });
parser.option("--force", [&args](Parser &) { args.force = true; });
parser.option("-r", [&args](Parser &) { args.recursive = true; });
parser.accept_options();
parser.end();
return args;
@ -181,7 +224,7 @@ void _afs(Session &session, _af_args &args)
for(int i = 0; i < os->syscall_count(); i++)
args.addresses.push_back(os->syscall(i));
af_analyze(*b, args);
af_analyze(session, *b, args);
}
//---
@ -294,13 +337,17 @@ static ShellCommand _af_cmd(
_af(s, args);
},
[](Session &s, Parser &p) { parse_af(s, p); }, "Analysis: Functions", R"(
af [-u|--force] [-n <name>] [<addresses>...]
af [-r] [-u|--force] [-n <name>] [<addresses>...]
af -c
Explore and disassemble functions starting at the specified addresses. For each
explored function, a binary object of Function type is created, and the
function is statically analyzed.
With -r (recursive), also analyze called functions, recursively. This might not
be exhaustive because function calls can't always be resolved to a statically-
known address.
By default, addresses where functions already exist are not reanalyzed.
Specifying -u (update) causes all functions to be re-processed, while keeping
user-specified information (name, prototype, etc). Specifying --force causes
@ -325,7 +372,7 @@ static ShellCommand _afs_cmd(
},
[](Session &s, Parser &p) { parse_afs(s, p); },
"Analysis: Functions (Syscalls)", R"(
afs [-u|--force]
afs [-r] [-u|--force]
Explore and disassemble syscalls. Like af, but automatically pulls function
names and prototypes out of the predefined syscall table, when there is one.