fxos: faster project loading through functions

- Compute analysis on-demand
- Faster function exploration, due to:
  * Cache each basic block's index within its function
  * Preallocate memory for 4 blocks per function
  * Preallocate 32 instructions per superblock
  * Assume functions do not cross vspace region boundaries
  * Improve cutSuperblockAt() with lower-level code
This commit is contained in:
Lephenixnoir 2024-01-11 12:41:33 +01:00
parent 80d6001417
commit 64a3df8e17
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
4 changed files with 64 additions and 53 deletions

View File

@ -92,20 +92,16 @@ struct Function: public BinaryObject
return m_blocks.end();
}
/* Version number of the analysis that was run on the function. Used to
avoid re-analyzing unless there are new features. */
int analysisVersion() const
{
return m_analysisVersion;
}
/* Was this function analyzed? Reports whether an analysis result is
   available for this function.
   NOTE(review): the body holds two consecutive return statements, which
   looks like the removed and added sides of a diff hunk shown together; as
   written only the first one can ever execute. Confirm which line is the
   intended implementation. */
bool hasAnalysis() const
{
/* Tests whether the m_analysisResult pointer is non-null. */
return static_cast<bool>(m_analysisResult);
/* Reads the m_hasAnalysisResult flag instead of testing the pointer. */
return m_hasAnalysisResult;
}
/* Get analysis results if there are any. */
/* Get analysis results or compute them on-demand. */
StaticFunctionAnalysis const *getAnalysis() const
{
    /* Lazy evaluation: the analysis is only run the first time its results
       are requested. */
    bool const needsAnalysis = !hasAnalysis();
    if(needsAnalysis) {
        runAnalysis();
    }

    /* Non-owning pointer to the stored result */
    StaticFunctionAnalysis const *result = m_analysisResult.get();
    return result;
}
@ -117,15 +113,16 @@ struct Function: public BinaryObject
void setAnalysisVersion(int version);
/* Analysis execution functions (also semi-private). */
void runAnalysis();
void runAnalysis() const;
private:
/* List of basic blocks (entry block is always number 0) */
std::vector<BasicBlock> m_blocks;
/* Analysis version */
int m_analysisVersion = 0;
/* Analysis result */
std::unique_ptr<StaticFunctionAnalysis> m_analysisResult;
/* Analysis result (mutable because computed lazily, on-demand) */
mutable std::unique_ptr<StaticFunctionAnalysis> m_analysisResult;
mutable bool m_hasAnalysisResult = false;
/* ID of the entry block */
int m_entryBlockIndex;
};
@ -159,7 +156,12 @@ struct BasicBlock
return m_address;
}
/* Block's index within function. */
uint blockIndex() const;
uint blockIndex() const
{
    /* The cached index starts out negative and only becomes valid once an
       index has been assigned to the block. */
    assert(m_blockIndex >= 0);
    return static_cast<uint>(m_blockIndex);
}
/*** Access to instructions ***/
@ -501,6 +503,7 @@ struct BasicBlock
void finalizeBlock();
void addSuccessor(BasicBlock *succ);
void addPredecessor(BasicBlock *pred);
void setBlockIndex(int blockIndex);
private:
std::reference_wrapper<Function> m_function;
@ -510,6 +513,7 @@ private:
std::vector<uint> m_predecessors;
u32 m_address;
u32 m_flags;
int m_blockIndex = -1;
};
/* Concrete instruction in a basic block. This class only contains a minimal

View File

@ -20,6 +20,8 @@ Function::Function(Binary &binary, u32 address):
{
/* Size is not determined at first. */
m_blocks.reserve(4);
/* Entry block index is not determined at first. */
m_entryBlockIndex = -1;
@ -45,8 +47,6 @@ void Function::deserialize(BSON const &b)
exploreFunctionAt(address());
setName(b["name"].getString());
setComment(b["comm"].isNull() ? "" : b["comment"].getString());
runAnalysis();
}
BasicBlock &Function::basicBlockByAddress(u32 pc)
@ -76,7 +76,10 @@ BasicBlock &Function::createBasicBlock(u32 address, bool isEntryBlock)
assert(isEntryBlock == (address == this->address())
&& "inconsistent entry block specification in function");
m_blocks.emplace_back(*this, address, isEntryBlock);
return m_blocks.back();
BasicBlock &bb = m_blocks.back();
bb.setBlockIndex(m_blocks.size() - 1);
return bb;
}
/* Sorts blocks by address. Invalidates pointers to blocks. */
@ -85,18 +88,11 @@ void Function::sortBasicBlocks()
std::sort(m_blocks.begin(), m_blocks.end(),
[](auto &bb1, auto &bb2) { return bb1.address() < bb2.address(); });
/* Update entry block index */
/* Update instruction's parent block numbers and the entry block index */
for(uint i = 0; i < m_blocks.size(); i++) {
if(m_blocks[i].isEntryBlock()) {
m_blocks[i].setBlockIndex(i);
if(m_blocks[i].isEntryBlock())
m_entryBlockIndex = i;
break;
}
}
/* Update instruction's parent block numbers */
for(uint i = 0; i < m_blocks.size(); i++) {
for(Instruction &ins: m_blocks[i].instructionsInAddressOrder())
ins.setBlockContext(i, ins.indexInBlock());
}
}
@ -122,9 +118,10 @@ void Function::setAnalysisVersion(int version)
m_analysisVersion = version;
}
/* Runs the analysis for this function: stores the value returned by
   interpretFunction(*this) in m_analysisResult, then sets
   m_hasAnalysisResult. The flag is set unconditionally, whatever
   interpretFunction() returned.
   NOTE(review): two signature lines appear back to back (non-const, then
   const) -- presumably the old and new sides of a diff hunk shown together;
   confirm that only one of them is meant to remain. */
void Function::runAnalysis()
void Function::runAnalysis() const
{
m_analysisResult = interpretFunction(*this);
/* Record that the analysis has been run */
m_hasAnalysisResult = true;
}
/* The first step in building function CFGs is delimiting the blocks. Starting
@ -160,6 +157,7 @@ struct Superblock
static Superblock exploreSuperblock(Function &function, u32 entry, bool *error)
{
Superblock sb;
sb.addresses.reserve(32);
sb.leaders.insert(entry);
*error = false;
@ -168,17 +166,25 @@ static Superblock exploreSuperblock(Function &function, u32 entry, bool *error)
bool terminatorFound = false;
u32 pc = entry;
/* Determine how much space the vspace covers at this address to know if we
ever exit the space */
int vspaceLen;
u8 const *vspaceData = (u8 *)vspace.translate_dynamic(entry, &vspaceLen);
u32 vspaceEnd = entry + vspaceLen;
assert(vspaceData);
while(!terminatorFound || inDelaySlot) {
sb.addresses.push_back(pc);
/* Read the next instruction from memory */
// TODO: Handle 32-bit DSP instructions
if(!vspace.covers(pc, 2)) {
if(pc + 2 > vspaceEnd) {
FxOS_log(ERR, "superblock %08x exits vspace at %08x", entry, pc);
*error = true;
break;
}
u32 opcodeBits = vspace.read_u16(pc);
u32 opcodeBits = (vspaceData[0] << 8) | vspaceData[1];
vspaceData += 2;
Instruction ins(function, pc, opcodeBits);
if(!ins.hasValidOpcode()) {
@ -226,9 +232,17 @@ static bool cutSuperblockAt(std::vector<Superblock> &blocks, u32 address)
{
for(auto &b: blocks) {
auto const &a = b.addresses;
if(std::find(a.begin(), a.end(), address) != a.end()) {
b.leaders.insert(address);
return true;
int size = a.size();
/* Prune the superblock based on easy bounds */
if(!size || a[0] > address || a[size - 1] < address)
continue;
for(int i = 0; i < size; i++) {
if(b.addresses[i] == address) {
b.leaders.insert(address);
return true;
}
}
}
return false;
@ -341,17 +355,6 @@ BasicBlock::BasicBlock(Function &function, u32 address, bool isEntryBlock):
m_flags |= Flags::IsEntryBlock;
}
uint BasicBlock::blockIndex() const
{
    /* Linear search: walk the parent function's blocks and return the index
       of the one whose address is this object. */
    uint idx = 0;
    while(idx < parentFunction().blockCount()) {
        BasicBlock const &candidate = parentFunction().basicBlockByIndex(idx);
        if(&candidate == this)
            return idx;
        idx++;
    }

    /* Every block is expected to be found among its parent's blocks */
    assert(false && "blockIndex: block not in its own parent");
    __builtin_unreachable();
}
ProgramState const *BasicBlock::initialState() const
{
StaticFunctionAnalysis const *SFA = parentFunction().getAnalysis();
@ -458,6 +461,13 @@ void BasicBlock::addPredecessor(BasicBlock *pred)
m_predecessors.push_back(pred->blockIndex());
}
void BasicBlock::setBlockIndex(int blockIndex)
{
m_blockIndex = blockIndex;
for(Instruction &ins: instructionsInAddressOrder())
ins.setBlockContext(m_blockIndex, ins.indexInBlock());
}
//=== Instruction ===//
Instruction::Instruction(Function &function, u32 address, u32 opcode):

View File

@ -49,15 +49,15 @@ void Timer::restart(void)
/* Formats a duration given in nanoseconds as a short string with a unit
   suffix (ns, us, ms or s). The visible structure divides the value by 1000
   at each step (integer division) and picks a unit as soon as the value is
   below 2000, falling back to seconds.
   NOTE(review): every return appears twice (with and without a space before
   the unit) -- presumably the old and new sides of a diff hunk shown
   together. Taken literally, the unguarded second return after the first
   `if` would end the function for every value >= 2000; confirm that only
   one line of each pair belongs in the file.
   NOTE(review): time_ns is a uint64_t but the conversion used is %lld
   (signed long long); if format() is printf-style this is a type mismatch
   -- confirm, and consider %llu or PRIu64. */
std::string Timer::format_time(uint64_t time_ns)
{
/* Nanoseconds */
if(time_ns < 2000)
return format("%lld ns", time_ns);
return format("%lldns", time_ns);
/* Microseconds */
time_ns /= 1000;
if(time_ns < 2000)
return format("%lld us", time_ns);
return format("%lldus", time_ns);
/* Milliseconds */
time_ns /= 1000;
if(time_ns < 2000)
return format("%lld ms", time_ns);
return format("%lldms", time_ns);
/* Seconds */
time_ns /= 1000;
return format("%lld s", time_ns);
return format("%llds", time_ns);
}
std::string Timer::format_time() const

View File

@ -13,9 +13,6 @@
#include <fmt/core.h>
#include <endian.h>
// TODO: fxos: Proper definition of function analysis version
#define FXOS_FUNCTION_ANALYSIS_VERSION 1
//---
// af
//---
@ -108,7 +105,6 @@ static void af_analyze(Binary &binary, _af_args const &args)
timer.start();
auto const &addresses = args.addresses;
int const FAV = FXOS_FUNCTION_ANALYSIS_VERSION;
for(int i = 0; i < (int)addresses.size(); i++) {
u32 entry = addresses[i];
@ -117,11 +113,11 @@ static void af_analyze(Binary &binary, _af_args const &args)
/* Check if there is already a function defined here */
Function *existing = binary.functionAt(entry);
if(!existing || existing->analysisVersion() < FAV) {
if(!existing) {
auto f = std::make_unique<Function>(binary, entry);
if(f->exploreFunctionAt(entry)) {
f->updateFunctionSize();
f->setAnalysisVersion(FAV);
f->runAnalysis();
binary.addObject(std::move(f));
successes++;
}
@ -140,7 +136,7 @@ static void af_analyze(Binary &binary, _af_args const &args)
printf("\nAnalyzed %d functions (+%d skipped, +%d errors) in %s\n",
successes, skipped, errors, timer.format_time().c_str());
_af_consistency(binary);
// _af_consistency(binary);
}
void _af(Session &session, _af_args const &args)
@ -310,6 +306,7 @@ Specifying -u (update) causes all functions to be re-processed, while keeping
user-specified information (name, prototype, etc). Specifying --force causes
all functions to be reanalyzed from scratch without keeping user-specified
information.
-> TODO: Currently, no difference.
When a single address is given, -n can specify the name of the function object
to be created.