fxos/lib/function.cpp

486 lines
15 KiB
C++

//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
#include <fxos/function.h>
#include <fxos/analysis.h>
#include <fxos/util/format.h>
#include <fxos/util/log.h>
namespace FxOS {
//=== Function ===//
/* Build a function object located at [address] in [binary]. Nothing is
   explored at this point: the object has size 0, no entry block and no
   analysis result. */
Function::Function(Binary &binary, u32 address):
    BinaryObject(binary, BinaryObject::Function, address, 0),
    m_analysisResult {nullptr}
{
    /* Neither the size (0 above) nor the entry block index are known until
       the function's code has been explored. */
    m_entryBlockIndex = -1;

    /* Address-based default name, unambiguous by construction. */
    this->setName(format("fun.%08x", address));
}
/* Serialize the function's metadata as a BSON document with fields "*" (type
   tag), "address", "name" and "comm" (null when the comment is empty). */
BSON Function::serialize() const
{
    // TODO: Function: Serialize certain analysis results?
    return BSON::mkDocument({
        {"*", BSON::mkString("Function")},
        {"address", BSON::mkI32(address())},
        {"name", BSON::mkString(name())},
        /* An empty comment is stored as null rather than as "". */
        {"comm",
            comment() == "" ? BSON::mkNull() : BSON::mkString(comment())},
    });
}
/* Restore the function from a BSON document produced by serialize(). The
   document only carries metadata (name, comment); basic blocks are rebuilt by
   re-exploring the code at this object's address and the analysis is re-run
   afterwards. */
void Function::deserialize(BSON const &b)
{
    assert(b.isDocument() && b["*"].getString() == "Function");

    // NOTE(review): the success status returned by exploreFunctionAt() is
    // ignored here, so the analysis below runs even if exploration failed.
    exploreFunctionAt(address());

    setName(b["name"].getString());
    /* The comment is stored under the "comm" key (null when empty); read it
       back from the same key that serialize() writes. */
    setComment(b["comm"].isNull() ? "" : b["comm"].getString());

    runAnalysis();
}
/* Look up the basic block that starts exactly at [pc]. It is a programming
   error (assertion failure) to ask for an address that does not start a block
   of this function. */
BasicBlock &Function::basicBlockByAddress(u32 pc)
{
    for(BasicBlock &candidate: *this) {
        if(candidate.address() != pc)
            continue;
        return candidate;
    }

    assert(false && "not the address of a basic block in this function");
    __builtin_unreachable();
}
/* Const variant: look up the basic block that starts exactly at [pc]. It is
   a programming error (assertion failure) to ask for an address that does not
   start a block of this function. */
BasicBlock const &Function::basicBlockByAddress(u32 pc) const
{
    for(BasicBlock const &candidate: *this) {
        if(candidate.address() != pc)
            continue;
        return candidate;
    }

    assert(false && "not the address of a basic block in this function");
    __builtin_unreachable();
}
/* Append a fresh, empty basic block starting at [address] to the function and
   return it. [isEntryBlock] must be true exactly when [address] is the
   function's own address. Previously obtained pointers or references to other
   blocks are invalidated! */
BasicBlock &Function::createBasicBlock(u32 address, bool isEntryBlock)
{
    assert((address == this->address()) == isEntryBlock
           && "inconsistent entry block specification in function");

    return m_blocks.emplace_back(*this, address, isEntryBlock);
}
/* Sorts blocks by address. Invalidates pointers to blocks. */
void Function::sortBasicBlocks()
{
std::sort(m_blocks.begin(), m_blocks.end(),
[](auto &bb1, auto &bb2) { return bb1.address() < bb2.address(); });
/* Update entry block index */
for(uint i = 0; i < m_blocks.size(); i++) {
if(m_blocks[i].isEntryBlock()) {
m_entryBlockIndex = i;
break;
}
}
/* Update instruction's parent block numbers */
for(uint i = 0; i < m_blocks.size(); i++) {
for(Instruction &ins: m_blocks[i].instructionsInAddressOrder())
ins.setBlockContext(i, ins.indexInBlock());
}
}
/* Update the function's BinaryObject size by finding the last address covered
by any instruction in the function. */
void Function::updateFunctionSize()
{
u32 max_address = this->address();
for(BasicBlock &bb: *this) {
if(bb.instructionCount() == 0)
continue;
Instruction &insn = bb.instructionAtIndex(bb.instructionCount() - 1);
max_address
= std::max(max_address, insn.address() + insn.encodingSize());
}
this->setSize(max_address - this->address());
}
void Function::setAnalysisVersion(int version)
{
m_analysisVersion = version;
}
/* Run interpretFunction() on this function and store what it returns as the
   function's analysis result, replacing any previous one. */
void Function::runAnalysis()
{
    auto freshResult = interpretFunction(*this);
    m_analysisResult = std::move(freshResult);
}
/* Building a function's CFG starts with delimiting its blocks. From the entry
   point we read instructions linearly until a terminator is reached; the
   resulting run of instructions is a "superblock".

   A superblock usually ends up split into several basic blocks, one cut being
   made at every jump target that falls inside it. These cut points (leaders)
   are collected during exploration and the actual basic blocks are generated
   at the very end. */
struct Superblock
{
    /* Address of every instruction of the superblock. */
    std::vector<u32> addresses;
    /* Address of every basic block leader within the superblock. */
    std::set<u32> leaders;

    /* Set when the superblock ends with a dynamic jump */
    bool mustDynamicJump {false};
    /* Set when the superblock may end by jumping to a static target */
    bool mayStaticJump {false};
    /* Set when the superblock may end by falling through */
    bool mayFallthrough {false};
    /* Set when the superblock ends with a return */
    bool mustReturn {false};

    /* Target address, meaningful when mayStaticJump is set */
    u32 staticTarget {0xffffffff};
    /* Fallthrough address, meaningful when mayFallthrough is set */
    u32 fallthroughTarget {0xffffffff};
};
/* Read instructions linearly from [entry] until a block terminator (and its
   delay slot, if any) has been consumed, and describe the result as a
   Superblock. [*error] is set to true if the exploration leaves the virtual
   space, meets an invalid opcode or finds an invalid delay slot; it is set to
   false otherwise. */
// TODO: Unclear what the exit status of the superblock is in case of error
static Superblock exploreSuperblock(Function &function, u32 entry, bool *error)
{
    Superblock result;
    result.leaders.insert(entry);
    *error = false;

    VirtualSpace &memory = function.parentBinary().vspace();
    /* True when the instruction about to be read sits in a delay slot */
    bool atDelaySlot = false;
    bool sawTerminator = false;
    u32 cursor = entry;

    /* Keep going until the terminator is behind us, including its delay
       slot when it has one */
    while(atDelaySlot || !sawTerminator) {
        result.addresses.push_back(cursor);

        /* Fetch the next instruction from memory */
        // TODO: Handle 32-bit DSP instructions
        if(!memory.covers(cursor, 2)) {
            FxOS_log(ERR, "superblock %08x exits vspace at %08x", entry,
                cursor);
            *error = true;
            return result;
        }

        u32 rawOpcode = memory.read_u16(cursor);
        Instruction decoded(function, cursor, rawOpcode);
        if(!decoded.hasValidOpcode()) {
            FxOS_log(ERR, "invalid instruction %08x: %04x in superblock",
                cursor, rawOpcode);
            *error = true;
            return result;
        }

        AsmInstruction opcode = decoded.opcode();
        if(atDelaySlot && !opcode.isValidDelaySlot()) {
            FxOS_log(ERR, "superblock %08x has invalid delay slot at %08x",
                entry, cursor);
            *error = true;
            return result;
        }

        /* The terminator decides how the superblock can be exited */
        if(opcode.isBlockTerminator()) {
            sawTerminator = true;
            result.mustDynamicJump = opcode.isDynamicJump();
            result.mayStaticJump = opcode.isAnyStaticJump();
            result.mayFallthrough = opcode.isConditionalJump();
            result.mustReturn = opcode.isReturn();

            if(result.mayStaticJump)
                result.staticTarget = opcode.getPCRelativeTarget(cursor);
        }

        /* A delay slot instruction does not open a delay slot of its own */
        atDelaySlot = !atDelaySlot && opcode.hasDelaySlot();
        cursor += 2;
    }

    /* Falling through lands right after the last instruction read */
    if(result.mayFallthrough)
        result.fallthroughTarget = cursor;
    return result;
}
/* Cut a superblock in the list and returns true if one contains provided
address, otherwise returns false. */
static bool cutSuperblockAt(std::vector<Superblock> &blocks, u32 address)
{
for(auto &b: blocks) {
auto const &a = b.addresses;
if(std::find(a.begin(), a.end(), address) != a.end()) {
b.leaders.insert(address);
return true;
}
}
return false;
}
/* Explore the code of the function starting at [functionAddress] and build
   its basic blocks and CFG edges. The work is done in three phases: (1) a
   worklist exploration that collects superblocks and the leaders at which
   they must be cut, (2) the creation of one BasicBlock per leader, and (3)
   the resolution of successor/predecessor links once blocks are sorted.
   Returns false if exploring any superblock fails, true otherwise.
   [functionAddress] must be even. */
bool Function::exploreFunctionAt(u32 functionAddress)
{
    assert(!(functionAddress & 1) && "function starts at unaligned address");
    std::vector<Superblock> blocks;
    /* Worklist of superblock entry addresses still to be explored */
    std::queue<u32> queue;
    queue.push(functionAddress);
    while(!queue.empty()) {
        u32 entry = queue.front();
        queue.pop();
        /* If this address was found by another superblock that was explored
           while [entry] was in the queue, perform the cut now */
        if(cutSuperblockAt(blocks, entry))
            continue;
        bool error = false;
        Superblock sb = exploreSuperblock(*this, entry, &error);
        if(error)
            return false;
        /* Process static jump targets and fallthrough targets to queue new
           superblocks or cut existing ones */
        if(sb.mayFallthrough) {
            if(!cutSuperblockAt(blocks, sb.fallthroughTarget))
                queue.push(sb.fallthroughTarget);
        }
        if(sb.mayStaticJump) {
            if(!cutSuperblockAt(blocks, sb.staticTarget))
                queue.push(sb.staticTarget);
        }
        /* [sb] only joins the list now, so a target inside [sb] itself is
           queued above and gets cut when it is popped from the queue */
        blocks.push_back(std::move(sb));
    }
    /* Successors by addresses, before we get the pointers */
    std::map<u32, std::vector<u32>> successorAddresses;
    /* Cut superblocks. The loop on b.leaders schedules the construction of new
       BasicBlock objects but the iteration is really the multi-part do loop
       using the iterator on b.addresses. */
    for(auto &b: blocks) {
        auto it = b.addresses.begin();
        for(u32 pc: b.leaders) {
            /* Leaders are visited in increasing order (std::set), so each
               one must be where the previous block stopped */
            assert(pc == *it);
            BasicBlock &bb = createBasicBlock(*it, *it == functionAddress);
            /* Add instructions until the superblock ends or the next leader
               is reached */
            do {
                // TODO: Support 32-bit instructions
                u32 opcode = parentBinary().vspace().read_u16(*it);
                Instruction ins(*this, *it, opcode);
                bb.addInstruction(std::move(ins));
                it++;
            }
            while(it != b.addresses.end() && !b.leaders.count(*it));
            bb.finalizeBlock();
            /* Make sure the block has an entry even with no successors */
            successorAddresses[pc];
            /* Find successors: either superblock's successors at end of
               superblock, or next block in the same superblock */
            if(it == b.addresses.end()) {
                /* Targets left at their 0xffffffff default wrap to 0 when
                   incremented, which marks them as unset */
                if(b.staticTarget + 1)
                    successorAddresses[pc].push_back(b.staticTarget);
                if(b.fallthroughTarget + 1)
                    successorAddresses[pc].push_back(b.fallthroughTarget);
            }
            else {
                successorAddresses[pc].push_back(*it);
            }
        }
    }
    /* Sort blocks now before creating CFG nodes, which are index-based */
    sortBasicBlocks();
    /* Set block successors */
    for(auto &[pc, succ]: successorAddresses) {
        BasicBlock &bb = basicBlockByAddress(pc);
        for(u32 a: succ)
            bb.addSuccessor(&basicBlockByAddress(a));
    }
    /* Set block predecessors */
    for(BasicBlock &bb: *this) {
        for(BasicBlock &succ: bb.successors()) {
            succ.addPredecessor(&bb);
        }
    }
    updateFunctionSize();
    return true;
}
//=== BasicBlock ===//
/* Build an empty basic block of [function] starting at [address]. All flags
   start cleared, except IsEntryBlock which mirrors [isEntryBlock]. */
BasicBlock::BasicBlock(Function &function, u32 address, bool isEntryBlock):
    m_function {function}, m_address {address}, m_flags {0}
{
    if(!isEntryBlock)
        return;

    m_flags |= Flags::IsEntryBlock;
}
/* Position of this block in its parent function, found by a linear search
   comparing block addresses in memory with [this]. Asserts if the block is
   not owned by its parent function. */
uint BasicBlock::blockIndex() const
{
    auto &parent = parentFunction();
    auto const count = parent.blockCount();

    uint index = 0;
    while(index < count) {
        if(&parent.basicBlockByIndex(index) == this)
            return index;
        index++;
    }

    assert(false && "blockIndex: block not in its own parent");
    __builtin_unreachable();
}
/* Entry program state recorded for this block in the parent function's
   analysis, or nullptr if the function has no analysis available. */
ProgramState const *BasicBlock::initialState() const
{
    StaticFunctionAnalysis const *analysis = parentFunction().getAnalysis();
    return analysis ? &analysis->blocks[blockIndex()].entry : nullptr;
}
bool BasicBlock::mayStaticBranch() const
{
Instruction const *ins = terminatorInstruction();
return ins && ins->opcode().isAnyStaticJump();
}
bool BasicBlock::mustStaticBranch() const
{
Instruction const *ins = terminatorInstruction();
return ins && ins->opcode().isUnconditionalJump();
}
/* PC-relative target of the block's terminator when it is a static jump;
   0xffffffff when the block has no terminator or it is not a static jump. */
u32 BasicBlock::staticBranchTarget() const
{
    Instruction const *term = terminatorInstruction();
    if(term && term->opcode().isAnyStaticJump())
        return term->opcode().getPCRelativeTarget(term->address());

    return 0xffffffff;
}
bool BasicBlock::mayFallthrough() const
{
Instruction const *ins = terminatorInstruction();
return !ins || ins->opcode().isConditionalJump();
}
bool BasicBlock::mustDynamicBranch() const
{
Instruction const *ins = terminatorInstruction();
return ins && ins->opcode().isDynamicJump();
}
/* Append [insn] at the end of the block, after recording in the instruction
   the index of this block and the position it is about to take in it. */
void BasicBlock::addInstruction(Instruction &&insn)
{
    auto const position = m_instructions.size();
    insn.setBlockContext(this->blockIndex(), position);

    m_instructions.push_back(std::move(insn));
}
/* Finish the construction of the block once all its instructions have been
   added: check structural invariants (with assertions), derive the block's
   structural flags from its terminator, and flag instructions that sit in a
   delay slot. */
void BasicBlock::finalizeBlock()
{
    /* Ensure a bunch of invariants. */

    /* Instruction must be sequential. */
    u32 pc = this->address();
    for(Instruction &insn: instructionsInAddressOrder()) {
        assert(insn.address() == pc && "non-sequential instructions in bb");
        pc += insn.encodingSize();
    }

    /* The block must have no more than one terminator. */
    Instruction *term = nullptr;
    for(Instruction &insn: instructionsInAddressOrder()) {
        bool isTerminator = insn.opcode().isBlockTerminator();
        assert(!(term && isTerminator) && "bb with multiple terminators");
        if(isTerminator)
            term = &insn;
    }

    /* The block must have a delay slot iff the terminator has one. */
    bool hasDelaySlot = false;
    if(term) {
        hasDelaySlot = term->opcode().hasDelaySlot();
        /* The terminator is last, or second to last before its delay slot */
        assert(
            term->indexInBlock() == this->instructionCount() - hasDelaySlot - 1
            && "incorrectly placed bb terminator");
    }

    /* Set structural flags. */
    if(hasDelaySlot)
        m_flags |= Flags::HasDelaySlot;
    if(!term)
        m_flags |= Flags::NoTerminator;
    if(term && (term->opcode().isReturn() || term->opcode().isDynamicJump()))
        m_flags |= Flags::IsTerminator;

    /* Flag the instruction following each delay-slot-bearing instruction. */
    /* TODO: Check that insns with delay slots are valid/not in last place. */
    for(uint i = 0; i < instructionCount(); i++) {
        if(!instructionAtIndex(i).opcode().hasDelaySlot())
            continue;
        /* Any instruction with a delay slot (not only the terminator) may
           end up last in the block; there is then no following instruction
           to flag, and indexing i + 1 would run past the end. */
        if(i + 1 >= instructionCount())
            continue;

        Instruction &DSI = instructionAtIndex(i + 1);
        DSI.setFlags(DSI.flags() | Instruction::Flags::InDelaySlot);
    }
}
/* Register [succ] as a successor of this block; it is stored by block index.
   Both blocks must belong to the same function. */
void BasicBlock::addSuccessor(BasicBlock *succ)
{
    assert(&parentFunction() == &succ->parentFunction());

    auto const index = succ->blockIndex();
    m_successors.push_back(index);
}
/* Register [pred] as a predecessor of this block; it is stored by block
   index. Both blocks must belong to the same function. */
void BasicBlock::addPredecessor(BasicBlock *pred)
{
    assert(&parentFunction() == &pred->parentFunction());

    auto const index = pred->blockIndex();
    m_predecessors.push_back(index);
}
//=== Instruction ===//
/* Build the instruction of [function] located at [address] from its raw
   [opcode] value. */
Instruction::Instruction(Function &function, u32 address, u32 opcode):
    m_function {function}, m_address {address}, m_opcode {opcode}
{
    /* No flag is set initially; they get added later as needed */
    this->m_flags = 0;
}
/* Program state diff recorded for this instruction in the parent function's
   analysis (looked up by block index, then by index within the block), or
   nullptr if the function has no analysis available. */
ProgramStateDiff const *Instruction::stateDiff() const
{
    StaticFunctionAnalysis const *analysis = parentFunction().getAnalysis();
    if(analysis)
        return &analysis->blocks[m_blockIndex].diffs[m_insnIndex];

    return nullptr;
}
/* Record where the instruction lives: the index of its parent block within
   the function and its own index within that block. */
void Instruction::setBlockContext(uint blockIndex, uint insnIndex)
{
    this->m_insnIndex = insnIndex;
    this->m_blockIndex = blockIndex;
}
} /* namespace FxOS */