// fxos/include/fxos/function.h
//
// 637 lines
// 18 KiB
// C++

//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/function: Functions and their component blocks and instructions
#ifndef FXOS_FUNCTION_H
#define FXOS_FUNCTION_H
#include <fxos/util/types.h>
#include <fxos/binary.h>
#include <fxos/lang.h>
#include <array>
#include <cassert>
#include <functional>
#include <memory>
#include <optional>
#include <ranges>
#include <utility>
#include <vector>
namespace FxOS {
/* Forward declarations. Function, BasicBlock and Instruction are defined
   further down in this header; StaticFunctionAnalysis, ProgramState and
   ProgramStateDiff are not defined in this header. */
struct Function;
struct BasicBlock;
struct Instruction;
struct StaticFunctionAnalysis;
struct ProgramState;
struct ProgramStateDiff;
// TODO: move this extern declaration of FxOS::insmap
/* Table of known instructions, indexed by opcode value. It has 65536 entries
   (one per 16-bit value); an empty optional marks an opcode with no known
   instruction. */
extern std::array<std::optional<AsmInstruction>, 65536> insmap;
/* Binary object representing a function. */
struct Function: public BinaryObject
{
Function(Binary &binary, u32 address);
/* Number of basic blocks. */
uint blockCount() const
{
return m_blocks.size();
}
/* Get basic block by its index. */
BasicBlock &basicBlockByIndex(uint index)
{
assert(index < blockCount() && "out-of-bounds block number");
return m_blocks[index];
}
BasicBlock const &basicBlockByIndex(uint index) const
{
assert(index < blockCount() && "out-of-bounds block number");
return m_blocks[index];
}
/* Get basic block by its address. */
BasicBlock &basicBlockByAddress(u32 pc);
BasicBlock const &basicBlockByAddress(u32 pc) const;
/* Get the entry block. */
uint entryBlockIndex() const
{
return m_entryBlockIndex;
}
BasicBlock &entryBlock()
{
return m_blocks[m_entryBlockIndex];
}
BasicBlock const &entryBlock() const
{
return m_blocks[m_entryBlockIndex];
}
/* Iterators over basic blocks. */
auto const begin() const
{
return m_blocks.begin();
}
auto begin()
{
return m_blocks.begin();
}
auto const end() const
{
return m_blocks.end();
}
auto end()
{
return m_blocks.end();
}
/* Version number of the analysis that was run on the function. Used to
avoid re-analyzing unless there are new features. */
int analysisVersion() const
{
return m_analysisVersion;
}
/* Was this function analyzed? */
bool hasAnalysis() const
{
return static_cast<bool>(m_analysisResult);
}
/* Get analysis results if there are any. */
StaticFunctionAnalysis const *getAnalysis() const
{
return m_analysisResult.get();
}
/* Construction functions to be used only by the analysis pass. */
bool exploreFunctionAt(u32 address);
BasicBlock &createBasicBlock(u32 address, bool isEntryBlock);
void sortBasicBlocks();
void updateFunctionSize();
void setAnalysisVersion(int version);
/* Analysis execution functions (also semi-private). */
void runAnalysis();
private:
/* List of basic blocks (entry block is always number 0) */
std::vector<BasicBlock> m_blocks;
/* Analysis version */
int m_analysisVersion = 0;
/* Analysis result */
std::unique_ptr<StaticFunctionAnalysis> m_analysisResult;
/* ID of the entry block */
int m_entryBlockIndex;
};
/* Basic block within a function. A block stores its instructions in address
   order along with the indices (within the parent function) of its
   successors and predecessors in the CFG. */
struct BasicBlock
{
    /*** General properties ***/

    /* Binary and function that own the basic block. */
    Binary &parentBinary()
    {
        return m_function.get().parentBinary();
    }
    Binary const &parentBinary() const
    {
        return m_function.get().parentBinary();
    }
    Function &parentFunction()
    {
        return m_function.get();
    }
    Function const &parentFunction() const
    {
        return m_function.get();
    }

    /* Block's address (address of first instruction). */
    u32 address() const
    {
        return m_address;
    }

    /* Block's index within function. */
    uint blockIndex() const;

    /*** Access to instructions ***/

    /* Number of instructions. */
    uint instructionCount() const
    {
        return m_instructions.size();
    }

    /* Instruction at a given index in the block (index < instructionCount()).
       This function returns instructions in increasing order of address. */
    Instruction &instructionAtIndex(uint index)
    {
        assert(index < instructionCount()
               && "out-of-bounds access to basic block");
        return m_instructions[index];
    }
    Instruction const &instructionAtIndex(uint index) const
    {
        assert(index < instructionCount()
               && "out-of-bounds access to basic block");
        return m_instructions[index];
    }

    /* View over instructions in storage/address order */
    auto instructionsInAddressOrder() // -> [Instruction &]
    {
        return std::views::all(m_instructions);
    }
    auto instructionsInAddressOrder() const // -> [Instruction const &]
    {
        return std::views::all(m_instructions);
    }

    /* View over instructions as pairs [instruction, delaySlotInstruction]. The
       second member is null unless the first member has a delay slot in which
       case the second member is a pointer to the instruction in that delay
       slot, and that instruction never appears as the first member. */
    template<typename Ins, typename Vec>
    struct InsnPairIterator
    {
        /* This type is an input iterator. It satisfies __LegacyIterator so it
           gets iterator_traits<> automatically. Ins is Instruction (maybe
           const), Vec is std::vector<Instruction> (maybe const). */
        InsnPairIterator(Vec *v, uint i): m_v {v}, m_i {i}
        {
        }

        /* Iterators are equal when they designate the same position of the
           same vector. Operands are taken by const reference so that const
           iterators and temporaries can be compared too. */
        friend bool operator==(InsnPairIterator<Ins, Vec> const &left,
            InsnPairIterator<Ins, Vec> const &right)
        {
            return left.m_v == right.m_v && left.m_i == right.m_i;
        }

        std::pair<Ins &, Ins *> operator*() const
        {
            Ins &ins = (*m_v)[m_i];
            Ins *delaySlot = nullptr;

            if(ins.opcode().hasDelaySlot()) {
                /* The delay slot instruction is the next one in the vector;
                   it must exist for the pointer to be meaningful. */
                assert(m_i + 1 < m_v->size()
                       && "delay slot instruction missing from basic block");
                delaySlot = &(*m_v)[m_i + 1];
            }
            return {ins, delaySlot};
        }

        /* Skip the current instruction, and its delay slot if it has one. */
        InsnPairIterator &operator++()
        {
            Ins &ins = (*m_v)[m_i];
            m_i += 1 + ins.opcode().hasDelaySlot();
            return *this;
        }

    private:
        Vec *m_v;
        uint m_i;
    };

    template<typename Ins, typename Vec>
    struct InsnPairView: std::ranges::view_interface<InsnPairView<Ins, Vec>>
    {
        InsnPairView(Vec &v): m_v {v}
        {
        }
        InsnPairIterator<Ins, Vec> begin() const
        {
            return InsnPairIterator<Ins, Vec>(&m_v, 0);
        }
        InsnPairIterator<Ins, Vec> end() const
        {
            return InsnPairIterator<Ins, Vec>(&m_v, m_v.size());
        }

    private:
        Vec &m_v;
    };

    /* Input range for instructions with their delay slots. */
    auto instructionsAndDelaySlots() // -> [[Instruction &, Instruction *]]
    {
        return InsnPairView<Instruction, std::vector<Instruction>>(
            m_instructions);
    }
    auto instructionsAndDelaySlots() const
    // -> [[Instruction const &, Instruction const *]]
    {
        return InsnPairView<Instruction const, std::vector<Instruction> const>(
            m_instructions);
    }

    /* Terminator instruction, or nullptr if the block has no terminator. The
       terminator is the last instruction of the block, or the second-to-last
       when the HasDelaySlot flag is set. */
    Instruction *terminatorInstruction()
    {
        if(hasNoTerminator())
            return nullptr;

        /* Number of instructions from the terminator to the end of block */
        uint const tail = ((m_flags & Flags::HasDelaySlot) != 0) ? 2 : 1;
        assert(instructionCount() >= tail
               && "basic block too short to hold its terminator");
        return &m_instructions[instructionCount() - tail];
    }
    Instruction const *terminatorInstruction() const
    {
        if(hasNoTerminator())
            return nullptr;

        /* Number of instructions from the terminator to the end of block */
        uint const tail = ((m_flags & Flags::HasDelaySlot) != 0) ? 2 : 1;
        assert(instructionCount() >= tail
               && "basic block too short to hold its terminator");
        return &m_instructions[instructionCount() - tail];
    }

    /*** Analysis results ***/

    /* Entry state after analysis, if analysis was performed. */
    ProgramState const *initialState() const;

    /* Iteration over instructions in address order with static value info.
       This iteration is not quite in execution order but the change of state
       due to delay slot instructions is merged into the associated branch and
       the delay slot is a nop-diff so the sequencing is still correct. */
    template<typename It, typename State>
    struct InsnStateIterator
    {
        /* This type is an input iterator. It satisfies __LegacyIterator so it
           gets iterator_traits<> automatically. It is the iterator over the
           vector of instructions. State is ProgramState. */
        InsnStateIterator(It it, State const *initPS): m_it {it}
        {
            /* Without an initial state, start from the state set by
               setTop(). */
            if(initPS)
                m_PS = *initPS;
            else
                m_PS.setTop();
        }

        /* Only the position is compared; the tracked state is ignored, which
           is what allows comparing against the end iterator. */
        friend bool operator==(InsnStateIterator<It, State> const &left,
            InsnStateIterator<It, State> const &right)
        {
            return left.m_it == right.m_it;
        }

        /* Yields the instruction by reference (so that the non-const view
           gives access to the stored instruction rather than a copy of it)
           and a snapshot of the state before that instruction. The state is
           returned by value because it lives inside the iterator. */
        auto operator*() const
        {
            return std::pair<decltype(*m_it), State>(*m_it, m_PS);
        }

        /* Apply the current instruction's diff (if any) then move on. */
        InsnStateIterator &operator++()
        {
            auto *diff = (*m_it).stateDiff();
            if(diff)
                m_PS.applyDiff(*diff);
            ++m_it;
            return *this;
        }

    private:
        It m_it;
        State m_PS;
    };

    template<typename Vec>
    struct InsnStateView: std::ranges::view_interface<InsnStateView<Vec>>
    {
        InsnStateView(Vec &v, ProgramState const *PS): m_v {v}, m_PS {PS}
        {
        }
        auto begin() const
        {
            return InsnStateIterator(m_v.begin(), m_PS);
        }
        auto end() const
        {
            /* Final state does not matter */
            return InsnStateIterator(m_v.end(), m_PS);
        }

    private:
        Vec &m_v;
        ProgramState const *m_PS;
    };

    /* Input range for instructions with the program state before each of
       them. */
    auto instructionsWithState() // -> [[Instruction &, ProgramState]]
    {
        return InsnStateView<std::vector<Instruction>>(
            m_instructions, initialState());
    }
    auto instructionsWithState() const
    // -> [[Instruction const &, ProgramState]]
    {
        return InsnStateView<std::vector<Instruction> const>(
            m_instructions, initialState());
    }

    /*** Flags ***/

    /* The following flags should be considered private, they're exposed only
       for construction and debugging. Use the associated functions. */
    enum Flags {
        IsEntryBlock = 0x01,
        IsTerminator = 0x02,
        HasDelaySlot = 0x04,
        NoTerminator = 0x08,
        /* Last is the highest flag plus one, which makes ValidFlags the mask
           of all the flags defined above. */
        Last,
        ValidFlags = (Last - 2) * 2 + 1,
    };

    u32 getFlags() const
    {
        return m_flags;
    }
    void setFlags(u32 flags)
    {
        assert(!(flags & ~Flags::ValidFlags)
               && "setting invalid basic block flags");
        m_flags = flags;
    }

    /* Whether this block is the parent function's entry block. */
    bool isEntryBlock() const
    {
        return (m_flags & Flags::IsEntryBlock) != 0;
    }
    /* Same as .mustReturn(). */
    bool isTerminator() const
    {
        return (m_flags & Flags::IsTerminator) != 0;
    }
    /* Whether this block lacks a terminator. */
    bool hasNoTerminator() const
    {
        return (m_flags & Flags::NoTerminator) != 0;
    }

    /*** Block ending information ***/

    /* Whether the block might end with a branch to a static target. */
    bool mayStaticBranch() const;
    /* Whether the block always ends with a branch to a static target. */
    bool mustStaticBranch() const;
    /* Target of the static jump, -1 if there is none. */
    u32 staticBranchTarget() const;
    /* Whether the block might fall through (conditional or no jump). */
    bool mayFallthrough() const;
    /* Whether the block always falls through. */
    bool mustFallthrough() const
    {
        return hasNoTerminator();
    }
    /* Whether the block ends with a function return. */
    bool mustReturn() const
    {
        /* Same as terminatorInstruction()->opcode().isReturn() */
        return (m_flags & Flags::IsTerminator) != 0;
    }
    /* Whether the block ends with a dynamically-known jump target. */
    bool mustDynamicBranch() const;

    /*** CFG navigation ***/

    /* The views below resolve stored block indices through the parent
       function. The lambdas declare a reference return type explicitly;
       otherwise return type deduction would yield BasicBlock by value and
       every element access would copy a whole block. */
    auto successors() // -> [BasicBlock &]
    {
        return std::views::all(m_successors)
               | std::views::transform([this](uint index) -> BasicBlock & {
                     return parentFunction().basicBlockByIndex(index);
                 });
    }
    auto successors() const // -> [BasicBlock const &]
    {
        return std::views::all(m_successors)
               | std::views::transform(
                   [this](uint index) -> BasicBlock const & {
                       return parentFunction().basicBlockByIndex(index);
                   });
    }
    std::vector<uint> const &successorsByIndex() const
    {
        return m_successors;
    }
    auto successorsByAddress() const // -> [u32]
    {
        return successors() | std::views::transform([](BasicBlock const &bb) {
            return bb.address();
        });
    }

    auto predecessors() // -> [BasicBlock &]
    {
        return std::views::all(m_predecessors)
               | std::views::transform([this](uint index) -> BasicBlock & {
                     return parentFunction().basicBlockByIndex(index);
                 });
    }
    auto predecessors() const // -> [BasicBlock const &]
    {
        return std::views::all(m_predecessors)
               | std::views::transform(
                   [this](uint index) -> BasicBlock const & {
                       return parentFunction().basicBlockByIndex(index);
                   });
    }
    std::vector<uint> const &predecessorsByIndex() const
    {
        return m_predecessors;
    }
    auto predecessorsByAddress() const // -> [u32]
    {
        return predecessors()
               | std::views::transform(
                   [](BasicBlock const &bb) { return bb.address(); });
    }

    uint successorCount() const
    {
        return m_successors.size();
    }
    uint predecessorCount() const
    {
        return m_predecessors.size();
    }

    /*** Construction functions (semi-private) ***/

    BasicBlock(Function &function, u32 address, bool isEntryBlock);
    void addInstruction(Instruction &&insn);
    void finalizeBlock();
    void addSuccessor(BasicBlock *succ);
    void addPredecessor(BasicBlock *pred);

private:
    std::reference_wrapper<Function> m_function;
    std::vector<Instruction> m_instructions;
    /* TODO: More compact storage for CFG edges, especially successors (≤ 2) */
    std::vector<uint> m_successors;
    std::vector<uint> m_predecessors;
    u32 m_address;
    u32 m_flags;
};
/* Concrete instruction in a basic block. This class only contains a minimal
   amount of data, and most analysis results provided by its methods are
   instead queried from the appropriate Binary. */
struct Instruction
{
    enum Flags {
        InDelaySlot = 0x01,
        /* Last is the highest flag plus one, which makes ValidFlags the mask
           of all the flags defined above. */
        Last,
        ValidFlags = (Last - 2) * 2 + 1,
    };

    Instruction(Function &function, u32 address, u32 opcode);

    // TODO: Don't use the word "opcode", maybe "base"
    // TODO: Get opcode from Instruction
    /* Description of the instruction, looked up in insmap. Must only be
       called when hasValidOpcode() is true. */
    AsmInstruction const &opcode() const
    {
        assert(hasValidOpcode() && "use of Instruction with invalid opcode");
        return *insmap[m_opcode];
    }
    /* Whether insmap has an entry for this instruction's opcode. The opcode
       is stored on 32 bits while insmap only covers 16-bit values, so the
       range is checked before indexing; opcodes outside the table are
       reported as invalid. */
    bool hasValidOpcode() const
    {
        return m_opcode < insmap.size() && insmap[m_opcode].has_value();
    }

    /* Instruction's size in bytes: 4 if the opcode has any bit set above the
       low 16, 2 otherwise. */
    uint encodingSize() const
    {
        return (m_opcode >> 16) ? 4 : 2;
    }

    /* Binary, function and basic block that own the instruction. */
    Binary &parentBinary()
    {
        return m_function.parentBinary();
    }
    Binary const &parentBinary() const
    {
        return m_function.parentBinary();
    }
    Function &parentFunction()
    {
        return m_function;
    }
    Function const &parentFunction() const
    {
        return m_function;
    }
    BasicBlock &parentBlock()
    {
        return m_function.basicBlockByIndex(m_blockIndex);
    }
    BasicBlock const &parentBlock() const
    {
        return m_function.basicBlockByIndex(m_blockIndex);
    }

    /* Instruction's address. */
    u32 address() const
    {
        return m_address;
    }

    /* Index of instruction within its basic block. */
    uint indexInBlock() const
    {
        return m_insnIndex;
    }
    /* Whether this instruction is the first instruction in its block. */
    bool isFirstInBlock() const
    {
        return m_insnIndex == 0;
    }
    /* Whether this instruction is the last in its block. This does *not* imply
       that it's a jump, because delay slots are a thing. */
    bool isLastInBlock() const
    {
        return static_cast<uint>(m_insnIndex) + 1
               == parentBlock().instructionCount();
    }
    /* Whether this instruction is in a delay slot. Since only jumps have delay
       slots, this implies isLastInBlock(). */
    bool isInDelaySlot() const
    {
        return (m_flags & Flags::InDelaySlot) != 0;
    }

    /* Properties about parameters. This is tailored to the SuperH ISA. */
    // TODO: Extract parameter info
    // - Get branch target if any, immediate if any, memory access address if
    //   any (harder: dynamic)...
    // - All successors (+ user specifiable for dynamic cases)
    // - All constants

    /* Functions to access and modify flags. Only bits from ValidFlags may be
       set, as is the case for basic block flags. */
    u32 flags() const
    {
        return m_flags;
    }
    void setFlags(u32 flags)
    {
        assert(!(flags & ~Flags::ValidFlags)
               && "setting invalid instruction flags");
        m_flags = flags;
    }

    /* Effect on program state found by analysis, if analysis was performed. */
    ProgramStateDiff const *stateDiff() const;

    /* Construction functions to be used only by the cfg pass */
    void setBlockContext(uint blockIndex, uint insnIndex);

private:
    /* The following members are instantiated for every instruction mapped out
       in the Binary - keep it reasonably small. */
    Function &m_function;
    u32 m_address;
    u32 m_opcode;
    u32 m_flags;
    u16 m_blockIndex;
    u16 m_insnIndex;
};
} /* namespace FxOS */
#endif /* FXOS_FUNCTION_H */