fxos/include/fxos/function.h

499 lines
13 KiB
C++

//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/function: Functions and their component blocks and instructions
#ifndef FXOS_FUNCTION_H
#define FXOS_FUNCTION_H
#include <fxos/util/types.h>
#include <fxos/binary.h>
#include <fxos/lang.h>
#include <array>
#include <cassert>
#include <functional>
#include <memory>
#include <optional>
#include <ranges>
#include <vector>
namespace FxOS {
/* Forward declarations. Function, BasicBlock and Instruction are defined
   later in this header with the `struct` class-key, so they are declared
   with the same key here to avoid mismatched-tag diagnostics. */
struct Function;
struct BasicBlock;
struct Instruction;
/* NOTE(review): the definition of StaticFunctionAnalysis is not visible from
   this header; confirm that it really is declared with `class`. */
class StaticFunctionAnalysis;

// TODO: move this extern declaration of FxOS::insmap
/* Instruction table with one optional entry per 16-bit opcode value; it is
   indexed directly by opcode in Instruction::opcode(). */
extern std::array<std::optional<AsmInstruction>, 65536> insmap;
/* Binary object representing a function. */
struct Function: public BinaryObject
{
Function(Binary &binary, u32 address);
/* Number of basic blocks. */
uint blockCount() const
{
return m_blocks.size();
}
/* Get basic block by its index. */
BasicBlock &basicBlockByIndex(uint index)
{
assert(index < blockCount() && "out-of-bounds block number");
return m_blocks[index];
}
BasicBlock const &basicBlockByIndex(uint index) const
{
assert(index < blockCount() && "out-of-bounds block number");
return m_blocks[index];
}
/* Get basic block by its address. */
BasicBlock &basicBlockByAddress(u32 pc);
BasicBlock const &basicBlockByAddress(u32 pc) const;
/* Get the entry block. */
int entryBlockIndex() const
{
return m_entryBlockIndex;
}
BasicBlock &entryBlock()
{
return m_blocks[m_entryBlockIndex];
}
BasicBlock const &entryBlock() const
{
return m_blocks[m_entryBlockIndex];
}
/* Iterators over basic blocks. */
auto const begin() const
{
return m_blocks.begin();
}
auto begin()
{
return m_blocks.begin();
}
auto const end() const
{
return m_blocks.end();
}
auto end()
{
return m_blocks.end();
}
/* Version number of the analysis that was run on the function. Used to
avoid re-analyzing unless there are new features. */
int analysisVersion() const
{
return m_analysisVersion;
}
/* Get analysis results if there are any. */
StaticFunctionAnalysis const *getAnalysis() const
{
return m_analysisResult.get();
}
/* Construction functions to be used only by the analysis pass. */
bool exploreFunctionAt(u32 address);
BasicBlock &createBasicBlock(u32 address, bool isEntryBlock);
void sortBasicBlocks();
void updateFunctionSize();
void setAnalysisVersion(int version);
/* Analysis execution functions (also semi-private). */
void runAnalysis();
private:
/* List of basic blocks (entry block is always number 0) */
std::vector<BasicBlock> m_blocks;
/* Analysis version */
int m_analysisVersion = 0;
/* Analysis result */
std::unique_ptr<StaticFunctionAnalysis> m_analysisResult;
/* ID of the entry block */
int m_entryBlockIndex;
};
/* Basic block within a function. */
struct BasicBlock
{
enum Flags {
IsEntryBlock = 0x01,
IsTerminator = 0x02,
HasDelaySlot = 0x04,
NoTerminator = 0x08,
Last,
ValidFlags = (Last - 2) * 2 + 1,
};
// Basic blocks can exit in four ways:
// 1. Fall through
// 2. Jump to static destination
// 3. Jump to dynamic destination
// 4. Function return
// A given block might have multiple options (typically 1/2)
BasicBlock(Function &function, u32 address, bool isEntryBlock);
/* Block's address (address of first instruction). */
u32 address() const
{
return m_address;
}
/* Number of instructions. */
uint instructionCount() const
{
return m_instructions.size();
}
/* Binary and function that own the basic block. */
Binary &parentBinary()
{
return m_function.get().parentBinary();
}
Binary const &parentBinary() const
{
return m_function.get().parentBinary();
}
Function &parentFunction()
{
return m_function.get();
}
Function const &parentFunction() const
{
return m_function.get();
}
/* Block's index within function. */
uint blockIndex() const;
/* Instruction at a given index in the block (index < size()). */
Instruction &instructionAtIndex(uint index)
{
assert(index < instructionCount()
&& "out-of-bounds access to basic block");
return m_instructions[index];
}
Instruction const &instructionAtIndex(uint index) const
{
assert(index < instructionCount()
&& "out-of-bounds access to basic block");
return m_instructions[index];
}
/* Terminator instruction. */
Instruction *terminatorInstruction()
{
return hasNoTerminator()
? nullptr
: &m_instructions[instructionCount() - hasDelaySlot() - 1];
}
Instruction const *terminatorInstruction() const
{
return hasNoTerminator()
? nullptr
: &m_instructions[instructionCount() - hasDelaySlot() - 1];
}
/* Instruction in terminator's delay slot. */
Instruction *delaySlotInstruction()
{
return hasDelaySlot() ? &m_instructions[instructionCount() - 1]
: nullptr;
}
Instruction const *delaySlotInstruction() const
{
return hasDelaySlot() ? &m_instructions[instructionCount() - 1]
: nullptr;
}
/* Iterators over instructions. */
auto const begin() const
{
return m_instructions.begin();
}
auto begin()
{
return m_instructions.begin();
}
auto const end() const
{
return m_instructions.end();
}
auto end()
{
return m_instructions.end();
}
auto const rbegin() const
{
return m_instructions.rbegin();
}
auto rbegin()
{
return m_instructions.rbegin();
}
auto const rend() const
{
return m_instructions.rend();
}
auto rend()
{
return m_instructions.rend();
}
/* Functions for checking and setting flags */
u32 getFlags() const
{
return m_flags;
}
void setFlags(u32 flags)
{
assert(!(flags & ~Flags::ValidFlags)
&& "setting invalid basic block flags");
m_flags = flags;
}
bool isEntryBlock() const
{
return (m_flags & Flags::IsEntryBlock) != 0;
}
bool isTerminator() const
{
return (m_flags & Flags::IsTerminator) != 0;
}
bool hasDelaySlot() const
{
return (m_flags & Flags::HasDelaySlot) != 0;
}
bool hasNoTerminator() const
{
return (m_flags & Flags::NoTerminator) != 0;
}
/* Block exit information. */
/* Whether the block might fall through (conditional or no jump). */
bool mayFallthrough() const;
/* Whether the block always falls through. */
bool mustFallthrough() const
{
return hasNoTerminator();
}
/* Whether the block has a statically-known jump target. The jump might be
conditional, so this doesn't guarantee the target will be followed. */
bool hasStaticTarget() const;
/* Get said target, -1 if there is none. */
u32 staticTarget() const;
/* Whether the block ends with a dynamically-known jump target. In SuperH
none of these are conditional so that makes it the only option. */
bool hasDynamicTarget() const;
/* CFG navigation. */
auto successors() // -> [BasicBlock &]
{
return std::views::all(m_successors)
| std::views::transform([this](int index) -> BasicBlock & {
return parentFunction().basicBlockByIndex(index);
});
}
auto successors() const // -> [BasicBlock const &]
{
return std::views::all(m_successors)
| std::views::transform([this](int index) {
return parentFunction().basicBlockByIndex(index);
});
}
std::vector<int> const &successorsByIndex() const
{
return m_successors;
}
auto successorsByAddress() const // -> [u32]
{
return successors()
| std::views::transform([](auto bb) { return bb.address(); });
}
auto predecessors() // -> [BasicBlock &]
{
return std::views::all(m_predecessors)
| std::views::transform([this](int index) -> BasicBlock & {
return parentFunction().basicBlockByIndex(index);
});
}
auto predecessors() const // -> [BasicBlock const &]
{
return std::views::all(m_predecessors)
| std::views::transform([this](int index) {
return parentFunction().basicBlockByIndex(index);
});
}
std::vector<int> const &predecessorsByIndex() const
{
return m_predecessors;
}
auto predecessorsByAddress() const // -> [u32]
{
return predecessors()
| std::views::transform([](auto bb) { return bb.address(); });
}
uint successorCount() const
{
return m_successors.size();
}
uint predecessorCount() const
{
return m_predecessors.size();
}
/* Construction functions to be used only by the cfg pass. */
void addInstruction(Instruction &&insn);
void finalizeBlock();
void addSuccessor(BasicBlock *succ);
void addPredecessor(BasicBlock *pred);
private:
std::reference_wrapper<Function> m_function;
std::vector<Instruction> m_instructions;
/* TODO: More compact storage for CFG edges, especially successors (≤ 2) */
std::vector<int> m_successors;
std::vector<int> m_predecessors;
u32 m_address;
u32 m_flags;
};
/* Concrete instruction in a basic block. This class only contains a minimal
   amount of data, and most analysis results provided by its methods are
   instead queried from the appropriate Binary. */
struct Instruction
{
    /* Bit flags stored in m_flags. `Last` implicitly takes the value that
       follows the highest flag (0x02), so ValidFlags evaluates to the OR of
       all flags: (0x02 - 2) * 2 + 1 = 0x01. */
    enum Flags {
        InDelaySlot = 0x01,
        Last,
        ValidFlags = (Last - 2) * 2 + 1,
    };

    Instruction(Function &function, u32 address, u32 opcode);

    // TODO: Don't use the word "opcode", maybe "base"
    // TODO: Get opcode from Instruction
    /* Entry of the global instruction table for this instruction's opcode.
       Must only be called when hasValidOpcode() is true, which is checked
       by an assertion. */
    AsmInstruction const &opcode() const
    {
        assert(hasValidOpcode() && "use of Instruction with invalid opcode");
        return *insmap[m_opcode];
    }
    /* Whether the instruction table has an entry for this opcode. The
       stored opcode is 32-bit while the table only covers 16-bit values,
       so opcodes beyond the table are reported as invalid instead of
       indexing out of bounds. */
    bool hasValidOpcode() const
    {
        return m_opcode < insmap.size() && insmap[m_opcode].has_value();
    }

    /* Instruction's size in bytes: 4 if the stored opcode has any bit set
       above the low 16, 2 otherwise. */
    uint encodingSize() const
    {
        return (m_opcode >> 16) ? 4 : 2;
    }

    /* Binary, function and basic block that own the instruction. */
    Binary &parentBinary()
    {
        return m_function.parentBinary();
    }
    Binary const &parentBinary() const
    {
        return m_function.parentBinary();
    }
    Function &parentFunction()
    {
        return m_function;
    }
    Function const &parentFunction() const
    {
        return m_function;
    }
    BasicBlock &parentBlock()
    {
        return m_function.basicBlockByIndex(m_blockIndex);
    }
    BasicBlock const &parentBlock() const
    {
        return m_function.basicBlockByIndex(m_blockIndex);
    }

    /* Instruction's address. */
    u32 address() const
    {
        return m_address;
    }

    /* Index of instruction within its basic block. */
    uint indexInBlock() const
    {
        return m_insnIndex;
    }
    /* Whether this instruction is the first instruction in its block. */
    bool isFirstInBlock() const
    {
        return m_insnIndex == 0;
    }
    /* Whether this instruction is the last in its block. This does *not* imply
       that it's a jump, because delay slots are a thing. */
    bool isLastInBlock() const
    {
        return static_cast<uint>(m_insnIndex) + 1
               == parentBlock().instructionCount();
    }
    /* Whether this instruction is in a delay slot. Since only jumps have delay
       slots, this implies isLastInBlock(). */
    bool isInDelaySlot() const
    {
        return (m_flags & Flags::InDelaySlot) != 0;
    }

    /* Properties about parameters. This is tailored to the SuperH ISA. */
    // TODO: Extract parameter info
    // - Get branch target if any, immediate if any, memory access address if
    //   any (harder: dynamic)...
    // - All successors (+ user specifiable for dynamic cases)
    // - All constants

    /* Functions to access and modify flags */
    u32 flags() const
    {
        return m_flags;
    }
    /* Replace the whole flag set; bits outside ValidFlags are rejected by
       an assertion, as BasicBlock::setFlags() does. */
    void setFlags(u32 flags)
    {
        assert(!(flags & ~Flags::ValidFlags)
               && "setting invalid instruction flags");
        m_flags = flags;
    }

    /* Construction functions to be used only by the cfg pass */
    void setBlockContext(uint blockIndex, uint insnIndex);

private:
    /* The following members are instantiated for every instruction mapped out
       in the Binary - keep it reasonably small. */
    Function &m_function;
    u32 m_address;
    u32 m_opcode;
    u32 m_flags;
    /* Both indices are stored on 16 bits although setBlockContext() takes
       uint arguments. */
    u16 m_blockIndex;
    u16 m_insnIndex;
};
} /* namespace FxOS */
#endif /* FXOS_FUNCTION_H */