fxos/include/fxos/function.h

438 lines
11 KiB
C++

//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/function: Functions and their component blocks and instructions
#ifndef FXOS_FUNCTION_H
#define FXOS_FUNCTION_H
#include <fxos/util/types.h>
#include <fxos/binary.h>
#include <fxos/lang.h>
#include <array>
#include <vector>
#include <optional>
#include <cassert>
namespace FxOS {
/* Forward declarations. The class-key matches the definitions below (all three
   are defined with `struct`) so that compilers do not report mismatched tags
   between a declaration and its definition. */
struct Function;
struct BasicBlock;
struct Instruction;
// TODO: move this extern declaration of FxOS::insmap
/* Table of 65536 optional AsmInstruction entries; this is only a declaration,
   the object itself is defined elsewhere. Instruction::opcode() indexes the
   table with the instruction's opcode value and asserts that the selected
   entry holds a value. */
extern std::array<std::optional<AsmInstruction>, 65536> insmap;
/* Binary object representing a function. */
struct Function: public BinaryObject
{
Function(Binary &binary, u32 address);
/* Number of basic blocks. */
uint blockCount() const
{
return m_blocks.size();
}
/* Get basic block by its index. */
BasicBlock &basicBlockByIndex(uint index)
{
assert(index < blockCount() && "out-of-bounds block number");
return m_blocks[index];
}
BasicBlock const &basicBlockByIndex(uint index) const
{
assert(index < blockCount() && "out-of-bounds block number");
return m_blocks[index];
}
/* Get the entry block. */
BasicBlock &entryBlock()
{
return basicBlockByIndex(0);
}
BasicBlock const &entryBlock() const
{
return basicBlockByIndex(0);
}
/* Iterators over basic blocks. */
auto const begin() const
{
return m_blocks.begin();
}
auto begin()
{
return m_blocks.begin();
}
auto const end() const
{
return m_blocks.end();
}
auto end()
{
return m_blocks.end();
}
/* Construction functions to be used only by the cfg pass. */
void exploreFunctionAt(u32 address);
void addBasicBlock(BasicBlock &&bb);
void updateFunctionSize();
private:
/* List of basic blocks (entry block is always number 0) */
std::vector<BasicBlock> m_blocks;
};
/* Basic block within a function: a sequence of instructions owned by a
   Function, together with flags and the CFG edges (successors and
   predecessors) that connect it to other blocks. */
struct BasicBlock
{
    /* Bit flags stored in the block. `Last` is one more than the highest flag
       value and only serves to derive `ValidFlags`, the mask of every defined
       flag bit (0x0f with the four flags below). */
    enum Flags {
        IsEntryBlock = 0x01,
        IsTerminator = 0x02,
        HasDelaySlot = 0x04,
        NoTerminator = 0x08,
        Last,
        ValidFlags = (Last - 2) * 2 + 1,
    };

    // Basic blocks can exit in four ways:
    // 1. Fall through
    // 2. Jump to static destination
    // 3. Jump to dynamic destination
    // 4. Function return
    // A given block might have multiple options (typically 1/2)

    BasicBlock(Function &function, u32 address, bool isEntryBlock);

    /* Block's address (address of first instruction). */
    u32 address() const
    {
        return m_address;
    }

    /* Number of instructions. */
    uint instructionCount() const
    {
        return m_instructions.size();
    }

    /* Binary and function that own the basic block. */
    Binary &parentBinary()
    {
        return m_function.parentBinary();
    }
    Binary const &parentBinary() const
    {
        return m_function.parentBinary();
    }
    Function &parentFunction()
    {
        return m_function;
    }
    Function const &parentFunction() const
    {
        return m_function;
    }

    /* Block's index within function. */
    uint blockIndex() const;

    /* Instruction at a given index in the block
       (index < instructionCount()). */
    Instruction &instructionAtIndex(uint index)
    {
        assert(index < instructionCount()
               && "out-of-bounds access to basic block");
        return m_instructions[index];
    }
    Instruction const &instructionAtIndex(uint index) const
    {
        assert(index < instructionCount()
               && "out-of-bounds access to basic block");
        return m_instructions[index];
    }

    /* Terminator instruction, or nullptr if the block is flagged as having no
       terminator. The terminator is the last instruction, or the one before
       last when the block has a delay slot. */
    Instruction *terminatorInstruction()
    {
        return hasNoTerminator()
                   ? nullptr
                   : &m_instructions[instructionCount() - hasDelaySlot() - 1];
    }
    Instruction const *terminatorInstruction() const
    {
        return hasNoTerminator()
                   ? nullptr
                   : &m_instructions[instructionCount() - hasDelaySlot() - 1];
    }

    /* Instruction in terminator's delay slot (the last instruction of the
       block), or nullptr if the block has no delay slot. */
    Instruction *delaySlotInstruction()
    {
        return hasDelaySlot() ? &m_instructions[instructionCount() - 1]
                              : nullptr;
    }
    Instruction const *delaySlotInstruction() const
    {
        return hasDelaySlot() ? &m_instructions[instructionCount() - 1]
                              : nullptr;
    }

    /* Iterators over instructions. */
    auto const begin() const
    {
        return m_instructions.begin();
    }
    auto begin()
    {
        return m_instructions.begin();
    }
    auto const end() const
    {
        return m_instructions.end();
    }
    auto end()
    {
        return m_instructions.end();
    }
    auto const rbegin() const
    {
        return m_instructions.rbegin();
    }
    auto rbegin()
    {
        return m_instructions.rbegin();
    }
    auto const rend() const
    {
        return m_instructions.rend();
    }
    auto rend()
    {
        return m_instructions.rend();
    }

    /* Functions for checking and setting flags */
    u32 getFlags() const
    {
        return m_flags;
    }
    /* Replaces the whole flag word; only bits in ValidFlags may be set. */
    void setFlags(u32 flags)
    {
        assert(!(flags & ~Flags::ValidFlags)
               && "setting invalid basic block flags");
        m_flags = flags;
    }
    bool isEntryBlock() const
    {
        return (m_flags & Flags::IsEntryBlock) != 0;
    }
    bool isTerminator() const
    {
        return (m_flags & Flags::IsTerminator) != 0;
    }
    bool hasDelaySlot() const
    {
        return (m_flags & Flags::HasDelaySlot) != 0;
    }
    bool hasNoTerminator() const
    {
        return (m_flags & Flags::NoTerminator) != 0;
    }

    /* Block exit information. */

    /* Whether the block might fall through (conditional or no jump). */
    bool mayFallthrough() const;
    /* Whether the block always falls through. */
    bool mustFallthrough() const
    {
        return hasNoTerminator();
    }
    /* Whether the block has a statically-known jump target. The jump might be
       conditional, so this doesn't guarantee the target will be followed. */
    bool hasStaticTarget() const;
    /* Get said target, -1 if there is none. */
    u32 staticTarget() const;
    /* Whether the block ends with a dynamically-known jump target. In SuperH
       none of these are conditional so that makes it the only option. */
    bool hasDynamicTarget() const;

    /* CFG navigation. The non-const overloads expose the stored edge lists
       directly; the const overloads return a fresh copy whose elements are
       pointers-to-const, with exactly one entry per edge. */
    std::vector<BasicBlock *> const &successors()
    {
        return m_successors;
    }
    std::vector<BasicBlock const *> successors() const
    {
        /* Build the copy from the iterator range. Sizing the vector up front
           and then appending would leave size() null pointers in front of the
           real successors. */
        return std::vector<BasicBlock const *>(
            m_successors.begin(), m_successors.end());
    }
    std::vector<BasicBlock *> const &predecessors()
    {
        return m_predecessors;
    }
    std::vector<BasicBlock const *> predecessors() const
    {
        /* Same construction as successors() const: one entry per edge, no
           leading null pointers. */
        return std::vector<BasicBlock const *>(
            m_predecessors.begin(), m_predecessors.end());
    }
    uint successorCount() const
    {
        return m_successors.size();
    }
    uint predecessorCount() const
    {
        return m_predecessors.size();
    }

    /* Construction functions to be used only by the cfg pass. */
    void addInstruction(Instruction &&insn);
    void finalizeBlock();
    // TODO: Set successors and predecessors

private:
    Function &m_function;
    std::vector<Instruction> m_instructions;
    /* TODO: More compact storage for CFG edges, especially successors (≤ 2) */
    std::vector<BasicBlock *> m_successors;
    std::vector<BasicBlock *> m_predecessors;
    u32 m_address;
    u32 m_flags;
};
/* Concrete instruction in a basic block. This class only contains a minimal
   amount of data, and most analysis results provided by its methods are
   instead queried from the appropriate Binary. */
struct Instruction
{
    /* Bit flags stored in the instruction. `Last` is one more than the highest
       flag value and only serves to derive `ValidFlags`, the mask of every
       defined flag bit (0x01 with the single flag below). */
    enum Flags {
        InDelaySlot = 0x01,
        Last,
        ValidFlags = (Last - 2) * 2 + 1,
    };

    Instruction(Function &function, u32 address, u32 opcode);

    // TODO: Rename AsmInstruction -> Opcode
    // TODO: Get opcode from Instruction
    /* Description of this instruction's opcode, looked up in insmap. The
       opcode value must index a populated entry of the table. */
    AsmInstruction const &opcode() const
    {
        /* m_opcode is 32 bits wide while insmap has a fixed number of
           entries; catch out-of-range indices instead of reading past the
           end of the array. */
        assert(m_opcode < insmap.size()
               && "opcode does not fit in the instruction map");
        assert(insmap[m_opcode] && "use of Instruction with invalid opcode");
        return *insmap[m_opcode];
    }

    /* Instruction's size in bytes: 4 if the opcode value has any bit set
       above the low 16, 2 otherwise. */
    uint size() const
    {
        return (m_opcode >> 16) ? 4 : 2;
    }

    /* Binary, function and basic block that own the instruction. */
    Binary &parentBinary()
    {
        return m_function.parentBinary();
    }
    Binary const &parentBinary() const
    {
        return m_function.parentBinary();
    }
    Function &parentFunction()
    {
        return m_function;
    }
    Function const &parentFunction() const
    {
        return m_function;
    }
    BasicBlock &parentBlock()
    {
        return m_function.basicBlockByIndex(m_blockIndex);
    }
    BasicBlock const &parentBlock() const
    {
        return m_function.basicBlockByIndex(m_blockIndex);
    }

    /* Instruction's address. */
    u32 address() const
    {
        return m_address;
    }

    /* Index of instruction within its basic block. */
    uint indexInBlock() const
    {
        return m_insnIndex;
    }

    /* Whether this instruction is the first instruction in its block. */
    bool isFirstInBlock() const
    {
        return m_insnIndex == 0;
    }

    /* Whether this instruction is the last in its block. This does *not* imply
       that it's a jump, because delay slots are a thing. */
    bool isLastInBlock() const
    {
        /* Widen before adding 1 so the sum is computed as uint rather than
           through the promotion of the 16-bit index. */
        return static_cast<uint>(m_insnIndex) + 1
               == parentBlock().instructionCount();
    }

    /* Whether this instruction is in a delay slot. Since only jumps have delay
       slots, this implies isLastInBlock(). */
    bool isInDelaySlot() const
    {
        return (m_flags & Flags::InDelaySlot) != 0;
    }

    /* Properties about parameters. This is tailored to the SuperH ISA. */
    // TODO: Extract parameter info
    // - Get branch target if any, immediate if any, memory access address if
    //   any (harder: dynamic)...
    // - All successors (+ user specifiable for dynamic cases)
    // - All constants

    /* Functions to access and modify flags */
    u32 flags() const
    {
        return m_flags;
    }
    /* Replaces the whole flag word; only bits in ValidFlags may be set, as
       with BasicBlock::setFlags(). */
    void setFlags(u32 flags)
    {
        assert(!(flags & ~Flags::ValidFlags)
               && "setting invalid instruction flags");
        m_flags = flags;
    }

    /* Construction functions to be used only by the cfg pass */
    void setBlockContext(uint blockIndex, uint insnIndex);

private:
    /* The following members are instantiated for every instruction mapped out
       in the Binary - keep it reasonably small. */
    Function &m_function;
    u32 m_address;
    u32 m_opcode;
    u32 m_flags;
    u16 m_blockIndex;
    u16 m_insnIndex;
};
} /* namespace FxOS */
#endif /* FXOS_FUNCTION_H */