fxos/include/fxos/lang.h

412 lines
12 KiB
C++

//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/lang: Assembler language syntax
//
// This file defines the syntactic tools needed to read and manipulate
// assembler instructions.
//
// The CpuRegister class is a glorified type-safe enumeration. Registers can be
// named, e.g. CpuRegister::R0; they can be constructed from their lowercase
// name as a string, e.g. CpuRegister("r0"); and they can be printed with the
// .str() method.
//
// The AsmOperand struct represents an operand to an instruction. This is
// syntactic only; for instance Deref (@rn) does not mean that memory is
// accessed, since [jmp @rn] or [ocbwb @rn] do not actually access @rn.
// Factory functions such as AsmOperand::mkDeref() are provided.
//
// Finally, the AsmInstruction struct represents an abstract instruction out of
// context. Each AsmInstruction object only models one particular instance of
// one particular instruction, for instance [mov #14, r2] and not
// [mov #imm, rn]. The rationale for this is disassembly speed and a number of
// simplifications for passes; and there are fewer than 65'000 non-DSP
// instructions anyway.
//---
#ifndef FXOS_LANG_H
#define FXOS_LANG_H
#include <fxos/util/types.h>
#include <cassert>
#include <ranges>
#include <string>
#include <array>
namespace FxOS {
/* CPU register names, with a little meat for conversion to and from string */
class CpuRegister
{
public:
// clang-format off
enum CpuRegisterName: i8 {
/* Value 0 is reserved for special purposes such as "no register" */
UNDEFINED = 0,
/* Caller-saved general-purpose registers */
R0, R1, R2, R3, R4, R5, R6, R7,
/* Banked general-purpose registers. fxos does not account for
banking identities, these are just for naming and output. */
R0B, R1B, R2B, R3B, R4B, R5B, R6B, R7B,
/* Callee-saved general-purpose registers */
R8, R9, R10, R11, R12, R13, R14, R15,
/* System registers */
MACH, MACL, PR, PC,
/* Control registers */
SR, SSR, SPC, GBR, VBR, DBR, SGR,
};
// clang-format on
CpuRegister() = default;
/* Construction from CpuRegisterName */
constexpr CpuRegister(CpuRegisterName name): m_name(name)
{
}
/* Construction from string */
CpuRegister(std::string register_name);
/* Conversion to string */
std::string str() const noexcept;
/* Conversion to CpuRegisterName for switch statements */
constexpr operator CpuRegisterName() noexcept
{
return m_name;
}
/* Comparison operators */
constexpr bool operator==(CpuRegister r) const
{
return m_name == r.m_name;
}
constexpr bool operator!=(CpuRegister r) const
{
return m_name != r.m_name;
}
/* Get the register number for r0 ... r15, -1 for other registers. */
int getRn() const;
/* Make an r0 ... r15 register name from its number. */
static CpuRegister makeRn(int n);
private:
CpuRegisterName m_name;
};
/* Operands to raw assembler instructions */
struct AsmOperand
{
enum Kind : i8 {
Reg, /* rn */
Deref, /* @rn */
PostInc, /* @rn+ */
PreDec, /* @-rn */
StructDeref, /* @(disp,rn) or @(disp,gbr) */
ArrayDeref, /* @(r0,rn) or @(r0,gbr) */
PcRel, /* @(disp,pc) with 4-alignment correction */
PcJump, /* pc+disp */
PcAddr, /* pc+disp (the address itself, for mova) */
Imm, /* #imm */
};
/* Default constructor gives register r0. */
AsmOperand();
/* Factory functions */
static AsmOperand mkReg(CpuRegister base)
{
return AsmOperand(Reg, base, 0);
}
static AsmOperand mkDeref(CpuRegister base, i8 opsize)
{
return AsmOperand(Deref, base, opsize);
}
static AsmOperand mkPostInc(CpuRegister base, i8 opsize)
{
return AsmOperand(PostInc, base, opsize);
}
static AsmOperand mkPreDec(CpuRegister base, i8 opsize)
{
return AsmOperand(PreDec, base, opsize);
}
static AsmOperand mkStructDeref(int disp, int opsize, CpuRegister base)
{
return AsmOperand(disp, opsize, base);
}
static AsmOperand mkArrayDeref(
CpuRegister index, CpuRegister base, i8 opsize)
{
return AsmOperand(index, base, opsize);
}
static AsmOperand mkPcRel(int disp, int opsize)
{
return AsmOperand(PcRel, disp, opsize);
}
static AsmOperand mkPcJump(int disp)
{
return AsmOperand(PcJump, disp);
}
static AsmOperand mkPcAddr(int disp)
{
return AsmOperand(PcAddr, disp);
}
static AsmOperand mkImm(int imm)
{
return AsmOperand(Imm, imm);
}
std::string str() const;
/* Is this operand a register? */
bool isRegister() const
{
return m_kind == Reg;
}
/* Is this operand in memory? */
bool isMemory() const
{
switch(m_kind) {
case Deref:
case PostInc:
case PreDec:
case StructDeref:
case ArrayDeref:
case PcRel:
return true;
case Reg:
case PcJump:
case PcAddr:
case Imm:
return false;
}
return false;
}
/* Is this operand a constant? */
bool isConstant() const
{
switch(m_kind) {
case PcRel:
case PcJump:
case PcAddr:
case Imm:
return true;
case Reg:
case Deref:
case PostInc:
case PreDec:
case StructDeref:
case ArrayDeref:
return false;
}
return false;
}
// TODO: RelConst modeling the memory address being being used (first
// without current state, then with it, for r0/rn, disp/pc, and r0/gbr)
Kind kind() const
{
return m_kind;
}
CpuRegister base() const
{
assert(m_kind != Imm && "invalid AsmOperand accessor: base");
return m_base;
}
CpuRegister index() const
{
assert(m_kind == ArrayDeref && "invalid AsmOperand accessor: index");
return m_index;
}
int opsize() const
{
return m_opsize;
}
int disp() const
{
assert((m_kind == StructDeref || m_kind == PcRel || m_kind == PcJump
|| m_kind == PcAddr)
&& "invalid AsmOperand accessor: disp");
return m_disp_imm;
}
int imm() const
{
assert(m_kind == Imm && "invalid AsmOperand accessor: imm");
return m_disp_imm;
}
/* Is this operand computed using a PC-relative address? */
bool usesPCRelativeAddressing() const
{
return m_kind == PcRel || m_kind == PcJump || m_kind == PcAddr;
}
/* Get the PC-relative target, assuming the instruction is at the provided
address, for arguments with PC-relative adressing. */
u32 getPCRelativeTarget(u32 pc) const;
private:
AsmOperand(Kind kind, CpuRegister base, i8 opsize);
AsmOperand(int disp, i8 opsize, CpuRegister base);
AsmOperand(CpuRegister index, CpuRegister base, i8 opsize);
AsmOperand(Kind kind, int disp_imm, i8 opsize = 0);
Kind m_kind;
/* Base register. Valid for all modes except Imm */
CpuRegister m_base;
/* Index register. Valid for ArrayDeref */
CpuRegister m_index;
/* Operation size (0, 1, 2 or 4). Generally a multiplier for disp */
i8 m_opsize;
/* Displacement in bytes for StructDeref, PcRel, PcJump, and PcAddr, or
immediate value for Imm. */
int m_disp_imm;
};
/* Assembler instruction.

   Stores the original encoding, an operation code (one of the SH_* values),
   a syntactic operation size, a set of Tag flags and up to two operands. */
struct AsmInstruction
{
    /* Bit flags classifying the instruction; combined into m_tags. */
    enum Tag {
        IsReturn = 0x01,
        IsUnconditionalJump = 0x02,
        IsConditionalJump = 0x04,
        IsCall = 0x08,
        HasDelaySlot = 0x10,
        IsInvalidDelaySlot = 0x20,
        IsDynamicJump = 0x40,
    };

    /* Operation codes: one SH_<NAME> enumerator per GENDEFS_INSN entry of the
       included list, followed by SH_MAX. */
#define GENDEFS_INSN(NAME, STR) SH_##NAME,
    enum {
#include "gendefs/insn.h"
        SH_MAX,
    };
#undef GENDEFS_INSN
    /* Operation codes are stored in a u8 (m_operation). */
    static_assert(SH_MAX <= 0xff);

    AsmInstruction(u32 encoding, char const *mnemonic, int tags, int opCount,
        AsmOperand op1 = {}, AsmOperand op2 = {});

    /* Operation code, one of AsmInstruction::SH_* (eg. SH_mov) */
    uint operation() const
    {
        return m_operation;
    }

    /* Access to operands */
    int operandCount() const
    {
        return m_opCount;
    }
    /* i-th operand; asserts that i is within [0, operandCount()). */
    AsmOperand const &operand(int i) const
    {
        assert((uint)i < m_opCount && "operand out-of-bounds");
        return m_ops[i];
    }
    /* View over the first operandCount() operands. */
    auto operands() const // -> [AsmOperand const &]
    {
        return std::views::take(m_ops, m_opCount);
    }

    /* Size indication (purely syntactic), 0 if not present */
    int opsize() const
    {
        return m_opsize;
    }

    /* Original encoding */
    u32 encoding() const
    {
        return m_encoding;
    }

    /* Mnemonic (with the size indicator, eg. "mov.l") */
    std::string mnemonic() const;
    /* Operation name (without the size indicator, eg. "mov") */
    char const *operationString() const;
    /* Size indicator to go after the op name (".b", ".w", ".l" or empty) */
    char const *operationSizeString() const;

    //=== Instruction classes ===//

    /* Whether the instruction terminates the function it's in. */
    bool isReturn() const
    {
        return hasTag(IsReturn);
    }

    /* Whether the instruction is a conditional/unconditional static jump. */
    bool isConditionalJump() const
    {
        return hasTag(IsConditionalJump);
    }
    bool isUnconditionalJump() const
    {
        return hasTag(IsUnconditionalJump);
    }
    bool isAnyStaticJump() const
    {
        return isConditionalJump() || isUnconditionalJump();
    }

    /* Whether the instruction jumps to a dynamic target. This does not include
       *calls* to dynamic targets. These jumps are always unconditional. */
    bool isDynamicJump() const
    {
        return hasTag(IsDynamicJump);
    }

    /* Whether the instruction is a function call. */
    bool isCall() const
    {
        return hasTag(IsCall);
    }

    /* Whether the instruction has a delay slot */
    bool hasDelaySlot() const
    {
        return hasTag(HasDelaySlot);
    }

    /* Whether the instruction terminates its basic block. */
    bool isBlockTerminator() const
    {
        return isReturn() || isDynamicJump() || isAnyStaticJump();
    }

    /* Whether the instruction can be used in a delay slot. */
    bool isValidDelaySlot() const
    {
        /* Block terminators and instructions that themselves have a delay
           slot are never valid. */
        if(isBlockTerminator() || hasDelaySlot())
            return false;
        return !hasTag(IsInvalidDelaySlot);
    }

    //=== Instruction info ===//

    /* Get the PC-relative target, assuming the instruction is at the provided
       address, for instructions with PC-relative offsets. */
    u32 getPCRelativeTarget(u32 pc) const;

private:
    /* Whether the given tag bit is set in m_tags. */
    bool hasTag(Tag tag) const
    {
        return (m_tags & tag) != 0;
    }

    /* Original encoding */
    u32 m_encoding;
    /* Assembler instruction name (mov, add, etc), without size modifier. */
    u8 m_operation;
    /* Operation size (0, 1, 2 or 4) */
    i8 m_opsize;
    /* Number of operands */
    u8 m_opCount;
    /* Instruction tags */
    u16 m_tags;
    /* Operands */
    std::array<AsmOperand, 2> m_ops;
};
} /* namespace FxOS */
#endif /* FXOS_LANG_H */