//---------------------------------------------------------------------------// // 1100101 |_ mov #0, r4 __ // // 11 |_ <0xb380 %5c4> / _|_ _____ ___ // // 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // // |_ base# + offset |_| /_\_\___/__/ // //---------------------------------------------------------------------------// // fxos/lang: Assembler language syntax // // This file defines the syntactic tools needed to read and manipulate // assembler instructions. // // The CpuRegister class is a glorified type-safe enumeration. Registers can be // named, fi. CpuRegister::R0; they can be constructed from their lowercase // name as a string, fi. CpuRegister("r0"); and they can be printed with the // .str() method. // // The Argument struct represents an argument to an instruction. This is // syntactic only; for instance Deref (@rn) does not mean that memory is // accessed, since [jmp @rn] or [ocbwb @rn] do not actually access @rn. // Constructor functions such as Argument_Deref() are provided. // // Finally, the Instruction struct represents an abstract instruction out of // context. Each Instruction object only models one particular instance of one // particular instruction, for instance [mov #14, r2] and not [mov #imm, rn]. // The rationale for this is disassembly speed and a number of simplifications // for passes; and there are less than 65'000 non-DSP instructions anyway. //--- #ifndef FXOS_LANG_H #define FXOS_LANG_H #include #include #include #include #include namespace FxOS { /* CPU register names, with a little meat for conversion to and from string */ class CpuRegister { public: // clang-format off enum CpuRegisterName: i8 { /* Value 0 is reserved for special purposes such as "no register" */ UNDEFINED = 0, /* Caller-saved general-purpose registers */ R0, R1, R2, R3, R4, R5, R6, R7, /* Banked general-purpose registers. fxos does not account for banking identities, these are just for naming and output. */ R0B, R1B, R2B, R3B, R4B, R5B, R6B, R7B, /* Callee-saved general-purpose registers */ R8, R9, R10, R11, R12, R13, R14, R15, /* System registers */ MACH, MACL, PR, PC, /* Control registers */ SR, SSR, SPC, GBR, VBR, DBR, SGR, }; // clang-format on CpuRegister() = default; /* Construction from CpuRegisterName */ constexpr CpuRegister(CpuRegisterName name): m_name(name) { } /* Construction from string */ CpuRegister(std::string register_name); /* Conversion to string */ std::string str() const noexcept; /* Conversion to CpuRegisterName for switch statements */ constexpr operator CpuRegisterName() noexcept { return m_name; } /* Comparison operators */ constexpr bool operator==(CpuRegister r) const { return m_name == r.m_name; } constexpr bool operator!=(CpuRegister r) const { return m_name != r.m_name; } /* Get the register number for r0 ... r15, -1 for other registers. */ int getRn() const; /* Make an r0 ... r15 register name from its number. */ static CpuRegister makeRn(int n); private: CpuRegisterName m_name; }; /* Operands to raw assembler instructions */ struct AsmOperand { enum Kind : i8 { Reg, /* rn */ Deref, /* @rn */ PostInc, /* @rn+ */ PreDec, /* @-rn */ StructDeref, /* @(disp,rn) or @(disp,gbr) */ ArrayDeref, /* @(r0,rn) or @(r0,gbr) */ PcRel, /* @(disp,pc) with 4-alignment correction */ PcJump, /* pc+disp */ PcAddr, /* pc+disp (the address itself, for mova) */ Imm, /* #imm */ }; /* Default constructor gives register r0. */ AsmOperand(); /* Factory functions */ static AsmOperand mkReg(CpuRegister base) { return AsmOperand(Reg, base, 0); } static AsmOperand mkDeref(CpuRegister base, i8 opsize) { return AsmOperand(Deref, base, opsize); } static AsmOperand mkPostInc(CpuRegister base, i8 opsize) { return AsmOperand(PostInc, base, opsize); } static AsmOperand mkPreDec(CpuRegister base, i8 opsize) { return AsmOperand(PreDec, base, opsize); } static AsmOperand mkStructDeref(int disp, int opsize, CpuRegister base) { return AsmOperand(disp, opsize, base); } static AsmOperand mkArrayDeref( CpuRegister index, CpuRegister base, i8 opsize) { return AsmOperand(index, base, opsize); } static AsmOperand mkPcRel(int disp, int opsize) { return AsmOperand(PcRel, disp, opsize); } static AsmOperand mkPcJump(int disp) { return AsmOperand(PcJump, disp); } static AsmOperand mkPcAddr(int disp) { return AsmOperand(PcAddr, disp); } static AsmOperand mkImm(int imm) { return AsmOperand(Imm, imm); } std::string str() const; /* Is this operand a register? */ bool isRegister() const { return m_kind == Reg; } /* Is this operand in memory? */ bool isMemory() const { switch(m_kind) { case Deref: case PostInc: case PreDec: case StructDeref: case ArrayDeref: case PcRel: return true; case Reg: case PcJump: case PcAddr: case Imm: return false; } return false; } /* Is this operand a constant? */ bool isConstant() const { switch(m_kind) { case PcRel: case PcJump: case PcAddr: case Imm: return true; case Reg: case Deref: case PostInc: case PreDec: case StructDeref: case ArrayDeref: return false; } return false; } // TODO: RelConst modeling the memory address being being used (first // without current state, then with it, for r0/rn, disp/pc, and r0/gbr) Kind kind() const { return m_kind; } CpuRegister base() const { assert(m_kind != Imm && "invalid AsmOperand accessor: base"); return m_base; } CpuRegister index() const { assert(m_kind == ArrayDeref && "invalid AsmOperand accessor: index"); return m_index; } int opsize() const { return m_opsize; } int disp() const { assert((m_kind == StructDeref || m_kind == PcRel || m_kind == PcJump || m_kind == PcAddr) && "invalid AsmOperand accessor: disp"); return m_disp_imm; } int imm() const { assert(m_kind == Imm && "invalid AsmOperand accessor: imm"); return m_disp_imm; } /* Is this operand computed using a PC-relative address? */ bool usesPCRelativeAddressing() const { return m_kind == PcRel || m_kind == PcJump || m_kind == PcAddr; } /* Get the PC-relative target, assuming the instruction is at the provided address, for arguments with PC-relative adressing. */ u32 getPCRelativeTarget(u32 pc) const; private: AsmOperand(Kind kind, CpuRegister base, i8 opsize); AsmOperand(int disp, i8 opsize, CpuRegister base); AsmOperand(CpuRegister index, CpuRegister base, i8 opsize); AsmOperand(Kind kind, int disp_imm, i8 opsize = 0); Kind m_kind; /* Base register. Valid for all modes except Imm */ CpuRegister m_base; /* Index register. Valid for ArrayDeref */ CpuRegister m_index; /* Operation size (0, 1, 2 or 4). Generally a multiplier for disp */ i8 m_opsize; /* Displacement in bytes for StructDeref, PcRel, PcJump, and PcAddr, or immediate value for Imm. */ int m_disp_imm; }; /* Assembler instruction */ struct AsmInstruction { enum Tag { IsReturn = 0x01, IsUnconditionalJump = 0x02, IsConditionalJump = 0x04, IsCall = 0x08, HasDelaySlot = 0x10, IsInvalidDelaySlot = 0x20, IsDynamicJump = 0x40, }; #define GENDEFS_INSN(NAME, STR) SH_##NAME, enum { #include "gendefs/insn.h" SH_MAX, }; #undef GENDEFS_INSN static_assert(SH_MAX <= 0xff); AsmInstruction(u32 encoding, char const *mnemonic, int tags, int opCount, AsmOperand op1 = {}, AsmOperand op2 = {}); /* Operation code, one of AsmInstruction::SH_* (eg. SH_mov) */ uint operation() const { return m_operation; } /* Access to operands */ int operandCount() const { return m_opCount; } AsmOperand const &operand(int i) const { assert((uint)i < m_opCount && "operand out-of-bounds"); return m_ops[i]; } auto operands() const // -> [AsmOperand const &] { return std::views::take(m_ops, m_opCount); } /* Size indication (purely syntactic), 0 if not present */ int opsize() const { return m_opsize; } /* Original encoding */ u32 encoding() const { return m_encoding; } /* Mnemonic (with the size indicator, eg. "mov.l") */ std::string mnemonic() const; /* Operation name (without the size indicator, eg. "mov") */ char const *operationString() const; /* Size indicator to go after the op name (".b", ".w", ".l" or empty) */ char const *operationSizeString() const; //=== Instruction classes ===// /* Whether the instruction terminates the function it's in. */ bool isReturn() const { return (m_tags & Tag::IsReturn) != 0; } /* Whether the instruction is a conditional/unconditional static jump. */ bool isConditionalJump() const { return (m_tags & Tag::IsConditionalJump) != 0; } bool isUnconditionalJump() const { return (m_tags & Tag::IsUnconditionalJump) != 0; } bool isAnyStaticJump() const { int IsJump = Tag::IsConditionalJump | Tag::IsUnconditionalJump; return (m_tags & IsJump) != 0; } /* Whether the instruction jumps to a dynamic target. This does not include *calls* to dynamic targets. These jumps are always unconditional. */ bool isDynamicJump() const { return (m_tags & Tag::IsDynamicJump) != 0; } /* Whether the instruction is a function call. */ bool isCall() const { return (m_tags & Tag::IsCall) != 0; } /* Whether the instruction has a delay slot */ bool hasDelaySlot() const { return (m_tags & Tag::HasDelaySlot) != 0; } /* Wheher the instruction terminates its basic block. */ bool isBlockTerminator() const { return isAnyStaticJump() || isDynamicJump() || isReturn(); } /* Whether the instruction can be used in a delay slot. */ bool isValidDelaySlot() const { return !isBlockTerminator() && !hasDelaySlot() && (m_tags & Tag::IsInvalidDelaySlot) == 0; } //=== Instruction info ===// /* Get the PC-relative target, assuming the instruction is at the provided address, for instructions with PC-relative offsets. */ u32 getPCRelativeTarget(u32 pc) const; private: /* Original encoding */ u32 m_encoding; /* Assembler instruction name (mov, add, etc), without size modifier. */ u8 m_operation; /* Operation size (0, 1, 2 or 4) */ i8 m_opsize; /* Number of operands */ u8 m_opCount; /* Instruction tags */ u16 m_tags; /* Operands */ std::array m_ops; }; } /* namespace FxOS */ #endif /* FXOS_LANG_H */