From 255e31ab8ff3db5d4b35984fcc66f298f45fd9f6 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Sun, 26 Nov 2023 21:31:51 +0100 Subject: [PATCH] fxos: make AsmArgument -> AsmOperand more analysis-friendly --- include/fxos/lang.h | 237 +++++++++++++++++++++++++++++++---------- lib/analysis.cpp | 2 +- lib/disassembly.cpp | 4 +- lib/lang.cpp | 205 ++++++++++------------------------- lib/load-asm.l | 102 ++++++++++-------- lib/passes/pcrel.cpp | 22 ++-- lib/passes/syscall.cpp | 8 +- lib/view/assembly.cpp | 63 +++++------ 8 files changed, 344 insertions(+), 299 deletions(-) diff --git a/include/fxos/lang.h b/include/fxos/lang.h index 6807b61..c8341c0 100644 --- a/include/fxos/lang.h +++ b/include/fxos/lang.h @@ -29,8 +29,11 @@ #ifndef FXOS_LANG_H #define FXOS_LANG_H -#include #include +#include +#include +#include +#include namespace FxOS { @@ -89,10 +92,9 @@ private: CpuRegisterName m_name; }; -/* Addressing modes for arguments */ -struct AsmArgument +/* Operands to raw assembler instructions */ +struct AsmOperand { - /* Various addressing modes in the language */ enum Kind : i8 { Reg, /* rn */ Deref, /* @rn */ @@ -106,46 +108,143 @@ struct AsmArgument Imm, /* #imm */ }; - AsmArgument() = default; + /* Constructors, mostly internal; default is r0, rest is not relevant */ + AsmOperand(Kind kind = Reg, CpuRegister base = CpuRegister::R0); + AsmOperand(int disp, i8 opsize, CpuRegister base); + AsmOperand(CpuRegister index, CpuRegister base); + AsmOperand(Kind kind, int disp_imm, i8 opsize = 0); + + /* Actually useful factory functions */ + static AsmOperand mkReg(CpuRegister base) + { + return AsmOperand(Reg, base); + } + static AsmOperand mkDeref(CpuRegister base) + { + return AsmOperand(Deref, base); + } + static AsmOperand mkPostInc(CpuRegister base) + { + return AsmOperand(PostInc, base); + } + static AsmOperand mkPreDec(CpuRegister base) + { + return AsmOperand(PreDec, base); + } + static AsmOperand mkStructDeref(int disp, int opsize, CpuRegister base) + { + return 
AsmOperand(disp, opsize, base); + } + static AsmOperand mkArrayDeref(CpuRegister index, CpuRegister base) + { + return AsmOperand(index, base); + } + static AsmOperand mkPcRel(int disp, int opsize) + { + return AsmOperand(PcRel, disp, opsize); + } + static AsmOperand mkPcJump(int disp) + { + return AsmOperand(PcJump, disp); + } + static AsmOperand mkPcAddr(int disp) + { + return AsmOperand(PcAddr, disp); + } + static AsmOperand mkImm(int imm) + { + return AsmOperand(Imm, imm); + } - /* String representation */ std::string str() const; - /* Addressing mode */ - Kind kind; - /* Base register. Valid for all modes except Imm */ - CpuRegister base; - /* Index register. Valid for ArrayDeref */ - CpuRegister index; - /* Operation size (0, 1, 2 or 4). Generally a multiplier for disp */ - i8 opsize; - - union + /* Is this operand a register? */ + bool isReg() const { - /* Displacement in bytes. For StructDeref, PcRel, PcJump, and PcAddr */ - int disp; - /* Immediate value. Valid for Imm */ - int imm; - }; + return m_kind == Reg; + } + + /* Is this operand in memory? 
*/ + bool isMemory() const + { + switch(m_kind) { + case Deref: + case PostInc: + case PreDec: + case StructDeref: + case ArrayDeref: + case PcRel: + return true; + case Reg: + case PcJump: + case PcAddr: + case Imm: + return false; + } + } + + // TODO: RelConst modeling the memory address being used (first + // without current state, then with it, for r0/rn, disp/pc, and r0/gbr) + + Kind kind() const + { + return m_kind; + } + + CpuRegister base() const + { + assert(m_kind != Imm && "invalid AsmOperand accessor: base"); + return m_base; + } + + CpuRegister index() const + { + assert(m_kind == ArrayDeref && "invalid AsmOperand accessor: index"); + return m_index; + } + + int opsize() const + { + return m_opsize; + } + + int disp() const + { + assert((m_kind == StructDeref || m_kind == PcRel || m_kind == PcJump + || m_kind == PcAddr) + && "invalid AsmOperand accessor: disp"); + return m_disp_imm; + } + + int imm() const + { + assert(m_kind == Imm && "invalid AsmOperand accessor: imm"); + return m_disp_imm; + } + + /* Is this operand computed using a PC-relative address? */ + bool usesPCRelativeAddressing() const + { + return m_kind == PcRel || m_kind == PcJump || m_kind == PcAddr; + } /* Get the PC-relative target, assuming the instruction is at the provided address, for arguments with PC-relative offsets. */ u32 getPCRelativeTarget(u32 pc, int size) const; + +private: + Kind m_kind; + /* Base register. Valid for all modes except Imm */ + CpuRegister m_base; + /* Index register. Valid for ArrayDeref */ + CpuRegister m_index; + /* Operation size (0, 1, 2 or 4). Generally a multiplier for disp */ + i8 m_opsize; + /* Displacement in bytes for StructDeref, PcRel, PcJump, and PcAddr, or + immediate value for Imm. 
*/ + int m_disp_imm; }; -/* AsmArgument constructors */ - -AsmArgument AsmArgument_Reg(CpuRegister base); -AsmArgument AsmArgument_Deref(CpuRegister base); -AsmArgument AsmArgument_PostInc(CpuRegister base); -AsmArgument AsmArgument_PreDec(CpuRegister base); -AsmArgument AsmArgument_StructDeref(int disp, int opsize, CpuRegister base); -AsmArgument AsmArgument_ArrayDeref(CpuRegister index, CpuRegister base); -AsmArgument AsmArgument_PcRel(int disp, int opsize); -AsmArgument AsmArgument_PcJump(int disp); -AsmArgument AsmArgument_PcAddr(int disp); -AsmArgument AsmArgument_Imm(int imm); - /* Assembler instruction */ struct AsmInstruction { @@ -167,64 +266,74 @@ struct AsmInstruction #undef GENDEFS_INSN static_assert(SH_MAX <= 0xff); - AsmInstruction() = default; + AsmInstruction(u32 encoding, char const *mnemonic, int tags, int opCount, + AsmOperand op1 = {}, AsmOperand op2 = {}); - /* Construct with one or several arguments */ - AsmInstruction(char const *mnemonic); - AsmInstruction(char const *mnemonic, AsmArgument arg); - AsmInstruction(char const *mnemonic, AsmArgument arg1, AsmArgument arg2); - - /* Original opcode. Initialized to 0 when unset, which is an invalid - instruction by design. 
*/ - u32 encoding; - /* Operation size (0, 1, 2 or 4) */ - i8 opsize; - /* Number of arguments */ - u8 arg_count; - /* Instruction tags */ - u16 tags; + /* Original encoding */ + u32 encoding() const + { + return m_encoding; + } /* Mnemonic **without the size indicator** */ char const *mnemonic() const; - /* Arguments (up to 2) */ - AsmArgument args[2]; + /* Size indication (purely syntactic), 0 if not present */ + int opsize() const + { + return m_opsize; + } + + /* Access to operands */ + int operandCount() const + { + return m_opCount; + } + AsmOperand const &operand(int i) const + { + assert(i >= 0 && i < m_opCount && "operand out-of-bounds"); + return m_ops[i]; + } + auto operands() const // -> [AsmOperand const &] + { + return std::views::take(m_ops, m_opCount); + } //=== Instruction classes ===// /* Whether the instruction terminates the function it's in. */ bool isReturn() const { - return (this->tags & Tag::IsReturn) != 0; + return (m_tags & Tag::IsReturn) != 0; } /* Whether the instruction is a conditional/unconditional static jump. */ bool isConditionalJump() const { - return (this->tags & Tag::IsConditionalJump) != 0; + return (m_tags & Tag::IsConditionalJump) != 0; } bool isUnconditionalJump() const { - return (this->tags & Tag::IsUnconditionalJump) != 0; + return (m_tags & Tag::IsUnconditionalJump) != 0; } bool isAnyStaticJump() const { int IsJump = Tag::IsConditionalJump | Tag::IsUnconditionalJump; - return (this->tags & IsJump) != 0; + return (m_tags & IsJump) != 0; } /* Whether the instruction jumps to a dynamic target. This does not include *calls* to dynamic targets. These jumps are always unconditional. */ bool isDynamicJump() const { - return (this->tags & Tag::IsDynamicJump) != 0; + return (m_tags & Tag::IsDynamicJump) != 0; } /* Whether the instruction is a function call. 
*/ bool isCall() const { - return (this->tags & Tag::IsCall) != 0; + return (m_tags & Tag::IsCall) != 0; } /* Whether the instruction has a delay slot */ bool hasDelaySlot() const { - return (this->tags & Tag::HasDelaySlot) != 0; + return (m_tags & Tag::HasDelaySlot) != 0; } /* Wheher the instruction terminates its basic block. */ bool isBlockTerminator() const @@ -235,7 +344,7 @@ struct AsmInstruction bool isValidDelaySlot() const { return !isBlockTerminator() && !hasDelaySlot() - && (this->tags & Tag::IsInvalidDelaySlot) == 0; + && (m_tags & Tag::IsInvalidDelaySlot) == 0; } //=== Instruction info ===// @@ -245,8 +354,18 @@ struct AsmInstruction u32 getPCRelativeTarget(u32 pc) const; private: + /* Original encoding */ + u32 m_encoding; /* Assembler instruction name (mov, add, etc), without size modifier. */ u8 m_opcode; + /* Operation size (0, 1, 2 or 4) */ + i8 m_opsize; + /* Number of operands */ + u8 m_opCount; + /* Instruction tags */ + u16 m_tags; + /* Operands */ + std::array m_ops; }; } /* namespace FxOS */ diff --git a/lib/analysis.cpp b/lib/analysis.cpp index ea219a0..bcaedb5 100644 --- a/lib/analysis.cpp +++ b/lib/analysis.cpp @@ -109,7 +109,7 @@ static ProgramStateDiff interpretInstruction( diff.setUnknown(); // TODO: Do this properly - u16 opc = ins.opcode().encoding; + u16 opc = ins.opcode().encoding(); if((opc & 0xf000) == 0xe000) { int reg = (opc >> 8) & 0xf; int val = (int8_t)opc; diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index 8c561c6..b62dfdf 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -19,7 +19,7 @@ std::array, 65536> insmap; void register_instruction(AsmInstruction const &ins) { - uint16_t encoding = ins.encoding; + uint16_t encoding = ins.encoding(); if(insmap[encoding]) FxOS_log(ERR, "encoding collision between a %s and a %s at %04x", @@ -40,7 +40,7 @@ Argument::Argument() } OldInstruction::OldInstruction(AsmInstruction const *inst): - inst {inst}, args {}, opcode {inst->encoding}, leader {false}, + inst {inst}, 
args {}, opcode {inst->encoding()}, leader {false}, delayslot {false}, terminal {false}, jump {false}, condjump {false}, jmptarget {0xffffffff} { diff --git a/lib/lang.cpp b/lib/lang.cpp index abaad9a..0dee85a 100644 --- a/lib/lang.cpp +++ b/lib/lang.cpp @@ -14,7 +14,7 @@ namespace FxOS { //--- -// CPU registers +// CPU registers //--- // clang-format off @@ -56,140 +56,73 @@ std::string CpuRegister::str() const noexcept } //--- -// Instruction arguments +// Instruction operands //--- -/* External constructors */ - -AsmArgument AsmArgument_Reg(CpuRegister base) +AsmOperand::AsmOperand(Kind kind, CpuRegister base): + m_kind {kind}, m_base {base} { - AsmArgument arg; - arg.kind = AsmArgument::Reg; - arg.base = base; - return arg; } -AsmArgument AsmArgument_Deref(CpuRegister base) +AsmOperand::AsmOperand(int disp, i8 opsize, CpuRegister base): + m_kind {StructDeref}, m_base {base}, m_opsize {opsize}, m_disp_imm {disp} { - AsmArgument arg; - arg.kind = AsmArgument::Deref; - arg.base = base; - return arg; } -AsmArgument AsmArgument_PostInc(CpuRegister base) +AsmOperand::AsmOperand(CpuRegister index, CpuRegister base): + m_kind {ArrayDeref}, m_base {base}, m_index {index} { - AsmArgument arg; - arg.kind = AsmArgument::PostInc; - arg.base = base; - return arg; } -AsmArgument AsmArgument_PreDec(CpuRegister base) +AsmOperand::AsmOperand(Kind kind, int disp_imm, i8 opsize): + m_kind {kind}, m_opsize {opsize}, m_disp_imm {disp_imm} { - AsmArgument arg; - arg.kind = AsmArgument::PreDec; - arg.base = base; - return arg; } -AsmArgument AsmArgument_StructDeref(int disp, int opsize, CpuRegister base) +std::string AsmOperand::str() const { - AsmArgument arg; - arg.kind = AsmArgument::StructDeref; - arg.base = base; - arg.disp = disp; - arg.opsize = opsize; - return arg; -} - -AsmArgument AsmArgument_ArrayDeref(CpuRegister index, CpuRegister base) -{ - AsmArgument arg; - arg.kind = AsmArgument::ArrayDeref; - arg.base = base; - arg.index = index; - return arg; -} - -AsmArgument 
AsmArgument_PcRel(int disp, int opsize) -{ - AsmArgument arg; - arg.kind = AsmArgument::PcRel; - arg.disp = disp; - arg.opsize = opsize; - return arg; -} - -AsmArgument AsmArgument_PcJump(int disp) -{ - AsmArgument arg; - arg.kind = AsmArgument::PcJump; - arg.disp = disp; - return arg; -} - -AsmArgument AsmArgument_PcAddr(int disp) -{ - AsmArgument arg; - arg.kind = AsmArgument::PcAddr; - arg.disp = disp; - return arg; -} - -AsmArgument AsmArgument_Imm(int imm) -{ - AsmArgument arg; - arg.kind = AsmArgument::Imm; - arg.imm = imm; - return arg; -} - -/* String representation */ -std::string AsmArgument::str() const -{ - switch(kind) { - case AsmArgument::Reg: - return base.str(); - case AsmArgument::Deref: - return format("@%s", base.str()); - case AsmArgument::PostInc: - return format("@%s+", base.str()); - case AsmArgument::PreDec: - return format("@-%s", base.str()); - case AsmArgument::StructDeref: - return format("@(%d,%s)", disp, base.str().c_str()); - case AsmArgument::ArrayDeref: - return format("@(%s,%s)", index.str().c_str(), base.str().c_str()); - case AsmArgument::PcRel: - return format("@(%d,pc)", disp); - case AsmArgument::PcJump: - return format("pc+%d", disp); - case AsmArgument::PcAddr: - return format("pc+%u", disp); - case AsmArgument::Imm: - return format("#%d", imm); + switch(m_kind) { + case Reg: + return m_base.str(); + case Deref: + return format("@%s", m_base.str()); + case PostInc: + return format("@%s+", m_base.str()); + case PreDec: + return format("@-%s", m_base.str()); + case StructDeref: + return format("@(%d,%s)", m_disp_imm, m_base.str().c_str()); + case ArrayDeref: + return format("@(%s,%s)", m_index.str().c_str(), m_base.str().c_str()); + case PcRel: + return format("@(%d,pc)", m_disp_imm); + case PcJump: + return format("pc+%d", m_disp_imm); + case PcAddr: + return format("pc+%u", m_disp_imm); + case Imm: + return format("#%d", m_disp_imm); default: return "(invalid)"; } } -u32 AsmArgument::getPCRelativeTarget(u32 pc, int size) 
const +u32 AsmOperand::getPCRelativeTarget(u32 pc, int size) const { size = size + (size == 0); - if(this->kind == AsmArgument::PcRel) - return (pc & -size) + 4 + this->disp; - if(this->kind == AsmArgument::PcJump) - return pc + 4 + this->disp; - if(this->kind == AsmArgument::PcAddr) - return (pc & -4) + 4 + this->disp; + if(m_kind == AsmOperand::PcRel) + return (pc & -size) + 4 + m_disp_imm; + if(m_kind == AsmOperand::PcJump) + return pc + 4 + m_disp_imm; + if(m_kind == AsmOperand::PcAddr) + return (pc & -4) + 4 + m_disp_imm; /* SH3 manual says that mova uses the target address of the jump when in a delay slot. SH4AL-DSP makes it invalid. Supporting this would be very tricky since the target PC is often dynamic (eg. rts). */ - return 0xffffffff; + return -1; } //--- @@ -202,29 +135,26 @@ static char const *instructionMnemonics[] = { #undef GENDEFS_INSN }; -AsmInstruction::AsmInstruction(char const *mn): - encoding {0}, opsize {0}, arg_count {0} +AsmInstruction::AsmInstruction(u32 encoding, char const *mnemonic, int tags, + int opCount, AsmOperand op1, AsmOperand op2): + m_encoding {encoding}, + m_opsize {0}, m_opCount {(u8)opCount}, m_tags {(u16)tags}, m_ops {op1, op2} { - int len = strlen(mn); - int pos = std::max(0, len - 2); + std::string mn {mnemonic}; - if(!strncmp(mn + pos, ".b", 2)) { - opsize = 1; - len -= 2; - } - else if(!strncmp(mn + pos, ".w", 2)) { - opsize = 2; - len -= 2; - } - else if(!strncmp(mn + pos, ".l", 2)) { - opsize = 4; - len -= 2; - } + if(mn.ends_with(".b")) + m_opsize = 1; + else if(mn.ends_with(".w")) + m_opsize = 2; + else if(mn.ends_with(".l")) + m_opsize = 4; + + if(m_opsize != 0) + mn = mn.substr(0, mn.size() - 2); int i; for(i = 0; i < SH_MAX; i++) { - if(!strncmp(mn, instructionMnemonics[i], len) - && !instructionMnemonics[i][len]) { + if(mn == instructionMnemonics[i]) { m_opcode = i; break; } @@ -232,22 +162,6 @@ AsmInstruction::AsmInstruction(char const *mn): assert(i < SH_MAX && "AsmInstruction with unknown opcode string"); } 
-AsmInstruction::AsmInstruction(char const *mn, AsmArgument arg): - AsmInstruction(mn) -{ - args[0] = arg; - arg_count = 1; -} - -AsmInstruction::AsmInstruction( - char const *mn, AsmArgument arg1, AsmArgument arg2): - AsmInstruction(mn) -{ - args[0] = arg1; - args[1] = arg2; - arg_count = 2; -} - char const *AsmInstruction::mnemonic() const { assert(m_opcode < SH_MAX); @@ -256,12 +170,11 @@ char const *AsmInstruction::mnemonic() const u32 AsmInstruction::getPCRelativeTarget(u32 pc) const { - for(int i = 0; i < this->arg_count; i++) { - u32 target = this->args[i].getPCRelativeTarget(pc, this->opsize); - if(target != 0xffffffff) - return target; + for(AsmOperand const &op: operands()) { + if(op.usesPCRelativeAddressing()) + return op.getPCRelativeTarget(pc, m_opsize); } - return 0xffffffff; + return -1; } } /* namespace FxOS */ diff --git a/lib/load-asm.l b/lib/load-asm.l index b480ca2..2005c81 100644 --- a/lib/load-asm.l +++ b/lib/load-asm.l @@ -177,7 +177,7 @@ static Pattern make_pattern(char const *code) @opsize Operation size indicated in the mnemonic @m @n @d @i Instruction instance Returns a semantic FxOS::Argument. 
*/ -static AsmArgument make_arg(int token, int opsize, int m, int n, int d, int i) +static AsmOperand make_arg(int token, int opsize, int m, int n, int d, int i) { using Reg = CpuRegister; static Reg general_purpose[16] = { @@ -196,51 +196,51 @@ static AsmArgument make_arg(int token, int opsize, int m, int n, int d, int i) int32_t i8 = (int8_t)i; switch(token) { - case R0: return AsmArgument_Reg(Reg::R0); - case RN: return AsmArgument_Reg(Rn); - case RM: return AsmArgument_Reg(Rm); - case R0_BANK: return AsmArgument_Reg(Reg::R0B); - case R1_BANK: return AsmArgument_Reg(Reg::R1B); - case R2_BANK: return AsmArgument_Reg(Reg::R2B); - case R3_BANK: return AsmArgument_Reg(Reg::R3B); - case R4_BANK: return AsmArgument_Reg(Reg::R4B); - case R5_BANK: return AsmArgument_Reg(Reg::R5B); - case R6_BANK: return AsmArgument_Reg(Reg::R6B); - case R7_BANK: return AsmArgument_Reg(Reg::R7B); - case SR: return AsmArgument_Reg(Reg::SR); - case PR: return AsmArgument_Reg(Reg::PR); - case GBR: return AsmArgument_Reg(Reg::GBR); - case VBR: return AsmArgument_Reg(Reg::VBR); - case DBR: return AsmArgument_Reg(Reg::DBR); - case SSR: return AsmArgument_Reg(Reg::SSR); - case SPC: return AsmArgument_Reg(Reg::SPC); - case SGR: return AsmArgument_Reg(Reg::SGR); - case MACH: return AsmArgument_Reg(Reg::MACH); - case MACL: return AsmArgument_Reg(Reg::MACL); - case JUMP8: return AsmArgument_PcJump(d8 * 2); - case JUMP12: return AsmArgument_PcJump(d12 * 2); - case DPC: return AsmArgument_PcAddr(d * 4); - case IMM: return AsmArgument_Imm(i8); - case AT_RN: return AsmArgument_Deref(Rn); - case AT_RM: return AsmArgument_Deref(Rm); - case AT_RMP: return AsmArgument_PostInc(Rm); - case AT_RNP: return AsmArgument_PostInc(Rn); - case AT_MRN: return AsmArgument_PreDec(Rn); - case AT_DRN: return AsmArgument_StructDeref(d*opsize, opsize, Rn); - case AT_DRM: return AsmArgument_StructDeref(d*opsize, opsize, Rm); - case AT_DGBR: return AsmArgument_StructDeref(d*opsize, opsize, Reg::GBR); - case AT_R0RN: return 
AsmArgument_ArrayDeref(Reg::R0, Rn); - case AT_R0RM: return AsmArgument_ArrayDeref(Reg::R0, Rm); - case AT_R0GBR: return AsmArgument_ArrayDeref(Reg::R0, Reg::GBR); + case R0: return AsmOperand::mkReg(Reg::R0); + case RN: return AsmOperand::mkReg(Rn); + case RM: return AsmOperand::mkReg(Rm); + case R0_BANK: return AsmOperand::mkReg(Reg::R0B); + case R1_BANK: return AsmOperand::mkReg(Reg::R1B); + case R2_BANK: return AsmOperand::mkReg(Reg::R2B); + case R3_BANK: return AsmOperand::mkReg(Reg::R3B); + case R4_BANK: return AsmOperand::mkReg(Reg::R4B); + case R5_BANK: return AsmOperand::mkReg(Reg::R5B); + case R6_BANK: return AsmOperand::mkReg(Reg::R6B); + case R7_BANK: return AsmOperand::mkReg(Reg::R7B); + case SR: return AsmOperand::mkReg(Reg::SR); + case PR: return AsmOperand::mkReg(Reg::PR); + case GBR: return AsmOperand::mkReg(Reg::GBR); + case VBR: return AsmOperand::mkReg(Reg::VBR); + case DBR: return AsmOperand::mkReg(Reg::DBR); + case SSR: return AsmOperand::mkReg(Reg::SSR); + case SPC: return AsmOperand::mkReg(Reg::SPC); + case SGR: return AsmOperand::mkReg(Reg::SGR); + case MACH: return AsmOperand::mkReg(Reg::MACH); + case MACL: return AsmOperand::mkReg(Reg::MACL); + case JUMP8: return AsmOperand::mkPcJump(d8 * 2); + case JUMP12: return AsmOperand::mkPcJump(d12 * 2); + case DPC: return AsmOperand::mkPcAddr(d * 4); + case IMM: return AsmOperand::mkImm(i8); + case AT_RN: return AsmOperand::mkDeref(Rn); + case AT_RM: return AsmOperand::mkDeref(Rm); + case AT_RMP: return AsmOperand::mkPostInc(Rm); + case AT_RNP: return AsmOperand::mkPostInc(Rn); + case AT_MRN: return AsmOperand::mkPreDec(Rn); + case AT_DRN: return AsmOperand::mkStructDeref(d*opsize, opsize, Rn); + case AT_DRM: return AsmOperand::mkStructDeref(d*opsize, opsize, Rm); + case AT_DGBR: return AsmOperand::mkStructDeref(d*opsize, opsize, Reg::GBR); + case AT_R0RN: return AsmOperand::mkArrayDeref(Reg::R0, Rn); + case AT_R0RM: return AsmOperand::mkArrayDeref(Reg::R0, Rm); + case AT_R0GBR: return 
AsmOperand::mkArrayDeref(Reg::R0, Reg::GBR); case AT_DPC: if(!opsize) err("@(disp,pc) must have a size (.w, .l)"); - return AsmArgument_PcRel(d*opsize, opsize); + return AsmOperand::mkPcRel(d*opsize, opsize); } FxOS_log(ERR, "bad argument token: %d", token); - return AsmArgument_Reg(Reg::UNDEFINED); + return AsmOperand::mkReg(Reg::UNDEFINED); } /* Record all the instances of an instruction in the disassembly table. @@ -261,25 +261,35 @@ static int instantiate(struct Pattern p, char const *mnemonic, int argtoken1, for(int m = 0; m < (1 << p.m_size); m++) for(int d = 0; d < (1 << p.d_size); d++) for(int i = 0; i < (1 << p.i_size); i++) { + // TODO: Support 32-bit instructions uint16_t opcode = p.bits; opcode |= (n << p.n_sh); opcode |= (m << p.m_sh); opcode |= (d << p.d_sh); opcode |= (i << p.i_sh); - AsmInstruction ins(mnemonic); - ins.encoding = opcode; - ins.tags = insntags; + AsmOperand op1, op2; + int opCount = 0; + + int opsize = 0; + std::string mn {mnemonic}; + if(mn.ends_with(".b")) + opsize = 1; + else if(mn.ends_with(".w")) + opsize = 2; + else if(mn.ends_with(".l")) + opsize = 4; if(argtoken1) { - ins.args[0] = make_arg(argtoken1, ins.opsize, m,n,d,i); - ins.arg_count = 1; + op1 = make_arg(argtoken1, opsize, m, n, d, i); + opCount++; } if(argtoken2) { - ins.args[1] = make_arg(argtoken2, ins.opsize, m,n,d,i); - ins.arg_count = 2; + op2 = make_arg(argtoken2, opsize, m, n, d, i); + opCount++; } + AsmInstruction ins(opcode, mnemonic, insntags, opCount, op1, op2); register_instruction(ins); total++; } diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index 5eef900..d5dd474 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -20,13 +20,13 @@ bool PcrelPass::analyzeInstruction(uint32_t pc, OldInstruction &ci) if(!i) return true; - for(size_t n = 0; n < i->arg_count; n++) { - AsmArgument const &arg = i->args[n]; + for(int n = 0; n < i->operandCount(); n++) { + AsmOperand const &arg = i->operand(n); Argument &a = ci.args[n]; - if(arg.kind == 
AsmArgument::PcRel - && (i->opsize == 2 || i->opsize == 4)) { - uint32_t addr = (pc & ~(arg.opsize - 1)) + 4 + arg.disp; + if(arg.kind() == AsmOperand::PcRel + && (i->opsize() == 2 || i->opsize() == 4)) { + uint32_t addr = (pc & ~(arg.opsize() - 1)) + 4 + arg.disp(); a.location = RelConstDomain().constant(addr); /* Also compute the value. This is sign-extended from 16-bit with @@ -34,22 +34,22 @@ bool PcrelPass::analyzeInstruction(uint32_t pc, OldInstruction &ci) VirtualSpace &space = m_binary.vspace(); uint32_t v = -1; - if(i->opsize == 2 && space.covers(addr, 2)) { + if(i->opsize() == 2 && space.covers(addr, 2)) { v = space.read_i16(addr); a.value = RelConstDomain().constant(v); } - if(i->opsize == 4 && space.covers(addr, 4)) { + if(i->opsize() == 4 && space.covers(addr, 4)) { v = space.read_i32(addr); a.value = RelConstDomain().constant(v); } } - else if(arg.kind == AsmArgument::PcJump) { - uint32_t addr = pc + 4 + arg.disp; + else if(arg.kind() == AsmOperand::PcJump) { + uint32_t addr = pc + 4 + arg.disp(); a.location = RelConstDomain().constant(addr); a.value = RelConstDomain().constant(addr); } - else if(arg.kind == AsmArgument::PcAddr) { - uint32_t addr = (pc & ~3) + 4 + arg.disp; + else if(arg.kind() == AsmOperand::PcAddr) { + uint32_t addr = (pc & ~3) + 4 + arg.disp(); /* SH3 manual says that mova uses the target address of the jump when in a delay slot. SH4AL-DSP makes it invalid. 
*/ diff --git a/lib/passes/syscall.cpp b/lib/passes/syscall.cpp index 7d50642..b7a1d92 100644 --- a/lib/passes/syscall.cpp +++ b/lib/passes/syscall.cpp @@ -26,19 +26,19 @@ bool SyscallPass::analyzeInstruction(uint32_t pc, OldInstruction &ci) if(!i) return true; - for(size_t n = 0; n < i->arg_count; n++) { - AsmArgument const &arg = i->args[n]; + for(int n = 0; n < i->operandCount(); n++) { + AsmOperand const &arg = i->operand(n); Argument &a = ci.args[n]; bool eligible = false; uint32_t address; - if(arg.kind == AsmArgument::PcRel && a.value + if(arg.kind() == AsmOperand::PcRel && a.value && RelConstDomain().is_constant(a.value)) { eligible = true; address = RelConstDomain().constant_value(a.value); } - if(arg.kind == AsmArgument::PcJump && a.location + if(arg.kind() == AsmOperand::PcJump && a.location && RelConstDomain().is_constant(a.location)) { eligible = true; address = RelConstDomain().constant_value(a.location); diff --git a/lib/view/assembly.cpp b/lib/view/assembly.cpp index 0d92a96..021d97b 100644 --- a/lib/view/assembly.cpp +++ b/lib/view/assembly.cpp @@ -19,11 +19,11 @@ namespace FxOS { -/* Output for a single argument, which consists of one or more text segments +/* Output for a single operand, which consists of one or more text segments each with their own text style. 
*/ -using ArgumentOutput = std::vector>; +using OperandOutput = std::vector>; -static inline bool output(ArgumentOutput &out, ViewAssemblyOptions::Promotion p, +static inline bool output(OperandOutput &out, ViewAssemblyOptions::Promotion p, fmt::text_style style, std::string str) { if(p == ViewAssemblyOptions::Never) @@ -36,10 +36,10 @@ static inline bool output(ArgumentOutput &out, ViewAssemblyOptions::Promotion p, } // TODO: Take advantage of Instruction's info -static void renderArgument(AsmArgument const &arg, u32 pc, int opsize, - ArgumentOutput &out, ViewAssemblyOptions const &opts) +static void renderOperand(AsmOperand const &op, u32 pc, int opsize, + OperandOutput &out, ViewAssemblyOptions const &opts) { - out.push_back({{}, arg.str()}); + out.push_back({{}, op.str()}); // clang-format off enum { None, PCJump, PCRelative, PCAddr, Location, Constant, SyscallNumber, @@ -47,11 +47,11 @@ static void renderArgument(AsmArgument const &arg, u32 pc, int opsize, type = None; // clang-format on - if(arg.kind == AsmArgument::PcJump) + if(op.kind() == AsmOperand::PcJump) type = PCJump; - else if(arg.kind == AsmArgument::PcRel) + else if(op.kind() == AsmOperand::PcRel) type = PCRelative; - else if(arg.kind == AsmArgument::PcAddr) + else if(op.kind() == AsmOperand::PcAddr) type = PCAddr; u32 value = 0; @@ -63,9 +63,10 @@ static void renderArgument(AsmArgument const &arg, u32 pc, int opsize, auto p = (type == PCJump) ? opts.promotions.PCJump_to_Location : (type == PCAddr) ? opts.promotions.PCAddr_to_Location : opts.promotions.PCRelative_to_Location; - u32 location = arg.getPCRelativeTarget(pc, opsize); - if(location == 0xffffffff) + if(!op.usesPCRelativeAddressing()) return; + + u32 location = op.getPCRelativeTarget(pc, opsize); if(output(out, p, {}, format("<%08x>", location))) return; type = (type == PCRelative) ? 
Location : Constant; @@ -124,7 +125,7 @@ static void doOldInst(u32 pc, OldInstruction &i, ViewAssemblyOptions const &opts, u32 &m_lastAddress) { OS *os = opts.binary ? opts.binary->OSAnalysis() : nullptr; - ArgumentOutput argout; + OperandOutput opout; /* Ellipsis if there is a gap since last instruction */ if(m_lastAddress + 1 != 0 && pc != m_lastAddress + 2) @@ -141,7 +142,7 @@ static void doOldInst(u32 pc, OldInstruction &i, } /* Raw data if instruction cannot be decoded */ - printf(" %08x: %04x", pc, (i.inst ? i.inst->encoding : i.opcode)); + printf(" %08x: %04x", pc, (i.inst ? i.inst->encoding() : i.opcode)); if(!i.inst) { printf("\n"); m_lastAddress = pc; @@ -150,27 +151,28 @@ static void doOldInst(u32 pc, OldInstruction &i, /* Mnemonic */ static char const *suffixes[5] = {"", ".b", ".w", "", ".l"}; - char const *suffix = suffixes[(i.inst->opsize <= 4) ? i.inst->opsize : 0]; + char const *suffix + = suffixes[(i.inst->opsize() <= 4) ? i.inst->opsize() : 0]; - int spacing = i.inst->arg_count + int spacing = i.inst->operandCount() ? 8 - strlen(i.inst->mnemonic()) - strlen(suffix) : 0; printf(" %s%s%*s", i.inst->mnemonic(), suffix, spacing, ""); /* Arguments */ - for(size_t n = 0; n < i.inst->arg_count; n++) { + for(int n = 0; n < i.inst->operandCount(); n++) { if(n) printf(", "); - renderArgument(i.inst->args[n], pc, i.inst->opsize, argout, opts); + renderOperand(i.inst->operand(n), pc, i.inst->opsize(), opout, opts); - for(size_t i = 0; i < argout.size(); i++) { + for(size_t i = 0; i < opout.size(); i++) { if(i != 0) printf(" "); - printf("%s", argout[i].second.c_str()); + printf("%s", opout[i].second.c_str()); } - argout.clear(); + opout.clear(); } printf("\n"); @@ -237,36 +239,37 @@ void viewAssemblyInstruction(Instruction const &ins, ViewAssemblyOptions *opts) opts = opts ? 
opts : &defaultOptions; AsmInstruction opcode = ins.opcode(); - ArgumentOutput argout; + OperandOutput opout; u32 pc = ins.address(); - printf(" %08x: %04x", pc, opcode.encoding); + printf(" %08x: %04x", pc, opcode.encoding()); /* Mnemonic */ static char const *suffixes[5] = {"", ".b", ".w", "", ".l"}; - char const *suffix = suffixes[(opcode.opsize <= 4) ? opcode.opsize : 0]; + char const *suffix = suffixes[(opcode.opsize() <= 4) ? opcode.opsize() : 0]; - int spacing - = opcode.arg_count ? 8 - strlen(opcode.mnemonic()) - strlen(suffix) : 0; + int spacing = opcode.operandCount() + ? 8 - strlen(opcode.mnemonic()) - strlen(suffix) + : 0; std::string str = " "; str += opcode.mnemonic(); str += suffix; str += std::string(spacing, ' '); /* Arguments */ - for(size_t n = 0; n < opcode.arg_count; n++) { + for(int n = 0; n < opcode.operandCount(); n++) { if(n) str += ", "; - renderArgument(opcode.args[n], pc, opcode.opsize, argout, *opts); + renderOperand(opcode.operand(n), pc, opcode.opsize(), opout, *opts); - for(size_t i = 0; i < argout.size(); i++) { + for(size_t i = 0; i < opout.size(); i++) { if(i != 0) str += " "; - str += argout[i].second; + str += opout[i].second; } - argout.clear(); + opout.clear(); } if(opts->printFunctionAnalysis) {