From 47764a61eb6cdb7f50ac4da708119a276564b5d1 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Sun, 27 Aug 2023 23:25:35 +0200 Subject: [PATCH] fxos: refactor AsmInstruction categories into assembly table tags --- base-library/asmtables/sh3.txt | 30 ++++++------- base-library/asmtables/sh4.txt | 4 -- include/fxos/lang.h | 65 +++++++++++++++++++++------ lib/lang.cpp | 82 ---------------------------------- lib/load-asm.l | 42 +++++++++++++++-- lib/passes/cfg.cpp | 24 +++++----- 6 files changed, 116 insertions(+), 131 deletions(-) diff --git a/base-library/asmtables/sh3.txt b/base-library/asmtables/sh3.txt index 228c578..3d71eb9 100644 --- a/base-library/asmtables/sh3.txt +++ b/base-library/asmtables/sh3.txt @@ -1,7 +1,3 @@ -type: assembly -name: sh-3 ---- - # Format: [01nmdi]{16}, followed by the mnemonic and the list of arguments. # In each opcode, there should be at most one sequence of "m", "n", "d" and "i" # each (representing the location of the argument). @@ -28,8 +24,8 @@ name: sh-3 0000000000011001 div0u 0000000000111000 ldtlb 0000000000001001 nop -0000000000101011 rte -0000000000001011 rts +0000000000101011 rte %ret %delay +0000000000001011 rts %ret %delay 0000000001011000 sets 0000000000011000 sett 0000000000011011 sleep @@ -123,8 +119,8 @@ name: sh-3 0000nnnn00011010 sts macl, rn 0000nnnn00101010 sts pr, rn -0100nnnn00101011 jmp @rn -0100nnnn00001011 jsr @rn +0100nnnn00101011 jmp @rn %ret %delay +0100nnnn00001011 jsr @rn %call %delay 0000nnnn10000011 pref @rn 0100nnnn00011011 tas.b @rn 0010nnnnmmmm0000 mov.b rm, @rn @@ -204,16 +200,16 @@ name: sh-3 1001nnnndddddddd mov.w @(disp,pc), rn 1101nnnndddddddd mov.l @(disp,pc), rn -11000111dddddddd mova.l @(disp,pc), r0 +11000111dddddddd mova.l @(disp,pc), r0 %islot -0000mmmm00100011 braf rm -0000mmmm00000011 bsrf rm -10001011dddddddd bf jump8 -10001111dddddddd bf.s jump8 -10001001dddddddd bt jump8 -10001101dddddddd bt.s jump8 -1010dddddddddddd bra jump12 -1011dddddddddddd bsr jump12 +0000mmmm00100011 braf rm 
%ret %delay +0000mmmm00000011 bsrf rm %call %delay +10001011dddddddd bf jump8 %condjump +10001111dddddddd bf.s jump8 %condjump %delay +10001001dddddddd bt jump8 %condjump +10001101dddddddd bt.s jump8 %condjump %delay +1010dddddddddddd bra jump12 %uncondjump %delay +1011dddddddddddd bsr jump12 %call %delay 0111nnnniiiiiiii add #imm, rn 11001001iiiiiiii and #imm, r0 diff --git a/base-library/asmtables/sh4.txt b/base-library/asmtables/sh4.txt index 09f076a..620a3fa 100644 --- a/base-library/asmtables/sh4.txt +++ b/base-library/asmtables/sh4.txt @@ -1,7 +1,3 @@ -type: assembly -name: sh-4a-extensions ---- - 0000nnnn01110011 movco.l r0, @rn 0000mmmm01100011 movli.l @rm, r0 0100mmmm10101001 movua.l @rm, r0 diff --git a/include/fxos/lang.h b/include/fxos/lang.h index 90c86ac..dbcd554 100644 --- a/include/fxos/lang.h +++ b/include/fxos/lang.h @@ -145,6 +145,15 @@ AsmArgument AsmArgument_Imm(int imm); /* Assembler instruction */ struct AsmInstruction { + enum Tag { + IsReturn = 0x01, + IsUnconditionalJump = 0x02, + IsConditionalJump = 0x04, + IsCall = 0x08, + HasDelaySlot = 0x10, + IsInvalidDelaySlot = 0x20, + }; + AsmInstruction() = default; /* Construct with one or several arguments */ @@ -154,11 +163,13 @@ struct AsmInstruction /* Original opcode. Initialized to 0 when unset, which is an invalid instruction by design. 
*/ - uint16_t opcode; + uint32_t opcode; /* Operation size (0, 1, 2 or 4) */ int8_t opsize; /* Number of arguments */ uint8_t arg_count; + /* Instruction tags */ + uint16_t tags; /* Mnemonic **without the size indicator** */ char mnemonic[12]; @@ -169,18 +180,46 @@ struct AsmInstruction // Instruction classes //--- - /* Check whether instruction terminates the function */ - bool isterminal() const noexcept; - /* Check whether instruction is an unconditional jump */ - bool isjump() const noexcept; - /* Check whether it's a conditional jump */ - bool iscondjump() const noexcept; - /* Check whether instruction is a function call */ - bool iscall() const noexcept; - /* Check whether instruction has a delay slot */ - bool isdelayed() const noexcept; - /* Check whether instruction can be used in a delay slot */ - bool isvaliddelayslot() const noexcept; + /* Whether the instruction terminates the function it's in. */ + bool isReturn() const + { + return (this->tags & Tag::IsReturn) != 0; + } + /* Whether the instruction is a conditional/unconditional jump. */ + bool isConditionalJump() const + { + return (this->tags & Tag::IsConditionalJump) != 0; + } + bool isUnconditionalJump() const + { + return (this->tags & Tag::IsUnconditionalJump) != 0; + } + bool isAnyJump() const + { + int IsJump = Tag::IsConditionalJump | Tag::IsUnconditionalJump; + return (this->tags & IsJump) != 0; + } + /* Whether the instruction is a function call. */ + bool isCall() const + { + return (this->tags & Tag::IsCall) != 0; + } + /* Whether the instruction has a delay slot */ + bool hasDelaySlot() const + { + return (this->tags & Tag::HasDelaySlot) != 0; + } + /* Whether the instruction terminates its basic block. */ + bool isBlockTerminator() const + { + return isAnyJump() || isReturn(); + } + /* Whether the instruction can be used in a delay slot. 
*/ + bool isValidDelaySlot() const + { + return !isBlockTerminator() && !hasDelaySlot() + && (this->tags & Tag::IsInvalidDelaySlot) == 0; + } }; } /* namespace FxOS */ diff --git a/lib/lang.cpp b/lib/lang.cpp index f3d386b..4f1cff5 100644 --- a/lib/lang.cpp +++ b/lib/lang.cpp @@ -217,86 +217,4 @@ AsmInstruction::AsmInstruction( arg_count = 2; } -//--- -// Instruction classes -//--- - -bool AsmInstruction::isterminal() const noexcept -{ - if(!strcmp(mnemonic, "rte") || !strcmp(mnemonic, "rts")) - return true; - - /* Also jmp @rn which is regarded as a terminal call */ - if(!strcmp(mnemonic, "jmp") && args[0].kind == AsmArgument::Deref) - return true; - - /* Same for braf because we can't analyse further */ - if(!strcmp(mnemonic, "braf")) - return true; - - return false; -} - -bool AsmInstruction::isjump() const noexcept -{ - return !strcmp(mnemonic, "bra"); -} - -bool AsmInstruction::iscondjump() const noexcept -{ - char const *v[] = { - "bf", - "bf.s", - "bf/s", - "bt", - "bt.s", - "bt/s", - NULL, - }; - - for(int i = 0; v[i]; i++) { - if(!strcmp(mnemonic, v[i])) - return true; - } - return false; -} - -bool AsmInstruction::iscall() const noexcept -{ - return !strcmp(mnemonic, "jsr") || !strcmp(mnemonic, "bsr") - || !strcmp(mnemonic, "bsrf"); -} - -bool AsmInstruction::isdelayed() const noexcept -{ - char const *v[] = { - "rte", - "rts", - "jmp", - "jsr", - "bra", - "braf", - "bsr", - "bsrf", - "bf.s", - "bf/s", - "bt.s", - "bt/s", - NULL, - }; - - for(int i = 0; v[i]; i++) { - if(!strcmp(mnemonic, v[i])) - return true; - } - return false; -} - -bool AsmInstruction::isvaliddelayslot() const noexcept -{ - // TODO: PC-relative move is a valid delay slot but it doesn't work - return !isdelayed() && !isterminal() && !isjump() && !iscondjump() - && strcmp(this->mnemonic, "mova") != 0; -} - } /* namespace FxOS */ diff --git a/lib/load-asm.l b/lib/load-asm.l index 1eddd77..79b4954 100644 --- a/lib/load-asm.l +++ b/lib/load-asm.l @@ -39,6 +39,8 @@ enum Token { AT_DRN, 
AT_DRM, AT_DGBR, /* Array dereferencing */ AT_R0RN, AT_R0RM, AT_R0GBR, + /* Tags */ + TAG_RET, TAG_UNCONDJUMP, TAG_CONDJUMP, TAG_CALL, TAG_DELAY, TAG_ISLOT, }; /* Instruction opcode pattern */ @@ -116,6 +118,13 @@ space [ \t]+ "mach" { return MACH; } "macl" { return MACL; } +"%ret" { return TAG_RET; } +"%uncondjump" { return TAG_UNCONDJUMP; } +"%condjump" { return TAG_CONDJUMP; } +"%call" { return TAG_CALL; } +"%delay" { return TAG_DELAY; } +"%islot" { return TAG_ISLOT; } + {mnemonic} { yylval = strdup(yytext); return MNEMONIC; } . { err("lex error near %s", yytext); } @@ -228,7 +237,7 @@ static AsmArgument make_arg(int token, int opsize, int m, int n, int d, int i) return AsmArgument_PcRel(d*opsize, opsize); } - FxOS_log(ERR, "bad token %d found as argument of instruction sped", token); + FxOS_log(ERR, "bad argument token: %d", token); return AsmArgument_Reg(Reg::UNDEFINED); } @@ -237,11 +246,12 @@ static AsmArgument make_arg(int token, int opsize, int m, int n, int d, int i) @mnemonic Mnemonic (especially important for operation size suffixes) @argtoken1 Token corresponding to the first argument (0 if no argument) @argtoken2 Token corresponding to the second argument (0 if unused) + @insntags Instruction tags Generates all the instances of the instruction, then sends them to the disassembler for fast lookup. Returns number of instantiated opcodes. */ static int instantiate(struct Pattern p, char const *mnemonic, int argtoken1, - int argtoken2) + int argtoken2, int insntags) { int total = 0; @@ -257,6 +267,7 @@ static int instantiate(struct Pattern p, char const *mnemonic, int argtoken1, AsmInstruction ins(mnemonic); ins.opcode = opcode; + ins.tags = insntags; if(argtoken1) { ins.args[0] = make_arg(argtoken1, ins.opsize, m,n,d,i); @@ -274,6 +285,25 @@ static int instantiate(struct Pattern p, char const *mnemonic, int argtoken1, return total; } +/* Get the AsmInstruction tag associated with a tag token. 
*/ +int get_tag(int t) +{ + if(t == TAG_RET) + return AsmInstruction::Tag::IsReturn; + if(t == TAG_UNCONDJUMP) + return AsmInstruction::Tag::IsUnconditionalJump; + if(t == TAG_CONDJUMP) + return AsmInstruction::Tag::IsConditionalJump; + if(t == TAG_CALL) + return AsmInstruction::Tag::IsCall; + if(t == TAG_DELAY) + return AsmInstruction::Tag::HasDelaySlot; + if(t == TAG_ISLOT) + return AsmInstruction::Tag::IsInvalidDelaySlot; + + return 0; +} + /* Load an assembly instruction table for the disassembler. */ int load_instructions(Buffer const &file) { @@ -289,12 +319,14 @@ int load_instructions(Buffer const &file) /* Instruction information */ char *code=nullptr, *mnemonic=nullptr; int argtoken1=0, argtoken2=0; + int insntags=0; /* Current line */ int line = -1; while(1) { int t = yylex(); + int tag = get_tag(t); if(line >= 0 && (yylineno != line || t == PATTERN || t == -1)) { /* Finalize current instruction */ @@ -304,7 +336,7 @@ int load_instructions(Buffer const &file) } Pattern p = make_pattern(code); - total += instantiate(p, mnemonic, argtoken1,argtoken2); + total += instantiate(p, mnemonic, argtoken1, argtoken2, insntags); if(code) free(code); if(mnemonic) free(mnemonic); @@ -318,6 +350,7 @@ int load_instructions(Buffer const &file) mnemonic = nullptr; argtoken1 = 0; argtoken2 = 0; + insntags = 0; } else if(t == MNEMONIC && !mnemonic) { mnemonic = yylval; @@ -326,6 +359,9 @@ int load_instructions(Buffer const &file) err("missing mnemonic at line %d", line); break; } + else if(tag) { + insntags |= tag; + } else if(!argtoken1) { argtoken1 = t; } diff --git a/lib/passes/cfg.cpp b/lib/passes/cfg.cpp index 22b1d08..b916183 100644 --- a/lib/passes/cfg.cpp +++ b/lib/passes/cfg.cpp @@ -34,7 +34,7 @@ bool CfgPass::analyzeInstruction(uint32_t pc, Instruction &i) "terminal" to avoid the computation!) 
*/ uint32_t jmptarget = 0xffffffff; - if(i.inst->isjump() || i.inst->iscondjump()) { + if(i.inst->isAnyJump()) { auto &args = i.inst->args; if(i.inst->arg_count != 1 || args[0].kind != AsmArgument::PcJump) { @@ -61,13 +61,13 @@ bool CfgPass::analyzeInstruction(uint32_t pc, Instruction &i) delay slot has no branching properties on its own, so nothing new to set in the properties. */ if(i.delayslot) { - if(!i.inst->isvaliddelayslot()) { + if(!i.inst->isValidDelaySlot()) { FxOS_log(ERR, "invalid delay slot at 0x%08x", pc); return false; } } /* If it has a delay slot, create it at the next instruction */ - else if(i.inst->isdelayed()) { + else if(i.inst->hasDelaySlot()) { Instruction &slot = *m_disasm.getInstructionAt(pc + 2, true); if(slot.leader) throw std::logic_error(format( @@ -75,22 +75,22 @@ bool CfgPass::analyzeInstruction(uint32_t pc, Instruction &i) " slot - this is unsupported by fxos and will produce garbage " "analysis! (x_x)", pc + 2)); - if(!slot.inst->isvaliddelayslot()) { + if(!slot.inst->isValidDelaySlot()) { FxOS_log(ERR, "invalid delay slot at 0x%08x", pc + 2); return false; } slot.delayslot = true; - slot.terminal = i.inst->isterminal(); - slot.jump = i.inst->isjump(); - slot.condjump = i.inst->iscondjump(); + slot.terminal = i.inst->isReturn(); + slot.jump = i.inst->isUnconditionalJump(); + slot.condjump = i.inst->isConditionalJump(); slot.jmptarget = jmptarget; } /* Otherwise, use standard properties */ - else if(!i.inst->isdelayed()) { - i.terminal = i.inst->isterminal(); - i.jump = i.inst->isjump(); - i.condjump = i.inst->iscondjump(); + else if(!i.inst->hasDelaySlot()) { + i.terminal = i.inst->isReturn(); + i.jump = i.inst->isUnconditionalJump(); + i.condjump = i.inst->isConditionalJump(); i.jmptarget = jmptarget; } @@ -117,7 +117,7 @@ bool CfgPass::exploreFunction(uint32_t pc) AsmInstruction const &i = *ci->inst; /* Find function call instructions */ - if(i.isterminal() || !i.iscall() || i.arg_count < 1) + if(i.isReturn() || !i.isCall() || 
i.arg_count < 1) continue; /* The target must be known */