%{
/* Lexer and loader for FxOS assembly instruction tables.

   Each line of a table holds a 16-character opcode pattern, a mnemonic and
   up to two operand descriptions. load_asm() scans the table with the flex
   rules below, expands every pattern into all of its concrete opcodes and
   registers each of them with the disassembler. */

/* NOTE(review): the header names of the original #include directives were
   lost in the copy of this file under review. The standard headers below
   follow from the names used in this file; the four project headers are a
   best guess and must be confirmed against the repository. */
#include <fxos/lang.h>
#include <fxos/errors.h>
#include <fxos/util.h>
#include <fxos/disassembly.h>

#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <string>

/* Text of the last PATTERN or MNEMONIC token (owned copy of yytext) */
static std::string yylval;

/* Argument tokens. The lexer additionally returns -1 at end of input. */
enum Token
{
    /* Instruction pattern and mnemonic */
    PATTERN = 1, MNEMONIC,

    /* General-purpose registers */
    R0, RN, RM,
    /* Banked registers */
    R0_BANK, R1_BANK, R2_BANK, R3_BANK, R4_BANK, R5_BANK, R6_BANK, R7_BANK,
    /* Control registers */
    SR, PR, GBR, VBR, DBR, SSR, SPC, SGR, MACH, MACL,
    /* PC-relative jumps and displacements (with 4-alignment correction) */
    JUMP8, JUMP12, AT_DPC,
    /* Immediate operands */
    IMM,
    /* Memory access with post-increment and pre-decrement */
    AT_RN, AT_RM, AT_RMP, AT_RNP, AT_MRN,
    /* Structure dereferencing */
    AT_DRN, AT_DRM, AT_DGBR,
    /* Array dereferencing */
    AT_R0RN, AT_R0RM, AT_R0GBR,
};

/* Instruction opcode pattern */
struct Pattern
{
    /* 16-bit opcode, bits corresponding to arguments are clear */
    uint16_t bits;
    /* Position of the arguments */
    uint8_t n_sh, m_sh, d_sh, i_sh;
    /* Length of arguments, in bits */
    uint16_t n_size, m_size, d_size, i_size;
};

/* Current file name */
static std::string filename;

/* Report a syntax error at the current lexer position.
   @format  printf()-style format string, followed by its arguments
   Never returns: always throws an FxOS::SyntaxError carrying the current
   file name, the current value of yylineno and the formatted message
   (truncated to 255 characters). */
[[noreturn]] static void err(char const *format, ...)
{
    char buf[256];

    va_list args;
    va_start(args, format);
    vsnprintf(buf, sizeof buf, format, args);
    va_end(args);

    throw FxOS::SyntaxError(filename.c_str(), yylineno, buf);
}

%}

%option prefix="asm"
%option noyywrap
%option nounput

pattern   ^[01nmdi]{16}
mnemonic  [a-zA-Z0-9./]+
space     [ \t]+

/* Rules, in order: comments, blanks and commas are skipped; newlines are
   counted by hand in yylineno; a line must start with a 16-character opcode
   pattern, anything else at the start of a line is an error; then come the
   operand keywords; finally any other word is returned as a MNEMONIC. When
   two rules match the same length the earliest one wins, which is why the
   keywords are listed before {mnemonic}. */

%%

^#[^\n]*                ;
{space}                 ;
,                       ;
[\n]                    yylineno++;

{pattern}               { yylval = yytext; return PATTERN; }
^.{0,16}                { err("invalid opcode at start of line"); }

"#imm"                  { return IMM; }
"rn"                    { return RN; }
"rm"                    { return RM; }
"jump8"                 { return JUMP8; }
"jump12"                { return JUMP12; }
"@(disp,"[ ]*"pc)"      { return AT_DPC; }
"@rn"                   { return AT_RN; }
"@rm"                   { return AT_RM; }
"@rm+"                  { return AT_RMP; }
"@rn+"                  { return AT_RNP; }
"@-rn"                  { return AT_MRN; }
"@(disp,"[ ]*"rn)"      { return AT_DRN; }
"@(disp,"[ ]*"rm)"      { return AT_DRM; }
"@(r0,"[ ]*"rn)"        { return AT_R0RN; }
"@(r0,"[ ]*"rm)"        { return AT_R0RM; }
"@(disp,"[ ]*"gbr)"     { return AT_DGBR; }
"@(r0,"[ ]*"gbr)"       { return AT_R0GBR; }

"r0"                    { return R0; }
"sr"                    { return SR; }
"pr"                    { return PR; }
"gbr"                   { return GBR; }
"vbr"                   { return VBR; }
"ssr"                   { return SSR; }
"spc"                   { return SPC; }
"sgr"                   { return SGR; }
"dbr"                   { return DBR; }
"r0_bank"               { return R0_BANK; }
"r1_bank"               { return R1_BANK; }
"r2_bank"               { return R2_BANK; }
"r3_bank"               { return R3_BANK; }
"r4_bank"               { return R4_BANK; }
"r5_bank"               { return R5_BANK; }
"r6_bank"               { return R6_BANK; }
"r7_bank"               { return R7_BANK; }
"mach"                  { return MACH; }
"macl"                  { return MACL; }

{mnemonic}              { yylval = yytext; return MNEMONIC; }

.                       { err("lex error near '%s'", yytext); }
<<EOF>>                 { return -1; }

%%

namespace FxOS {

/* Build a pattern for an opcode.
   @code  16-character string using characters from "01mndi"
   Returns a logical pattern representing the opcode and its arguments.

   Each argument has two parameters: shift ("sh") and size ("size"). From an
   instance of the instruction, the value of parameter x can be recovered as:

     x = (opcode >> x_sh) & ((1 << x_size) - 1);

   (Originally, the second parameter was named x_mask and was equal to the
   rhs of the [&] operator. But this decoding method is now unused.)

   The shift recorded for an argument is the position of its last (lowest)
   character, so the formula above assumes that the characters of a given
   argument are contiguous. Exactly 16 characters are read from @code; the
   lexer's {pattern} rule guarantees that length. */
static Pattern make_pattern(char const *code)
{
    Pattern p {};

    for(int i = 0; i < 16; i++)
    {
        /* Bit position of this character within the opcode */
        int const shift = 15 - i;

        /* Make room for the new bit; it stays clear for '0' and for
           argument characters */
        p.bits <<= 1;

        switch(code[i])
        {
        case '1': p.bits |= 1; break;
        case 'n': p.n_sh = shift; p.n_size++; break;
        case 'm': p.m_sh = shift; p.m_size++; break;
        case 'd': p.d_sh = shift; p.d_size++; break;
        case 'i': p.i_sh = shift; p.i_size++; break;
        }
    }

    return p;
}

/* Instantiate an argument token as an fxos language structure.
   @token        Argument token (tokens for other objects will be rejected)
   @opsize       Operation size indicated in the mnemonic
   @m @n @d @i   Instruction instance
   Returns a semantic FxOS::Argument.

   Throws a SyntaxError (through err()) if the token is AT_DPC and @opsize is
   zero, and std::logic_error if @token is not an argument token. */
static Argument make_arg(int token, int opsize, int m, int n, int d, int i)
{
    /* TODO: This function is too slow for the ~100k times it is called. */

    /* Registers rn and rm */
    CpuRegister Rn(format("r%d", n & 0xf));
    CpuRegister Rm(format("r%d", m & 0xf));

    /* Sign extensions of d to 8 and 12 bits */
    int32_t d8  = static_cast<int8_t>(d);
    int32_t d12 = (d & 0x800) ? (d | ~0xfff) : d;
    /* Sign extension of i to 8 bits */
    int32_t i8 = static_cast<int8_t>(i);

    using Reg = CpuRegister;

    switch(token)
    {
    case R0:        return Argument_Reg(Reg::R0);
    case RN:        return Argument_Reg(Rn);
    case RM:        return Argument_Reg(Rm);
    case R0_BANK:   return Argument_Reg(Reg::R0B);
    case R1_BANK:   return Argument_Reg(Reg::R1B);
    case R2_BANK:   return Argument_Reg(Reg::R2B);
    case R3_BANK:   return Argument_Reg(Reg::R3B);
    case R4_BANK:   return Argument_Reg(Reg::R4B);
    case R5_BANK:   return Argument_Reg(Reg::R5B);
    case R6_BANK:   return Argument_Reg(Reg::R6B);
    case R7_BANK:   return Argument_Reg(Reg::R7B);
    case SR:        return Argument_Reg(Reg::SR);
    case PR:        return Argument_Reg(Reg::PR);
    case GBR:       return Argument_Reg(Reg::GBR);
    case VBR:       return Argument_Reg(Reg::VBR);
    case DBR:       return Argument_Reg(Reg::DBR);
    case SSR:       return Argument_Reg(Reg::SSR);
    case SPC:       return Argument_Reg(Reg::SPC);
    case SGR:       return Argument_Reg(Reg::SGR);
    case MACH:      return Argument_Reg(Reg::MACH);
    case MACL:      return Argument_Reg(Reg::MACL);
    /* Jump displacements are encoded in units of 2 bytes */
    case JUMP8:     return Argument_PcJump(d8 * 2);
    case JUMP12:    return Argument_PcJump(d12 * 2);
    case IMM:       return Argument_Imm(i8);
    case AT_RN:     return Argument_Deref(Rn);
    case AT_RM:     return Argument_Deref(Rm);
    case AT_RMP:    return Argument_PostInc(Rm);
    case AT_RNP:    return Argument_PostInc(Rn);
    case AT_MRN:    return Argument_PreDec(Rn);
    case AT_DRN:    return Argument_StructDeref(d, opsize, Rn);
    case AT_DRM:    return Argument_StructDeref(d, opsize, Rm);
    case AT_DGBR:   return Argument_StructDeref(d, opsize, Reg::GBR);
    case AT_R0RN:   return Argument_ArrayDeref(Reg::R0, Rn);
    case AT_R0RM:   return Argument_ArrayDeref(Reg::R0, Rm);
    case AT_R0GBR:  return Argument_ArrayDeref(Reg::R0, Reg::GBR);
    case AT_DPC:
        if(!opsize)
            err("@(disp,pc) must have a size (.w, .l)");
        return Argument_PcRel(d, opsize);
    }

    throw std::logic_error("lex asm builds args from bad tokens");
}

/* Record all the instances of an instruction in the disassembly table.
   @p          Instruction binary pattern
   @mnemonic   Mnemonic (especially important for operation size suffixes)
   @argtoken1  Token corresponding to the first argument (0 if no argument)
   @argtoken2  Token corresponding to the second argument (0 if unused)

   Generates all the instances of the instruction, then sends them to the
   disassembler for fast lookup. An argument that is absent from the pattern
   has size 0 and therefore contributes the single value 0. */
static void instantiate(struct Pattern p, std::string mnemonic,
    int argtoken1, int argtoken2)
{
    /* Number of values taken by each argument */
    int const n_count = 1 << p.n_size;
    int const m_count = 1 << p.m_size;
    int const d_count = 1 << p.d_size;
    int const i_count = 1 << p.i_size;

    for(int n = 0; n < n_count; n++)
    for(int m = 0; m < m_count; m++)
    for(int d = 0; d < d_count; d++)
    for(int i = 0; i < i_count; i++)
    {
        uint16_t opcode = p.bits;
        opcode |= (n << p.n_sh);
        opcode |= (m << p.m_sh);
        opcode |= (d << p.d_sh);
        opcode |= (i << p.i_sh);

        Instruction ins(mnemonic);
        ins.opcode = opcode;

        if(argtoken1)
            ins.args.push_back(make_arg(argtoken1, ins.opsize, m, n, d, i));
        if(argtoken2)
            ins.args.push_back(make_arg(argtoken2, ins.opsize, m, n, d, i));

        register_instruction(ins);
    }
}

/* Load an assembly instruction table for the disassembler.
   @file          File holding the table
   @start_offset  Offset within the file data where the table starts
   @start_line    Line number of that offset, used in error messages

   Throws FxOS::SyntaxError on malformed input. The scanner buffer and all
   token text are released on every exit path, including exceptions.
   Tokens found after the second operand of an instruction are ignored. */
void load_asm(File &file, size_t start_offset, size_t start_line)
{
    /* Deletes the flex buffer when the function exits, including when err()
       throws from inside yylex() or from the checks below */
    struct ScanBuffer
    {
        YY_BUFFER_STATE state;
        ~ScanBuffer() { yy_delete_buffer(state); }
    };

    /* Lex all instructions and fill in the general assembly table */
    ScanBuffer scan { yy_scan_bytes(file.data() + start_offset,
        file.size() - start_offset) };
    yylineno = static_cast<int>(start_line);
    filename = file.path();

    /* Instruction information; an empty mnemonic means "not seen yet" (the
       {mnemonic} rule never matches an empty string) */
    std::string code, mnemonic;
    int argtoken1 = 0, argtoken2 = 0;
    /* Line of the instruction being read, -1 when there is none */
    int line = -1;

    while(1)
    {
        int t = yylex();

        /* A new line, a new pattern or the end of input terminates the
           instruction that was being read */
        if(line >= 0 && (yylineno != line || t == PATTERN || t == -1))
        {
            if(mnemonic.empty())
                err("%d: missing mnemonic", line);

            instantiate(make_pattern(code.c_str()), mnemonic, argtoken1,
                argtoken2);
            line = -1;
        }

        if(t == -1)
            break;

        if(t == PATTERN)
        {
            code = yylval;
            line = yylineno;
            mnemonic.clear();
            argtoken1 = 0;
            argtoken2 = 0;
        }
        else if(line < 0)
        {
            /* The lexer only lets a line start with a pattern, so this is a
               safety net rather than an expected path */
            err("token found before any opcode pattern");
        }
        else if(t == MNEMONIC && mnemonic.empty())
        {
            mnemonic = yylval;
        }
        else if(mnemonic.empty())
        {
            err("%d: missing mnemonic", line);
        }
        else if(t == MNEMONIC && (!argtoken1 || !argtoken2))
        {
            /* A word that is not an operand keyword lexes as a mnemonic;
               reject it here, with a location, instead of letting make_arg()
               fail on it later */
            err("unknown argument '%s'", yylval.c_str());
        }
        else if(!argtoken1)
        {
            argtoken1 = t;
        }
        else if(!argtoken2)
        {
            argtoken2 = t;
        }
    }
}

} /* namespace FxOS */