fxos/lib/load-asm.l

342 lines
11 KiB
Plaintext

%{
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
#include <fxos/lang.h>
#include <fxos/disassembly.h>
#include <fxos/util/format.h>
#include <fxos/util/log.h>
#include <cstdarg>
#include <string>
/* Text of the most recent PATTERN or MNEMONIC token. The lexer rules store a
   strdup() copy of yytext here; load_instructions() takes the pointer and
   releases it with free() once the instruction has been processed. */
static char *yylval;
/* Tokens returned by the lexer. PATTERN and MNEMONIC come with their text in
   yylval; all the other tokens name a kind of instruction argument. Values
   start at 1 so that 0 can stand for "no argument". */
enum Token {
    /* Opcode bit pattern, then instruction name */
    PATTERN = 1,
    MNEMONIC,

    /* General-purpose registers */
    R0,
    RN,
    RM,

    /* Banked registers */
    R0_BANK,
    R1_BANK,
    R2_BANK,
    R3_BANK,
    R4_BANK,
    R5_BANK,
    R6_BANK,
    R7_BANK,

    /* Control registers */
    SR,
    PR,
    GBR,
    VBR,
    DBR,
    SSR,
    SPC,
    SGR,
    MACH,
    MACL,

    /* PC-relative jumps and displacements (with 4-alignment correction) */
    JUMP8,
    JUMP12,
    AT_DPC,

    /* PC-relative address computation (no memory access) */
    DPC,

    /* Immediate operand */
    IMM,

    /* Plain, post-incremented and pre-decremented memory accesses */
    AT_RN,
    AT_RM,
    AT_RMP,
    AT_RNP,
    AT_MRN,

    /* Structure dereferencing: @(disp, base) */
    AT_DRN,
    AT_DRM,
    AT_DGBR,

    /* Array dereferencing: @(r0, base) */
    AT_R0RN,
    AT_R0RM,
    AT_R0GBR,
};
/* Opcode pattern of an instruction: the constant bits of the opcode together
   with the location of each of its argument fields (n, m, d and i). */
struct Pattern {
    /* 16-bit opcode in which every argument bit is left at zero */
    uint16_t bits;

    /* Shift (position of the lowest bit) of each argument field */
    uint8_t n_sh;
    uint8_t m_sh;
    uint8_t d_sh;
    uint8_t i_sh;

    /* Width of each argument field, in bits */
    uint16_t n_size;
    uint16_t m_size;
    uint16_t d_size;
    uint16_t i_size;
};
/* Name of the file currently being loaded. Assigned by load_instructions()
   and used as the prefix of every message emitted through err(). */
static std::string filename;
/* Log an error through FxOS_log(), prefixed with "<filename>:<line>: " where
   <line> is the lexer's current yylineno. [fmt] must be a string literal
   because it is concatenated with the prefix at compile time; ##__VA_ARGS__
   is the GNU extension that drops the trailing comma when no extra argument
   is given.
   NOTE(review): [filename] is a std::string passed for a "%s" conversion;
   presumably FxOS_log's formatter accepts std::string - confirm. */
#define err(fmt, ...) \
FxOS_log(ERR, "%s:%d: " fmt, filename, yylineno, ##__VA_ARGS__)
%}
/* Generated symbols use the "asm" prefix instead of the default "yy" */
%option prefix="asm"
/* No yywrap(): the scanner stops at the end of its single input */
%option noyywrap
/* Do not generate yyunput(), which this scanner never calls */
%option nounput
/* Opcode: exactly 16 characters among "01nmdi", anchored at start of line */
pattern ^[01nmdi]{16}
/* Mnemonic: any run of letters, digits, dots and slashes */
mnemonic [a-zA-Z0-9./]+
/* Horizontal whitespace */
space [ \t]+
%%
^#[^\n]* ; /* Comment line starting with '#': ignored */
{space} ; /* Whitespace between tokens: ignored */
, ; /* Argument separator: ignored */
[\n] yylineno++; /* Line numbers are counted by hand; no token is returned */
{pattern} { /* Heap copy of the text; whoever receives the token must free() it */ yylval = strdup(yytext); return PATTERN; }
^.{0,16} { /* Fallback for a line start not claimed by the rules above; reports the error and returns no token */ err("invalid opcode at start of line"); }
"#imm" { return IMM; }
"rn" { return RN; }
"rm" { return RM; }
"jump8" { return JUMP8; }
"jump12" { return JUMP12; }
"pc+disp" { return DPC; }
"@(disp,"[ ]*"pc)" { return AT_DPC; }
"@rn" { return AT_RN; }
"@rm" { return AT_RM; }
"@rm+" { return AT_RMP; }
"@rn+" { return AT_RNP; }
"@-rn" { return AT_MRN; }
"@(disp,"[ ]*"rn)" { return AT_DRN; }
"@(disp,"[ ]*"rm)" { return AT_DRM; }
"@(r0,"[ ]*"rn)" { return AT_R0RN; }
"@(r0,"[ ]*"rm)" { return AT_R0RM; }
"@(disp,"[ ]*"gbr)" { return AT_DGBR; }
"@(r0,"[ ]*"gbr)" { return AT_R0GBR; }
"r0" { return R0; }
"sr" { return SR; }
"pr" { return PR; }
"gbr" { return GBR; }
"vbr" { return VBR; }
"ssr" { return SSR; }
"spc" { return SPC; }
"sgr" { return SGR; }
"dbr" { return DBR; }
"r0_bank" { return R0_BANK; }
"r1_bank" { return R1_BANK; }
"r2_bank" { return R2_BANK; }
"r3_bank" { return R3_BANK; }
"r4_bank" { return R4_BANK; }
"r5_bank" { return R5_BANK; }
"r6_bank" { return R6_BANK; }
"r7_bank" { return R7_BANK; }
"mach" { return MACH; }
"macl" { return MACL; }
{mnemonic} { /* Listed after the operand keywords so that they win equal-length matches; heap copy of the text, to be free()d by the receiver */ yylval = strdup(yytext); return MNEMONIC; }
. { /* Any other character: reported, no token returned */ err("lex error near %s", yytext); }
<<EOF>> { /* End of input is signalled to load_instructions() as -1 */ return -1; }
%%
namespace FxOS {
/* Translate the textual form of an opcode into a Pattern.

   @code  16 characters taken from "01mndi", most significant bit first

   The returned pattern holds the constant bits of the opcode (argument bits
   are zero) and, for each argument x among n, m, d and i, a shift x_sh and a
   width x_size such that the argument of an encoded instruction is

     x = (opcode >> x_sh) & ((1 << x_size) - 1);

   (The width used to be stored as a mask, x_mask, equal to the right-hand
   side of the [&] above; that decoding method is no longer used.) */
static Pattern make_pattern(char const *code)
{
    Pattern result {};

    for(int pos = 0; pos < 16; pos++) {
        /* Position in the opcode of the bit described by code[pos] */
        int const shift = 15 - pos;

        /* Every character accounts for exactly one bit of the opcode */
        result.bits <<= 1;

        switch(code[pos]) {
        case '1':
            result.bits |= 1;
            break;
        case '0':
            break;
        /* Argument bits: the last character of a field gives its shift */
        case 'n':
            result.n_sh = shift;
            result.n_size++;
            break;
        case 'm':
            result.m_sh = shift;
            result.m_size++;
            break;
        case 'd':
            result.d_sh = shift;
            result.d_size++;
            break;
        case 'i':
            result.i_sh = shift;
            result.i_size++;
            break;
        default:
            break;
        }
    }

    return result;
}
/* Instantiate an argument token as an fxos language structure.

   @token       Argument token (tokens for other objects are rejected)
   @opsize      Operation size indicated in the mnemonic; scales displacements
   @m @n @d @i  Values of the argument fields in the instruction instance

   Returns the corresponding AsmArgument. If [token] is not an argument token,
   an error is logged and a register argument holding Reg::UNDEFINED is
   returned instead. */
static AsmArgument make_arg(int token, int opsize, int m, int n, int d, int i)
{
    using Reg = CpuRegister;

    static Reg const general_purpose[16] = {
        Reg::R0,  Reg::R1,  Reg::R2,  Reg::R3,  Reg::R4,  Reg::R5,
        Reg::R6,  Reg::R7,  Reg::R8,  Reg::R9,  Reg::R10, Reg::R11,
        Reg::R12, Reg::R13, Reg::R14, Reg::R15,
    };

    /* Registers rn and rm (only the low 4 bits select the register) */
    CpuRegister Rn = general_purpose[n & 0xf];
    CpuRegister Rm = general_purpose[m & 0xf];

    /* Sign extensions of d from 8 and 12 bits */
    int32_t d8 = (int8_t)d;
    int32_t d12 = (d & 0x800) ? (int32_t)(d | 0xfffff000) : (d);
    /* Sign extension of i from 8 bits */
    int32_t i8 = (int8_t)i;

    switch(token) {
    /* Registers */
    case R0:       return AsmArgument_Reg(Reg::R0);
    case RN:       return AsmArgument_Reg(Rn);
    case RM:       return AsmArgument_Reg(Rm);
    case R0_BANK:  return AsmArgument_Reg(Reg::R0B);
    case R1_BANK:  return AsmArgument_Reg(Reg::R1B);
    case R2_BANK:  return AsmArgument_Reg(Reg::R2B);
    case R3_BANK:  return AsmArgument_Reg(Reg::R3B);
    case R4_BANK:  return AsmArgument_Reg(Reg::R4B);
    case R5_BANK:  return AsmArgument_Reg(Reg::R5B);
    case R6_BANK:  return AsmArgument_Reg(Reg::R6B);
    case R7_BANK:  return AsmArgument_Reg(Reg::R7B);
    case SR:       return AsmArgument_Reg(Reg::SR);
    case PR:       return AsmArgument_Reg(Reg::PR);
    case GBR:      return AsmArgument_Reg(Reg::GBR);
    case VBR:      return AsmArgument_Reg(Reg::VBR);
    case DBR:      return AsmArgument_Reg(Reg::DBR);
    case SSR:      return AsmArgument_Reg(Reg::SSR);
    case SPC:      return AsmArgument_Reg(Reg::SPC);
    case SGR:      return AsmArgument_Reg(Reg::SGR);
    case MACH:     return AsmArgument_Reg(Reg::MACH);
    case MACL:     return AsmArgument_Reg(Reg::MACL);

    /* Jumps: signed displacement, doubled */
    case JUMP8:    return AsmArgument_PcJump(d8 * 2);
    case JUMP12:   return AsmArgument_PcJump(d12 * 2);
    /* PC-relative address: unsigned displacement, multiplied by 4 */
    case DPC:      return AsmArgument_PcAddr(d * 4);
    case IMM:      return AsmArgument_Imm(i8);

    /* Memory accesses through a register */
    case AT_RN:    return AsmArgument_Deref(Rn);
    case AT_RM:    return AsmArgument_Deref(Rm);
    case AT_RMP:   return AsmArgument_PostInc(Rm);
    case AT_RNP:   return AsmArgument_PostInc(Rn);
    case AT_MRN:   return AsmArgument_PreDec(Rn);

    /* Displacements are scaled by the operation size */
    case AT_DRN:   return AsmArgument_StructDeref(d*opsize, opsize, Rn);
    case AT_DRM:   return AsmArgument_StructDeref(d*opsize, opsize, Rm);
    case AT_DGBR:  return AsmArgument_StructDeref(d*opsize, opsize, Reg::GBR);
    case AT_R0RN:  return AsmArgument_ArrayDeref(Reg::R0, Rn);
    case AT_R0RM:  return AsmArgument_ArrayDeref(Reg::R0, Rm);
    case AT_R0GBR: return AsmArgument_ArrayDeref(Reg::R0, Reg::GBR);

    case AT_DPC:
        /* The missing size is only reported; the argument is still built,
           with a zero displacement and size */
        if(!opsize)
            err("@(disp,pc) must have a size (.w, .l)");
        return AsmArgument_PcRel(d*opsize, opsize);
    }

    /* Not an argument token (PATTERN, MNEMONIC or an unknown value) */
    FxOS_log(ERR, "bad token %d found as argument of instruction spec", token);
    return AsmArgument_Reg(Reg::UNDEFINED);
}
/* Register every instance of an instruction in the disassembly table.

   @p          Binary pattern of the instruction
   @mnemonic   Mnemonic (carries the operation size suffix, if any)
   @argtoken1  Token of the first argument (0 if there is none)
   @argtoken2  Token of the second argument (0 if unused)

   Enumerates all the values of the n, m, d and i fields of the pattern,
   builds the matching instruction for each combination and hands it to
   register_instruction(). Returns the number of opcodes generated. */
static int instantiate(struct Pattern p, char const *mnemonic, int argtoken1,
    int argtoken2)
{
    /* Number of values taken by each field (1 when the field is absent) */
    int const n_count = 1 << p.n_size;
    int const m_count = 1 << p.m_size;
    int const d_count = 1 << p.d_size;
    int const i_count = 1 << p.i_size;

    int count = 0;

    for(int n = 0; n < n_count; n++)
    for(int m = 0; m < m_count; m++)
    for(int d = 0; d < d_count; d++)
    for(int i = 0; i < i_count; i++) {
        /* Constant bits plus each field value at its own position */
        uint16_t const opcode = p.bits
            | (n << p.n_sh)
            | (m << p.m_sh)
            | (d << p.d_sh)
            | (i << p.i_sh);

        AsmInstruction ins(mnemonic);
        ins.opcode = opcode;

        if(argtoken1) {
            ins.args[0] = make_arg(argtoken1, ins.opsize, m, n, d, i);
            ins.arg_count = 1;
        }
        if(argtoken2) {
            ins.args[1] = make_arg(argtoken2, ins.opsize, m, n, d, i);
            ins.arg_count = 2;
        }

        register_instruction(ins);
        count++;
    }

    return count;
}
/* Load an assembly instruction table for the disassembler.

   @file  Buffer holding the text of the table; file.path is used as the
          prefix of error messages

   Each instruction is described on one line: a 16-character opcode pattern,
   a mnemonic, then up to two arguments. Since the lexer does not return a
   token for newlines, an instruction is finalized when the next token shows
   up on another line, starts a new pattern, or signals the end of input.

   Returns the number of opcodes registered. Loading stops at the first
   instruction without a mnemonic; what was registered before is kept. */
int load_instructions(Buffer const &file)
{
    /* Lex all instructions and fill in the general assembly table */
    YY_BUFFER_STATE buf = yy_scan_bytes(file.data.get(), file.size);
    yylineno = 1;
    filename = file.path;

    /* Number of instructions lexed */
    int total = 0;
    /* Instruction information; both strings are owned by this function */
    char *code = nullptr, *mnemonic = nullptr;
    int argtoken1 = 0, argtoken2 = 0;
    /* Line of the instruction being read, -1 when there is none */
    int line = -1;

    while(1) {
        int t = yylex();

        if(line >= 0 && (yylineno != line || t == PATTERN || t == -1)) {
            /* Finalize current instruction */
            if(!mnemonic) {
                err("missing mnemonic at line %d", line);
                /* The text of the token just read has no owner yet */
                if(t == PATTERN || t == MNEMONIC)
                    free(yylval);
                break;
            }

            Pattern p = make_pattern(code);
            total += instantiate(p, mnemonic, argtoken1, argtoken2);

            /* Forget the instruction entirely. Without this, a following
               token that is not a PATTERN (which happens after a line with
               an invalid opcode) would finalize the same instruction again,
               reading and freeing already-freed strings. */
            free(code);
            free(mnemonic);
            code = nullptr;
            mnemonic = nullptr;
            argtoken1 = 0;
            argtoken2 = 0;
            line = -1;
        }

        if(t == -1) break;

        if(t == PATTERN) {
            /* A mnemonic may have been picked up outside of any instruction;
               free(nullptr) is a no-op in the normal case */
            free(code);
            free(mnemonic);

            code = yylval;
            line = yylineno;
            mnemonic = nullptr;
            argtoken1 = 0;
            argtoken2 = 0;
        }
        else if(t == MNEMONIC && !mnemonic) {
            mnemonic = yylval;
        }
        else if(!mnemonic) {
            err("missing mnemonic at line %d", line);
            break;
        }
        else {
            /* A mnemonic-like word in argument position: only the token is
               recorded (make_arg() will reject it), so drop its text */
            if(t == MNEMONIC)
                free(yylval);

            /* Arguments beyond the second one are ignored */
            if(!argtoken1)
                argtoken1 = t;
            else if(!argtoken2)
                argtoken2 = t;
        }
    }

    /* Release whatever is left after an early exit */
    free(code);
    free(mnemonic);

    yy_delete_buffer(buf);
    return total;
}
} /* namespace FxOS */