fxos/lib/load-asm.l

333 lines
9.2 KiB
Plaintext

%{
#include <fxos/load.h>
#include <fxos/lang.h>
#include <fxos/disassembly.h>
#include <fxos/errors.h>
#include <fxos/util.h>
#include <cstdarg>
/* Text value handed from the lexer to the parser: the PATTERN and MNEMONIC
   rules store a strdup()'d copy of the matched text here, so whoever takes
   the value is responsible for free()ing it. */
static char *yylval;
/* Tokens produced by the lexer. Numbering starts at 1 so that 0 can be used
   by the loader to mean "no argument". */
enum Token {
    /* Opcode bit pattern and instruction mnemonic */
    PATTERN = 1,
    MNEMONIC,

    /* General-purpose registers */
    R0,
    RN,
    RM,

    /* Banked registers */
    R0_BANK,
    R1_BANK,
    R2_BANK,
    R3_BANK,
    R4_BANK,
    R5_BANK,
    R6_BANK,
    R7_BANK,

    /* Control registers */
    SR,
    PR,
    GBR,
    VBR,
    DBR,
    SSR,
    SPC,
    SGR,
    MACH,
    MACL,

    /* PC-relative jumps and displacements (with 4-alignment correction) */
    JUMP8,
    JUMP12,
    AT_DPC,

    /* Immediate operands */
    IMM,

    /* Memory access, plain, with post-increment and with pre-decrement */
    AT_RN,
    AT_RM,
    AT_RMP,
    AT_RNP,
    AT_MRN,

    /* Structure dereferencing */
    AT_DRN,
    AT_DRM,
    AT_DGBR,

    /* Array dereferencing */
    AT_R0RN,
    AT_R0RM,
    AT_R0GBR,
};
/* Opcode pattern of an instruction: the fixed bits plus, for each of the
   four argument fields (n, m, d, i), where the field sits in the opcode and
   how many bits it occupies. */
struct Pattern {
    /* 16-bit opcode with every argument bit cleared */
    uint16_t bits;

    /* Bit position of each argument field */
    uint8_t n_sh;
    uint8_t m_sh;
    uint8_t d_sh;
    uint8_t i_sh;

    /* Width of each argument field, in bits */
    uint16_t n_size;
    uint16_t m_size;
    uint16_t d_size;
    uint16_t i_size;
};
/* Name of the file currently being loaded; set by load_asm() and used by
   err() to say where a syntax error comes from. */
static std::string filename;
/* Report a syntax error at the current lexer position.
   @format  printf()-style format string, followed by its arguments
   The message is formatted into a fixed 256-byte buffer (longer messages are
   truncated) and an FxOS::SyntaxError carrying the current file name, the
   current value of yylineno and the message is thrown. This function never
   returns normally. */
static void err(char const *format, ...)
{
    static char message[256];

    va_list ap;
    va_start(ap, format);
    vsnprintf(message, sizeof message, format, ap);
    va_end(ap);

    throw FxOS::SyntaxError(filename.c_str(), yylineno, message);
}
%}
/* Generated scanner symbols are prefixed with "asm" instead of "yy" */
%option prefix="asm"
/* Only one input is scanned per call; no yywrap() is needed at end of input */
%option noyywrap
/* unput() is never used, so do not generate it */
%option nounput
/* Opcode pattern: exactly 16 characters among "01nmdi" at the start of a line */
pattern ^[01nmdi]{16}
/* Mnemonic: letters, digits, '.' and '/' */
mnemonic [a-zA-Z0-9./]+
/* Horizontal whitespace */
space [ \t]+
%%
    /* Lines starting with '#' are comments and are discarded */
^#[^\n]* ;
    /* Whitespace and commas only separate tokens */
{space} ;
, ;
    /* Line numbers are tracked by hand */
[\n] yylineno++;
    /* Opcode pattern; the parser receives a strdup()'d copy of the text */
{pattern} { yylval = strdup(yytext); return PATTERN; }
    /* Catch-all for line starts. Flex prefers the longest match and, for
       matches of equal length, the earliest rule, so this rule is chosen when
       no rule gives a longer match and the rules above do not match the same
       length. */
^.{0,16} { err("invalid opcode at start of line"); }
    /* Argument keywords. They are listed before {mnemonic} so that they win
       over it when both match the same text. */
"#imm" { return IMM; }
"rn" { return RN; }
"rm" { return RM; }
"jump8" { return JUMP8; }
"jump12" { return JUMP12; }
"@(disp,"[ ]*"pc)" { return AT_DPC; }
"@rn" { return AT_RN; }
"@rm" { return AT_RM; }
"@rm+" { return AT_RMP; }
"@rn+" { return AT_RNP; }
"@-rn" { return AT_MRN; }
"@(disp,"[ ]*"rn)" { return AT_DRN; }
"@(disp,"[ ]*"rm)" { return AT_DRM; }
"@(r0,"[ ]*"rn)" { return AT_R0RN; }
"@(r0,"[ ]*"rm)" { return AT_R0RM; }
"@(disp,"[ ]*"gbr)" { return AT_DGBR; }
"@(r0,"[ ]*"gbr)" { return AT_R0GBR; }
"r0" { return R0; }
"sr" { return SR; }
"pr" { return PR; }
"gbr" { return GBR; }
"vbr" { return VBR; }
"ssr" { return SSR; }
"spc" { return SPC; }
"sgr" { return SGR; }
"dbr" { return DBR; }
"r0_bank" { return R0_BANK; }
"r1_bank" { return R1_BANK; }
"r2_bank" { return R2_BANK; }
"r3_bank" { return R3_BANK; }
"r4_bank" { return R4_BANK; }
"r5_bank" { return R5_BANK; }
"r6_bank" { return R6_BANK; }
"r7_bank" { return R7_BANK; }
"mach" { return MACH; }
"macl" { return MACL; }
    /* Any other word is a mnemonic; the parser receives a strdup()'d copy */
{mnemonic} { yylval = strdup(yytext); return MNEMONIC; }
    /* Any character not handled above is a lexing error */
. { err("lex error near '%s'", yytext); }
    /* End of input is reported as -1 */
<<EOF>> { return -1; }
%%
namespace FxOS {
/* Build the logical pattern of an opcode from its textual description.
   @code  String of 16 characters among "01nmdi", most significant bit first
   Returns a Pattern in which:
   - bits holds the constant '0'/'1' bits, argument bits being left at 0;
   - for each argument x in {n, m, d, i}, x_size counts the characters 'x'
     in @code and x_sh is the bit position of the last (rightmost) one.
   For an argument whose characters are contiguous, its value can be read
   back from an instance of the instruction with:
     x = (opcode >> x_sh) & ((1 << x_size) - 1);
   (The size used to be stored as a mask equal to the right-hand side of the
   [&] operator, but that decoding method is no longer used.) */
static Pattern make_pattern(char const *code)
{
    Pattern pat {};

    for(int pos = 0; pos < 16; pos++)
    {
        char const ch = code[pos];
        /* Bit position described by this character */
        int const shift = 15 - pos;

        /* Each character accounts for exactly one bit of the opcode */
        pat.bits <<= 1;

        switch(ch)
        {
        /* Constant bits */
        case '1':
            pat.bits |= 1;
            break;
        case '0':
            break;

        /* Argument bits: remember the latest position, count the width */
        case 'n':
            pat.n_sh = shift;
            pat.n_size++;
            break;
        case 'm':
            pat.m_sh = shift;
            pat.m_size++;
            break;
        case 'd':
            pat.d_sh = shift;
            pat.d_size++;
            break;
        case 'i':
            pat.i_sh = shift;
            pat.i_size++;
            break;

        /* Anything else only leaves a cleared bit */
        default:
            break;
        }
    }

    return pat;
}
/* Turn an argument token into the fxos language object that describes the
   argument for one concrete instance of an instruction.
   @token       Argument token; any token that does not denote an argument
                makes the function throw std::logic_error
   @opsize      Operation size indicated in the mnemonic
   @m @n @d @i  Field values of the instruction instance
   Returns the corresponding FxOS::Argument. A syntax error is raised through
   err() when @token is AT_DPC and @opsize is 0. */
static Argument make_arg(int token, int opsize, int m, int n, int d, int i)
{
    using Reg = CpuRegister;

    /* TODO: This function is too slow for the ~100k times it is called. */

    /* Registers designated by the low 4 bits of n and m */
    Reg reg_n(format("r%d", n & 0xf));
    Reg reg_m(format("r%d", m & 0xf));

    /* d seen as a signed 8-bit and as a signed 12-bit value */
    int32_t const disp8 = (int8_t)d;
    int32_t const disp12 = (d & 0x800) ? (int32_t)(d | 0xfffff000) : d;
    /* i seen as a signed 8-bit value */
    int32_t const imm8 = (int8_t)i;

    switch(token)
    {
    /* General-purpose registers */
    case R0: return Argument_Reg(Reg::R0);
    case RN: return Argument_Reg(reg_n);
    case RM: return Argument_Reg(reg_m);

    /* Plain, post-incremented and pre-decremented memory accesses */
    case AT_RN:  return Argument_Deref(reg_n);
    case AT_RM:  return Argument_Deref(reg_m);
    case AT_RNP: return Argument_PostInc(reg_n);
    case AT_RMP: return Argument_PostInc(reg_m);
    case AT_MRN: return Argument_PreDec(reg_n);

    /* Structure dereferencing */
    case AT_DRN:  return Argument_StructDeref(d, opsize, reg_n);
    case AT_DRM:  return Argument_StructDeref(d, opsize, reg_m);
    case AT_DGBR: return Argument_StructDeref(d, opsize, Reg::GBR);

    /* Array dereferencing */
    case AT_R0RN:  return Argument_ArrayDeref(Reg::R0, reg_n);
    case AT_R0RM:  return Argument_ArrayDeref(Reg::R0, reg_m);
    case AT_R0GBR: return Argument_ArrayDeref(Reg::R0, Reg::GBR);

    /* PC-relative jumps and data */
    case JUMP8:  return Argument_PcJump(disp8 * 2);
    case JUMP12: return Argument_PcJump(disp12 * 2);
    case AT_DPC:
    {
        if(!opsize) err("@(disp,pc) must have a size (.w, .l)");
        return Argument_PcRel(d, opsize);
    }

    /* Immediate operand */
    case IMM: return Argument_Imm(imm8);

    /* Control registers */
    case SR:   return Argument_Reg(Reg::SR);
    case PR:   return Argument_Reg(Reg::PR);
    case GBR:  return Argument_Reg(Reg::GBR);
    case VBR:  return Argument_Reg(Reg::VBR);
    case DBR:  return Argument_Reg(Reg::DBR);
    case SSR:  return Argument_Reg(Reg::SSR);
    case SPC:  return Argument_Reg(Reg::SPC);
    case SGR:  return Argument_Reg(Reg::SGR);
    case MACH: return Argument_Reg(Reg::MACH);
    case MACL: return Argument_Reg(Reg::MACL);

    /* Banked registers */
    case R0_BANK: return Argument_Reg(Reg::R0B);
    case R1_BANK: return Argument_Reg(Reg::R1B);
    case R2_BANK: return Argument_Reg(Reg::R2B);
    case R3_BANK: return Argument_Reg(Reg::R3B);
    case R4_BANK: return Argument_Reg(Reg::R4B);
    case R5_BANK: return Argument_Reg(Reg::R5B);
    case R6_BANK: return Argument_Reg(Reg::R6B);
    case R7_BANK: return Argument_Reg(Reg::R7B);

    default:
        break;
    }

    throw std::logic_error("lex asm builds args from bad tokens");
}
/* Register every instance of an instruction in the disassembly table.
   @p          Binary pattern of the instruction
   @mnemonic   Mnemonic, used to build each Instruction
   @argtoken1  Token of the first argument (0 if there is none)
   @argtoken2  Token of the second argument (0 if there is none)
   Every combination of values of the n, m, d and i fields is enumerated; for
   each one the opcode is assembled, the arguments are built with make_arg()
   and the instruction is passed to register_instruction(). */
static void instantiate(struct Pattern p, std::string mnemonic, int argtoken1,
    int argtoken2)
{
    /* Number of distinct values of each field */
    int const n_count = 1 << p.n_size;
    int const m_count = 1 << p.m_size;
    int const d_count = 1 << p.d_size;
    int const i_count = 1 << p.i_size;

    int const tokens[] = { argtoken1, argtoken2 };

    for(int n = 0; n < n_count; n++)
    for(int m = 0; m < m_count; m++)
    for(int d = 0; d < d_count; d++)
    for(int i = 0; i < i_count; i++)
    {
        /* Drop each field value at its position over the constant bits */
        uint16_t const opcode = p.bits
            | (n << p.n_sh)
            | (m << p.m_sh)
            | (d << p.d_sh)
            | (i << p.i_sh);

        Instruction ins(mnemonic);
        ins.opcode = opcode;

        for(int token: tokens)
        {
            if(token)
                ins.args.push_back(make_arg(token, ins.opsize, m, n, d, i));
        }

        register_instruction(ins);
    }
}
/* Load an assembly instruction table for the disassembler.
   @file          File holding the table
   @start_offset  Offset in the file data where the table starts
   @start_line    Line number of that offset, used in error messages
   Each instruction is described on one line by an opcode pattern, a mnemonic
   and up to two arguments (further argument tokens are ignored). An
   instruction is finalized, i.e. instantiated and registered, when the next
   pattern, a token on another line or the end of input is reached.
   Syntax errors are raised through err(). Whatever exception is thrown while
   loading, the strings allocated by the lexer and the scan buffer are
   released before it propagates. */
void load_asm(File &file, size_t start_offset, size_t start_line)
{
    /* Lex all instructions and fill in the general assembly table */
    YY_BUFFER_STATE buf = yy_scan_bytes(file.data() + start_offset,
        file.size() - start_offset);
    yylineno = start_line;
    filename = file.path();

    /* Instruction information; both strings are owned by this function */
    char *code = nullptr, *mnemonic = nullptr;
    int argtoken1 = 0, argtoken2 = 0;
    /* Text of the token just lexed, until it is stored or discarded */
    char *pending = nullptr;
    /* Line of the current instruction, -1 when there is none */
    int line = -1;

    try
    {
        while(1)
        {
            int t = yylex();
            /* Only these two tokens come with a strdup()'d text */
            pending = (t == PATTERN || t == MNEMONIC) ? yylval : nullptr;

            if(line >= 0 && (yylineno != line || t == PATTERN || t == -1))
            {
                /* Finalize current instruction */
                if(!mnemonic) err("%d: missing mnemonic", line);

                Pattern p = make_pattern(code);
                instantiate(p, mnemonic, argtoken1, argtoken2);

                /* Forget the instruction entirely so that nothing below can
                   reuse or free these strings a second time */
                free(code);
                free(mnemonic);
                code = nullptr;
                mnemonic = nullptr;
                line = -1;
            }

            if(t == -1) break;

            if(t == PATTERN)
            {
                code = pending;
                pending = nullptr;
                line = yylineno;
                argtoken1 = 0;
                argtoken2 = 0;
            }
            else if(line < 0)
            {
                /* Token on a line that has no opcode pattern of its own */
                err("token without an opcode pattern");
            }
            else if(t == MNEMONIC && !mnemonic)
            {
                mnemonic = pending;
                pending = nullptr;
            }
            else if(!mnemonic)
            {
                err("%d: missing mnemonic", line);
            }
            else if(t == MNEMONIC)
            {
                /* A second word is not a known argument keyword; reject it
                   here rather than let make_arg() fail on a bad token */
                err("unknown argument '%s'", pending);
            }
            else if(!argtoken1)
            {
                argtoken1 = t;
            }
            else if(!argtoken2)
            {
                argtoken2 = t;
            }
        }
    }
    catch(...)
    {
        free(pending);
        free(code);
        free(mnemonic);
        yy_delete_buffer(buf);
        throw;
    }

    yy_delete_buffer(buf);
}
} /* namespace FxOS */