fxos/lib/load-asm.l

344 lines
9.5 KiB
Plaintext

%{
#include <fxos/load.h>
#include <fxos/lang.h>
#include <fxos/disassembly.h>
#include <fxos/errors.h>
#include <fxos/util.h>
#include <cstdarg>
/* Text value for parser: set by the PATTERN and MNEMONIC lexer rules to a
   fresh strdup() copy of the matched text. The copy is heap-allocated, so the
   code that consumes the token is responsible for free()-ing it. */
static char *yylval;
/* Tokens produced by the lexer. Numbering starts at 1 so that 0 can be used
   to mean "no argument"; the lexer returns -1 at end of input. */
enum Token {
    /* Instruction pattern and mnemonic */
    PATTERN = 1,
    MNEMONIC,

    /* General-purpose registers */
    R0,
    RN,
    RM,

    /* Banked registers */
    R0_BANK,
    R1_BANK,
    R2_BANK,
    R3_BANK,
    R4_BANK,
    R5_BANK,
    R6_BANK,
    R7_BANK,

    /* Control registers */
    SR,
    PR,
    GBR,
    VBR,
    DBR,
    SSR,
    SPC,
    SGR,
    MACH,
    MACL,

    /* PC-relative jumps and displacements (with 4-alignment correction) */
    JUMP8,
    JUMP12,
    AT_DPC,

    /* PC-relative address access (without memory access) */
    DPC,

    /* Immediate operands */
    IMM,

    /* Memory access with post-increment and pre-decrement */
    AT_RN,
    AT_RM,
    AT_RMP,
    AT_RNP,
    AT_MRN,

    /* Structure dereferencing */
    AT_DRN,
    AT_DRM,
    AT_DGBR,

    /* Array dereferencing */
    AT_R0RN,
    AT_R0RM,
    AT_R0GBR,
};
/* Opcode pattern of an instruction: the fixed bits of the 16-bit opcode plus
   the location of each of the four argument fields (n, m, d, i). */
struct Pattern {
    /* 16-bit opcode, bits corresponding to arguments are clear */
    uint16_t bits;

    /* Position of each argument field within the opcode */
    uint8_t n_sh;
    uint8_t m_sh;
    uint8_t d_sh;
    uint8_t i_sh;

    /* Length of each argument field, in bits */
    uint16_t n_size;
    uint16_t m_size;
    uint16_t d_size;
    uint16_t i_size;
};
/* Current file name: assigned by load_asm() before lexing and passed by err()
   to the syntax error it throws */
static std::string filename;
/* Report a syntax error at the current lexer position.
   @format  printf-style format string, followed by its arguments
   Formats the message (truncated to 255 characters) and throws an
   FxOS::SyntaxError carrying the current file name and line number. This
   function never returns. */
static void err(char const *format, ...)
{
    /* Kept in static storage so the text outlives this call -- presumably
       needed if SyntaxError only stores the pointer (TODO confirm) */
    static char message[256];

    va_list ap;
    va_start(ap, format);
    vsnprintf(message, sizeof message, format, ap);
    va_end(ap);

    throw FxOS::SyntaxError(filename.c_str(), yylineno, message);
}
%}
/* Scanner options: generated symbols use the "asm" prefix instead of "yy",
   no yywrap() is needed at end of input, and unput() is not generated. */
%option prefix="asm"
%option noyywrap
%option nounput
/* Opcode pattern: exactly 16 characters among 0, 1, n, m, d, i, anchored at
   the start of a line. */
pattern ^[01nmdi]{16}
/* Mnemonic: letters, digits, dots and slashes. */
mnemonic [a-zA-Z0-9./]+
/* Horizontal whitespace. */
space [ \t]+
%%
    /* Rule order matters: flex selects the longest match and, when several
       rules match the same length, the one listed first. Comment lines,
       whitespace and commas are skipped; a line must start with a 16-character
       opcode pattern, otherwise the catch-all start-of-line rule reports an
       error. The keyword rules are listed before the generic {mnemonic} rule
       so that e.g. "rn" is returned as RN rather than MNEMONIC. PATTERN and
       MNEMONIC tokens carry a strdup() copy of their text in yylval. End of
       input is reported as -1. */
^#[^\n]* ;
{space} ;
, ;
[\n] yylineno++;
{pattern} { yylval = strdup(yytext); return PATTERN; }
^.{0,16} { err("invalid opcode at start of line"); }
"#imm" { return IMM; }
"rn" { return RN; }
"rm" { return RM; }
"jump8" { return JUMP8; }
"jump12" { return JUMP12; }
"pc+disp" { return DPC; }
"@(disp,"[ ]*"pc)" { return AT_DPC; }
"@rn" { return AT_RN; }
"@rm" { return AT_RM; }
"@rm+" { return AT_RMP; }
"@rn+" { return AT_RNP; }
"@-rn" { return AT_MRN; }
"@(disp,"[ ]*"rn)" { return AT_DRN; }
"@(disp,"[ ]*"rm)" { return AT_DRM; }
"@(r0,"[ ]*"rn)" { return AT_R0RN; }
"@(r0,"[ ]*"rm)" { return AT_R0RM; }
"@(disp,"[ ]*"gbr)" { return AT_DGBR; }
"@(r0,"[ ]*"gbr)" { return AT_R0GBR; }
"r0" { return R0; }
"sr" { return SR; }
"pr" { return PR; }
"gbr" { return GBR; }
"vbr" { return VBR; }
"ssr" { return SSR; }
"spc" { return SPC; }
"sgr" { return SGR; }
"dbr" { return DBR; }
"r0_bank" { return R0_BANK; }
"r1_bank" { return R1_BANK; }
"r2_bank" { return R2_BANK; }
"r3_bank" { return R3_BANK; }
"r4_bank" { return R4_BANK; }
"r5_bank" { return R5_BANK; }
"r6_bank" { return R6_BANK; }
"r7_bank" { return R7_BANK; }
"mach" { return MACH; }
"macl" { return MACL; }
{mnemonic} { yylval = strdup(yytext); return MNEMONIC; }
. { err("lex error near '%s'", yytext); }
<<EOF>> { return -1; }
%%
namespace FxOS {
/* Build a pattern for an opcode.
   @code  16-character string using characters from "01mndi", most
          significant bit first

   Returns a Pattern where the '0'/'1' characters make up the constant bits
   and every argument letter leaves a cleared bit. For each argument x, the
   number of occurrences of its letter is accumulated in x_size and x_sh is
   set to the bit position of its last (least significant) occurrence, so
   that for a contiguous field:
     x = (opcode >> x_sh) & ((1 << x_size) - 1); */
static Pattern make_pattern(char const *code)
{
    Pattern pat {};

    for(int pos = 0; pos < 16; pos++)
    {
        /* Bit position of this character in the final opcode */
        int const shift = 15 - pos;

        /* Every character takes up one bit, whatever it stands for */
        pat.bits <<= 1;

        switch(code[pos])
        {
        /* Constant bits */
        case '1':
            pat.bits |= 1;
            break;
        case '0':
            break;

        /* Argument bits: stay clear in the opcode, extend the field */
        case 'n':
            pat.n_sh = shift;
            pat.n_size++;
            break;
        case 'm':
            pat.m_sh = shift;
            pat.m_size++;
            break;
        case 'd':
            pat.d_sh = shift;
            pat.d_size++;
            break;
        case 'i':
            pat.i_sh = shift;
            pat.i_size++;
            break;
        }
    }

    return pat;
}
/* Instantiate an argument token as an fxos language structure.
   @token        Argument token (tokens for other objects are rejected with a
                 std::logic_error)
   @opsize       Operation size indicated in the mnemonic
   @m @n @d @i   Field values of the instruction instance
   Returns a semantic FxOS::Argument. */
static Argument make_arg(int token, int opsize, int m, int n, int d, int i)
{
    using Reg = CpuRegister;

    /* TODO: This function is too slow for the ~100k times it is called. */

    /* Registers designated by the n and m fields */
    CpuRegister reg_n(format("r%d", n & 0xf));
    CpuRegister reg_m(format("r%d", m & 0xf));

    /* Displacement sign-extended from 8 bits and from 12 bits */
    int32_t disp8 = (int8_t)d;
    int32_t disp12 = (d & 0x800) ? (int32_t)(d | 0xfffff000) : (d);
    /* Immediate sign-extended from 8 bits */
    int32_t imm8 = (int8_t)i;

    switch(token)
    {
    /* General-purpose registers */
    case R0:
        return Argument_Reg(Reg::R0);
    case RN:
        return Argument_Reg(reg_n);
    case RM:
        return Argument_Reg(reg_m);

    /* Banked registers */
    case R0_BANK:
        return Argument_Reg(Reg::R0B);
    case R1_BANK:
        return Argument_Reg(Reg::R1B);
    case R2_BANK:
        return Argument_Reg(Reg::R2B);
    case R3_BANK:
        return Argument_Reg(Reg::R3B);
    case R4_BANK:
        return Argument_Reg(Reg::R4B);
    case R5_BANK:
        return Argument_Reg(Reg::R5B);
    case R6_BANK:
        return Argument_Reg(Reg::R6B);
    case R7_BANK:
        return Argument_Reg(Reg::R7B);

    /* Control registers */
    case SR:
        return Argument_Reg(Reg::SR);
    case PR:
        return Argument_Reg(Reg::PR);
    case GBR:
        return Argument_Reg(Reg::GBR);
    case VBR:
        return Argument_Reg(Reg::VBR);
    case DBR:
        return Argument_Reg(Reg::DBR);
    case SSR:
        return Argument_Reg(Reg::SSR);
    case SPC:
        return Argument_Reg(Reg::SPC);
    case SGR:
        return Argument_Reg(Reg::SGR);
    case MACH:
        return Argument_Reg(Reg::MACH);
    case MACL:
        return Argument_Reg(Reg::MACL);

    /* PC-relative jumps and addresses */
    case JUMP8:
        return Argument_PcJump(disp8 * 2);
    case JUMP12:
        return Argument_PcJump(disp12 * 2);
    case DPC:
        return Argument_PcAddr(d * 4);

    /* Immediate operand */
    case IMM:
        return Argument_Imm(imm8);

    /* Plain, post-incremented and pre-decremented memory access */
    case AT_RN:
        return Argument_Deref(reg_n);
    case AT_RM:
        return Argument_Deref(reg_m);
    case AT_RMP:
        return Argument_PostInc(reg_m);
    case AT_RNP:
        return Argument_PostInc(reg_n);
    case AT_MRN:
        return Argument_PreDec(reg_n);

    /* Structure dereferencing: displacement scaled by the operation size */
    case AT_DRN:
        return Argument_StructDeref(d*opsize, opsize, reg_n);
    case AT_DRM:
        return Argument_StructDeref(d*opsize, opsize, reg_m);
    case AT_DGBR:
        return Argument_StructDeref(d*opsize, opsize, Reg::GBR);

    /* Array dereferencing, indexed by r0 */
    case AT_R0RN:
        return Argument_ArrayDeref(Reg::R0, reg_n);
    case AT_R0RM:
        return Argument_ArrayDeref(Reg::R0, reg_m);
    case AT_R0GBR:
        return Argument_ArrayDeref(Reg::R0, Reg::GBR);

    /* PC-relative memory access: meaningless without an operation size */
    case AT_DPC:
        if(!opsize) err("@(disp,pc) must have a size (.w, .l)");
        return Argument_PcRel(d*opsize, opsize);
    }

    /* PATTERN, MNEMONIC or any value that is not an argument token */
    throw std::logic_error("lex asm builds args from bad tokens");
}
/* Record all the instances of an instruction in the disassembly table.
   @p          Instruction binary pattern
   @mnemonic   Mnemonic (especially important for operation size suffixes)
   @argtoken1  Token corresponding to the first argument (0 if no argument)
   @argtoken2  Token corresponding to the second argument (0 if unused)

   Enumerates every combination of values for the n, m, d and i fields of the
   pattern, builds the matching instruction with its arguments, and registers
   it with the disassembler for fast lookup. Returns the number of
   instantiated opcodes. */
static int instantiate(struct Pattern p, std::string mnemonic, int argtoken1,
    int argtoken2)
{
    /* Number of possible values for each field (1 when the field is absent) */
    int const n_count = 1 << p.n_size;
    int const m_count = 1 << p.m_size;
    int const d_count = 1 << p.d_size;
    int const i_count = 1 << p.i_size;

    /* Argument tokens in instruction order; 0 marks an unused slot */
    int const tokens[2] = { argtoken1, argtoken2 };

    int count = 0;

    for(int n = 0; n < n_count; n++)
    for(int m = 0; m < m_count; m++)
    for(int d = 0; d < d_count; d++)
    for(int i = 0; i < i_count; i++)
    {
        /* Drop each field value into its slot of the constant bits */
        uint16_t opcode = p.bits | (n << p.n_sh) | (m << p.m_sh)
            | (d << p.d_sh) | (i << p.i_sh);

        Instruction ins(mnemonic);
        ins.opcode = opcode;

        for(int tok: tokens)
        {
            if(tok)
                ins.args.push_back(make_arg(tok, ins.opsize, m, n, d, i));
        }

        register_instruction(ins);
        count++;
    }

    return count;
}
/* Load an assembly instruction table for the disassembler.
   @file          Buffer holding the text of the table
   @start_offset  Offset within the buffer where lexing starts
   @start_line    Line number given to the first lexed line; it is the base
                  for the line numbers reported in syntax errors

   Each instruction is an opcode pattern followed, on the same line, by a
   mnemonic and up to two argument tokens (further arguments are ignored).
   An instruction is finalized when a token from another line, a new pattern
   or the end of input is reached; it is then instantiated for every value of
   its fields. Returns the total number of instantiated opcodes.

   Syntax errors are thrown through err(). All intermediate resources (token
   text and the lexer buffer) are released on every exit path, including
   exceptions. */
int load_asm(Buffer const &file, size_t start_offset, size_t start_line)
{
    /* Scope guard: deletes the lexer buffer when the function is left, be it
       through a return or through an exception */
    struct ScanBuffer
    {
        YY_BUFFER_STATE state;
        ~ScanBuffer() { yy_delete_buffer(state); }
    };

    /* Lex all instructions and fill in the general assembly table */
    ScanBuffer scan { yy_scan_bytes(file.data.get() + start_offset,
        file.size - start_offset) };
    (void)scan;

    yylineno = start_line;
    filename = file.path;

    /* Number of instructions lexed */
    int total = 0;

    /* Instruction information. The mnemonic rule never matches an empty
       string, so an empty [mnemonic] means "not seen yet". */
    std::string code, mnemonic;
    int argtoken1 = 0, argtoken2 = 0;

    /* Line of the instruction being read, -1 when there is none */
    int line = -1;

    while(1)
    {
        int t = yylex();

        /* Take ownership of the token text immediately, so that it cannot
           leak no matter how the token ends up being used */
        std::string text;
        if(t == PATTERN || t == MNEMONIC)
        {
            if(!yylval) err("out of memory while reading token");
            text = yylval;
            free(yylval);
            yylval = nullptr;
        }

        if(line >= 0 && (yylineno != line || t == PATTERN || t == -1))
        {
            /* Finalize current instruction */
            if(mnemonic.empty()) err("%d: missing mnemonic", line);

            Pattern p = make_pattern(code.c_str());
            total += instantiate(p, mnemonic, argtoken1, argtoken2);

            /* Forget the instruction entirely, so that a stray token on a
               later line cannot finalize it a second time */
            line = -1;
            code.clear();
            mnemonic.clear();
            argtoken1 = 0;
            argtoken2 = 0;
        }

        if(t == -1) break;

        if(t == PATTERN)
        {
            code = text;
            line = yylineno;
            mnemonic.clear();
            argtoken1 = 0;
            argtoken2 = 0;
        }
        else if(line < 0)
        {
            /* Token on a line that does not start with an opcode pattern
               (the lexer lets it through after a long run of blanks) */
            err("token outside of an instruction (no opcode pattern on "
                "this line)");
        }
        else if(t == MNEMONIC && mnemonic.empty())
        {
            mnemonic = text;
        }
        else if(mnemonic.empty())
        {
            err("%d: missing mnemonic", line);
        }
        else if(!argtoken1)
        {
            argtoken1 = t;
        }
        else if(!argtoken2)
        {
            argtoken2 = t;
        }
    }

    return total;
}
} /* namespace FxOS */