fxos/lib/load-asm.l

392 lines
12 KiB
Plaintext

%{
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
#include <fxos/lang.h>
#include <fxos/function.h>
#include <fxos/util/format.h>
#include <fxos/util/log.h>
#include <cstdarg>
#include <string>
/* Text value for parser. The PATTERN and MNEMONIC lexer rules store a
   strdup()'d copy of the matched text here before returning their token; the
   code that consumes the token takes over the copy and must free() it. Other
   tokens leave this variable untouched. */
static char *yylval;
/* Tokens returned by the lexer. Numbering starts at 1; the loader uses 0 to
   mean "no argument" when recording argument tokens. */
enum Token {
    /* Instruction pattern and mnemonic */
    PATTERN = 1,
    MNEMONIC,

    /* General-purpose registers */
    R0,
    RN,
    RM,

    /* Banked registers */
    R0_BANK,
    R1_BANK,
    R2_BANK,
    R3_BANK,
    R4_BANK,
    R5_BANK,
    R6_BANK,
    R7_BANK,

    /* Control registers */
    SR,
    PR,
    GBR,
    VBR,
    DBR,
    SSR,
    SPC,
    SGR,
    MACH,
    MACL,

    /* PC-relative jumps and displacements (with 4-alignment correction) */
    JUMP8,
    JUMP12,
    AT_DPC,

    /* PC-relative address access (without memory access) */
    DPC,

    /* Immediate operands */
    IMM,

    /* Memory access with post-increment and pre-decrement */
    AT_RN,
    AT_RM,
    AT_RMP,
    AT_RNP,
    AT_MRN,

    /* Structure dereferencing */
    AT_DRN,
    AT_DRM,
    AT_DGBR,

    /* Array dereferencing */
    AT_R0RN,
    AT_R0RM,
    AT_R0GBR,

    /* Instruction tags */
    TAG_RET,
    TAG_UNCONDJUMP,
    TAG_CONDJUMP,
    TAG_DYNJUMP,
    TAG_CALL,
    TAG_DELAY,
    TAG_ISLOT,
};
/* Binary description of an instruction opcode pattern */
struct Pattern {
    /* 16-bit opcode in which every argument bit is left at 0 */
    uint16_t bits;

    /* Bit position of each argument (n, m, d, i) within the opcode */
    uint8_t n_sh;
    uint8_t m_sh;
    uint8_t d_sh;
    uint8_t i_sh;

    /* Number of bits occupied by each argument (n, m, d, i) */
    uint16_t n_size;
    uint16_t m_size;
    uint16_t d_size;
    uint16_t i_size;
};
/* Name of the file currently being lexed; used to prefix error messages */
static std::string filename;
/* Log an error prefixed with "<filename>:<current line>: ". [fmt] must be a
   string literal because it is concatenated with the prefix at compile time.
   The ##__VA_ARGS__ form (a GNU extension) drops the trailing comma when no
   extra argument is given.
   NOTE(review): [filename] is a std::string passed for a %s conversion; this
   relies on FxOS_log() accepting std::string arguments -- confirm. */
#define err(fmt, ...) \
FxOS_log(ERR, "%s:%d: " fmt, filename, yylineno, ##__VA_ARGS__)
%}
/* Scanner options: generated symbols use the "asm" prefix instead of "yy",
   the scanner stops at end of input without calling yywrap(), and no unput()
   function is generated. */
%option prefix="asm"
%option noyywrap
%option nounput
/* Named sub-patterns:
   pattern   16 characters from "01nmdi" at the very start of a line
   mnemonic  run of letters, digits, dots and slashes
   space     run of spaces and tabs */
pattern ^[01nmdi]{16}
mnemonic [a-zA-Z0-9./]+
space [ \t]+
%%
    /* Comment lines (starting with #), blanks and commas carry no token */
^#[^\n]* ;
{space} ;
, ;
    /* Line numbers are tracked by hand */
[\n] yylineno++;
    /* Opcode pattern: the text is duplicated into yylval for the caller */
{pattern} { yylval = strdup(yytext); return PATTERN; }
    /* Any other line start is reported; the action does not return, so
       scanning simply continues with the rest of the line */
^.{0,16} { err("invalid opcode at start of line"); }
    /* Operand keywords. Flex prefers the longest match and, for matches of
       equal length, the rule listed first; these rules therefore take
       precedence over {mnemonic} for the exact keywords below. */
"#imm" { return IMM; }
"rn" { return RN; }
"rm" { return RM; }
"jump8" { return JUMP8; }
"jump12" { return JUMP12; }
"pc+disp" { return DPC; }
"@(disp,"[ ]*"pc)" { return AT_DPC; }
"@rn" { return AT_RN; }
"@rm" { return AT_RM; }
"@rm+" { return AT_RMP; }
"@rn+" { return AT_RNP; }
"@-rn" { return AT_MRN; }
"@(disp,"[ ]*"rn)" { return AT_DRN; }
"@(disp,"[ ]*"rm)" { return AT_DRM; }
"@(r0,"[ ]*"rn)" { return AT_R0RN; }
"@(r0,"[ ]*"rm)" { return AT_R0RM; }
"@(disp,"[ ]*"gbr)" { return AT_DGBR; }
"@(r0,"[ ]*"gbr)" { return AT_R0GBR; }
    /* Fixed registers */
"r0" { return R0; }
"sr" { return SR; }
"pr" { return PR; }
"gbr" { return GBR; }
"vbr" { return VBR; }
"ssr" { return SSR; }
"spc" { return SPC; }
"sgr" { return SGR; }
"dbr" { return DBR; }
"r0_bank" { return R0_BANK; }
"r1_bank" { return R1_BANK; }
"r2_bank" { return R2_BANK; }
"r3_bank" { return R3_BANK; }
"r4_bank" { return R4_BANK; }
"r5_bank" { return R5_BANK; }
"r6_bank" { return R6_BANK; }
"r7_bank" { return R7_BANK; }
"mach" { return MACH; }
"macl" { return MACL; }
    /* Instruction tags */
"%ret" { return TAG_RET; }
"%uncondjump" { return TAG_UNCONDJUMP; }
"%condjump" { return TAG_CONDJUMP; }
"%dynjump" { return TAG_DYNJUMP; }
"%call" { return TAG_CALL; }
"%delay" { return TAG_DELAY; }
"%islot" { return TAG_ISLOT; }
    /* Anything else made of mnemonic characters: the text is duplicated into
       yylval for the caller */
{mnemonic} { yylval = strdup(yytext); return MNEMONIC; }
    /* Unrecognized character: reported, then scanning continues */
. { err("lex error near %s", yytext); }
    /* End of input is signalled to the caller with -1 */
<<EOF>> { return -1; }
%%
namespace FxOS {
/* Turn the textual form of an opcode into its binary description.

   @code  String of 16 characters from "01nmdi", most significant bit first

   '0' and '1' are the fixed bits of the opcode and are accumulated into the
   [bits] field. Each 'n', 'm', 'd' or 'i' marks one bit of the corresponding
   argument; these positions stay clear in [bits]. For every argument x,
   [x_size] counts its bits and [x_sh] is the position of the last one
   encountered (its lowest bit), so that the value of x can be recovered from
   an instance of the instruction as:
     x = (opcode >> x_sh) & ((1 << x_size) - 1);
   Any other character yields a clear bit that belongs to no argument. */
static Pattern make_pattern(char const *code)
{
    Pattern pat {};

    for(int pos = 0; pos < 16; pos++) {
        /* Position of this character's bit in the final opcode */
        int const shift = 15 - pos;

        /* Make room for the new bit; it stays 0 unless the character is '1' */
        pat.bits <<= 1;

        switch(code[pos]) {
        case '1':
            pat.bits |= 1;
            break;
        case 'n':
            pat.n_sh = shift;
            pat.n_size++;
            break;
        case 'm':
            pat.m_sh = shift;
            pat.m_size++;
            break;
        case 'd':
            pat.d_sh = shift;
            pat.d_size++;
            break;
        case 'i':
            pat.i_sh = shift;
            pat.i_size++;
            break;
        default:
            break;
        }
    }

    return pat;
}
/* Build the operand described by an argument token for one particular
   instance of an instruction.

   @token        Argument token (any other kind of token is rejected)
   @opsize       Operation size indicated in the mnemonic (0 if none)
   @m @n @d @i   Argument values of the instruction instance

   Returns the corresponding AsmOperand. For a token that is not an argument,
   an error is logged and a register operand on Reg::UNDEFINED is returned. */
static AsmOperand make_arg(int token, int opsize, int m, int n, int d, int i)
{
    using Reg = CpuRegister;

    static Reg general_purpose[16] = {
        Reg::R0, Reg::R1, Reg::R2, Reg::R3,
        Reg::R4, Reg::R5, Reg::R6, Reg::R7,
        Reg::R8, Reg::R9, Reg::R10, Reg::R11,
        Reg::R12, Reg::R13, Reg::R14, Reg::R15,
    };

    /* Registers selected by the n and m fields */
    Reg rn = general_purpose[n & 0xf];
    Reg rm = general_purpose[m & 0xf];

    /* Displacement sign-extended from 8 bits and from 12 bits */
    int32_t disp8 = (int8_t)d;
    int32_t disp12 = d;
    if(d & 0x800)
        disp12 = (int32_t)(d | 0xfffff000);

    /* Immediate sign-extended from 8 bits */
    int32_t imm8 = (int8_t)i;

    auto reg = [](Reg r) { return AsmOperand::mkReg(r); };

    switch(token) {
    /* General-purpose registers */
    case R0:
        return reg(Reg::R0);
    case RN:
        return reg(rn);
    case RM:
        return reg(rm);

    /* Banked registers */
    case R0_BANK:
        return reg(Reg::R0B);
    case R1_BANK:
        return reg(Reg::R1B);
    case R2_BANK:
        return reg(Reg::R2B);
    case R3_BANK:
        return reg(Reg::R3B);
    case R4_BANK:
        return reg(Reg::R4B);
    case R5_BANK:
        return reg(Reg::R5B);
    case R6_BANK:
        return reg(Reg::R6B);
    case R7_BANK:
        return reg(Reg::R7B);

    /* Control registers */
    case SR:
        return reg(Reg::SR);
    case PR:
        return reg(Reg::PR);
    case GBR:
        return reg(Reg::GBR);
    case VBR:
        return reg(Reg::VBR);
    case DBR:
        return reg(Reg::DBR);
    case SSR:
        return reg(Reg::SSR);
    case SPC:
        return reg(Reg::SPC);
    case SGR:
        return reg(Reg::SGR);
    case MACH:
        return reg(Reg::MACH);
    case MACL:
        return reg(Reg::MACL);

    /* PC-relative jumps and addresses, immediates */
    case JUMP8:
        return AsmOperand::mkPcJump(disp8 * 2);
    case JUMP12:
        return AsmOperand::mkPcJump(disp12 * 2);
    case DPC:
        return AsmOperand::mkPcAddr(d * 4);
    case IMM:
        return AsmOperand::mkImm(imm8);

    /* Plain, post-increment and pre-decrement memory accesses */
    case AT_RN:
        return AsmOperand::mkDeref(rn, opsize);
    case AT_RM:
        return AsmOperand::mkDeref(rm, opsize);
    case AT_RMP:
        return AsmOperand::mkPostInc(rm, opsize);
    case AT_RNP:
        return AsmOperand::mkPostInc(rn, opsize);
    case AT_MRN:
        return AsmOperand::mkPreDec(rn, opsize);

    /* Structure dereferencing: the displacement is scaled by the size */
    case AT_DRN:
        return AsmOperand::mkStructDeref(d * opsize, opsize, rn);
    case AT_DRM:
        return AsmOperand::mkStructDeref(d * opsize, opsize, rm);
    case AT_DGBR:
        return AsmOperand::mkStructDeref(d * opsize, opsize, Reg::GBR);

    /* Array dereferencing */
    case AT_R0RN:
        return AsmOperand::mkArrayDeref(Reg::R0, rn, opsize);
    case AT_R0RM:
        return AsmOperand::mkArrayDeref(Reg::R0, rm, opsize);
    case AT_R0GBR:
        return AsmOperand::mkArrayDeref(Reg::R0, Reg::GBR, opsize);

    /* PC-relative memory access: a missing size is reported but the operand
       is still produced */
    case AT_DPC:
        if(!opsize)
            err("@(disp,pc) must have a size (.w, .l)");
        return AsmOperand::mkPcRel(d * opsize, opsize);

    default:
        break;
    }

    FxOS_log(ERR, "bad argument token: %d", token);
    return reg(Reg::UNDEFINED);
}
/* Record all the instances of an instruction in the disassembly table.
@p Instruction binary pattern
@mnemonic Mnemonic (especially important for operation size suffixes)
@argtoken1 Token corresponding to the first argument (0 if no argument)
@argtoken2 Token corresponding to the second argument (0 if unused)
@insntags Instruction tags
Generates all the instances of the instruction, then sends them to the
disassembler for fast lookup. Returns number of instantiated opcodes. */
static int instantiate(struct Pattern p, char const *mnemonic, int argtoken1,
int argtoken2, int insntags)
{
int total = 0;
for(int n = 0; n < (1 << p.n_size); n++)
for(int m = 0; m < (1 << p.m_size); m++)
for(int d = 0; d < (1 << p.d_size); d++)
for(int i = 0; i < (1 << p.i_size); i++) {
// TODO: Support 32-bit instructions
uint16_t opcode = p.bits;
opcode |= (n << p.n_sh);
opcode |= (m << p.m_sh);
opcode |= (d << p.d_sh);
opcode |= (i << p.i_sh);
AsmOperand op1, op2;
int opCount = 0;
int opsize = 0;
std::string mn {mnemonic};
if(mn.ends_with(".b"))
opsize = 1;
else if(mn.ends_with(".w"))
opsize = 2;
else if(mn.ends_with(".l"))
opsize = 4;
if(argtoken1) {
op1 = make_arg(argtoken1, opsize, m, n, d, i);
opCount++;
}
if(argtoken2) {
op2 = make_arg(argtoken2, opsize, m, n, d, i);
opCount++;
}
AsmInstruction ins(opcode, mnemonic, insntags, opCount, op1, op2);
register_instruction(ins);
total++;
}
return total;
}
/* Translate a tag token into the matching AsmInstruction tag.
   Returns 0 for any token that is not a tag. */
int get_tag(int t)
{
    switch(t) {
    case TAG_RET:
        return AsmInstruction::Tag::IsReturn;
    case TAG_UNCONDJUMP:
        return AsmInstruction::Tag::IsUnconditionalJump;
    case TAG_CONDJUMP:
        return AsmInstruction::Tag::IsConditionalJump;
    case TAG_DYNJUMP:
        return AsmInstruction::Tag::IsDynamicJump;
    case TAG_CALL:
        return AsmInstruction::Tag::IsCall;
    case TAG_DELAY:
        return AsmInstruction::Tag::HasDelaySlot;
    case TAG_ISLOT:
        return AsmInstruction::Tag::IsInvalidDelaySlot;
    default:
        return 0;
    }
}
/* Load an assembly instruction table for the disassembler.

   @file  Buffer holding the text of the instruction table

   Each instruction is described on one line: an opcode pattern, a mnemonic,
   then up to two arguments and any number of tags. An instruction is
   finalized (instantiated and registered) when a token from another line, a
   new pattern or the end of input is reached.

   Tokens found outside of an instruction (before the first pattern, or on a
   line that does not start with a valid pattern) are ignored; the lexer has
   already reported such lines. Loading stops at the first instruction that
   has no mnemonic.

   Returns the total number of opcodes instantiated. */
int load_instructions(Buffer const &file)
{
    /* Lex all instructions and fill in the general assembly table */
    YY_BUFFER_STATE buf = yy_scan_bytes(file.data.get(), file.size);
    yylineno = 1;
    filename = file.path;

    /* Number of instructions lexed */
    int total = 0;

    /* Instruction being read; [code] and [mnemonic] are owned strings */
    char *code = nullptr, *mnemonic = nullptr;
    int argtoken1 = 0, argtoken2 = 0;
    int insntags = 0;

    /* Line of the instruction being read, -1 when there is none */
    int line = -1;

    while(1) {
        int t = yylex();
        int tag = get_tag(t);

        /* Only these two tokens come with a strdup()'d text in yylval. The
           text is released at the end of the iteration unless one of the
           branches below takes it over. */
        char *text = (t == PATTERN || t == MNEMONIC) ? yylval : nullptr;

        if(line >= 0 && (yylineno != line || t == PATTERN || t == -1)) {
            /* Finalize current instruction */
            if(!mnemonic) {
                err("missing mnemonic at line %d", line);
                free(text);
                break;
            }

            Pattern p = make_pattern(code);
            total += instantiate(p, mnemonic, argtoken1, argtoken2, insntags);

            /* Forget the instruction entirely, so that further tokens on a
               line without a pattern can neither finalize it a second time
               nor touch the freed strings */
            free(code);
            free(mnemonic);
            code = nullptr;
            mnemonic = nullptr;
            line = -1;
        }

        if(t == -1)
            break;

        if(t == PATTERN) {
            /* Start a new instruction */
            code = text;
            text = nullptr;
            line = yylineno;
            argtoken1 = 0;
            argtoken2 = 0;
            insntags = 0;
        }
        else if(line < 0) {
            /* Token outside of any instruction: nothing to attach it to */
        }
        else if(t == MNEMONIC && !mnemonic) {
            mnemonic = text;
            text = nullptr;
        }
        else if(!mnemonic) {
            err("missing mnemonic at line %d", line);
            break;
        }
        else if(tag) {
            insntags |= tag;
        }
        else if(!argtoken1) {
            argtoken1 = t;
        }
        else if(!argtoken2) {
            argtoken2 = t;
        }

        free(text);
    }

    /* Release whatever is left of an instruction abandoned on error */
    free(code);
    free(mnemonic);

    yy_delete_buffer(buf);
    return total;
}
} /* namespace FxOS */