Casio_asm/assembler/assembler.c

673 lines
16 KiB
C

#include "assembler.h"
//prototypes
//forward declarations so the helpers below can call each other regardless of definition order
int readChar(assembler_t *status);
void unreadChar(assembler_t *status);
int readLabel(assembler_t *status, int mode);
int A_readOpcode(assembler_t *status, int mode);
int pushOpcode(assembler_t *status, int mode);
int readImmediateArg(assembler_t *status);
int readRegisterArg(assembler_t *status, int no);
int readLabelArg(assembler_t *status, int mode);
int parseCode(assembler_t *status, int pass);
//BEGIN char detection macros
//NOTE: every macro below evaluates its argument several times, so only pass side-effect-free expressions
//detect hex digits (a-f, A-F, 0-9)
#define isHex(c) (((c)>='a'&&(c)<='f')||((c)>='A'&&(c)<='F')||((c)>='0'&&(c)<='9'))
//detect whitespace (space, tabulation, new line and carriage return)
#define isWhitespace(c) ((c)==' '||(c)=='\t'||(c)=='\n'||(c)=='\r')
//detect delimiters (comma and semicolon)
#define isDelimiter(c) ((c)==','||(c)==';')
//detect comment starters (single quote, pipe and double quote)
#define isComment(c) ((c)=='\''||(c)=='|'||(c)=='\"')
//detect label characters: isLabelStart accepts letters and underscore,
//isLabel additionally accepts digits (for every character after the first one)
#define isLabelStart(c) ((c)=='_'||((c)>='a'&&(c)<='z')||((c)>='A'&&(c)<='Z'))
#define isLabel(c) (isLabelStart((c))||((c)>='0'&&(c)<='9'))
//detect EOF: true once the read position has reached the size of the input buffer
#define isEOF(status) ((status)->pos>=(status)->in->size)
//END char detection macros
//BEGIN info functions
//append a string to the assembler's message buffer (status->stdout)
#define info(status, str) appendStringBuffer((status)->stdout, (str))
//append the current position as "line:col" (itodec presumably renders an int as decimal text)
#define printPos(status) do{info((status), itodec((status)->line));info((status), ":");info((status), itodec((status)->col));}while(0)
//append an error message followed by " at line:col" and a new line
#define error(status, str) do{info((status), (str));info((status), " at ");printPos((status));info((status), "\n");}while(0)
//END info functions
//opcode functions
//encoded length of an opcode: 1 unit (2 when extended) plus its arguments;
//nArgs==3 is the marker used for an immediate/label argument and counts as 2 units
#define opcodeLen(op) (((op).extended?2:1)+((op).nArgs==3?2:(op).nArgs))
//BEGIN constants
//function modes: MODE_MOCK only parses and tracks offsets, MODE_ACT also logs and records/emits results
#define MODE_MOCK 0
#define MODE_ACT 1
//assembler passes: labels are collected first, opcodes are emitted second
#define PASS_LABELS 0
#define PASS_OPCODES 1
//readChar flags: bit set returned by readChar describing the character it just read
#define FLAG_NEWLINE 0x1
#define FLAG_COMMENT 0x2
#define FLAG_DELIMITER 0x4
#define FLAG_WHITESPACE 0x8
#define FLAG_HEX 0x10
#define FLAG_LABEL 0x20
#define FLAG_EOF 0x40
//END constants
//consumes the next character of the input and classifies it
//returns a bit set of FLAG_* values describing that character;
//when the input is already exhausted, returns FLAG_EOF and leaves status->chr untouched
//updates status->pos, status->line, status->col and status->chr
int readChar(assembler_t *status) {
	char cur;
	int result;
	if(isEOF(status)) {
		return FLAG_EOF;
	}
	cur=getByteBuffer(status->in, status->pos++);
	status->chr=cur;
	result=0;
	//keep the line/column counters in sync with what was consumed
	if(cur!='\n') {
		status->col++;
	} else {
		status->line++;
		status->col=1;
		result|=FLAG_NEWLINE;
	}
	//a character may belong to several classes at once
	result|=isComment(cur)?FLAG_COMMENT:0;
	result|=isDelimiter(cur)?FLAG_DELIMITER:0;
	result|=isWhitespace(cur)?FLAG_WHITESPACE:0;
	result|=isHex(cur)?FLAG_HEX:0;
	result|=isLabel(cur)?FLAG_LABEL:0;
	//an embedded NUL byte is reported like the end of the input
	result|=(cur=='\0')?FLAG_EOF:0;
	return result;
}
//steps one character back in the input
//the column counter is decremented; when it drops below 1 the line counter is decremented too
//(the column of the previous line is not restored)
//status->chr is reloaded from the byte at the new position
void unreadChar(assembler_t *status) {
	status->pos-=1;
	status->col-=1;
	if(status->col<1) {
		status->line-=1;
	}
	status->chr=getByteBuffer(status->in, status->pos);
}
//parses a label definition (the leading '.' has already been consumed by the caller)
//the name starts with a letter or underscore, continues with letters, digits or
//underscores, holds at most 20 characters and must be followed by whitespace
//in MODE_ACT the label is logged and stored as (hash of name, current offset)
//returns 1 on success, 0 after reporting an error
int readLabel(assembler_t *status, int mode) {
	char text[21];
	int len=0;
	//the label table only has room for 64 entries
	if(mode==MODE_ACT&&status->labelCount==64) {
		error(status, "Too many labels");
		return 0;
	}
	//first character: must be a valid label start
	if(!(readChar(status)&FLAG_LABEL)||!isLabelStart(status->chr)) {
		error(status, "Illegal first character in label");
		return 0;
	}
	text[len++]=status->chr;
	//remaining characters
	for(;;) {
		if(!(readChar(status)&FLAG_LABEL)) break;
		if(len>=20) break;
		text[len++]=status->chr;
		if(isEOF(status)) {
			error(status, "Unexpected EOF while reading label");
			return 0;
		}
	}
	text[len]='\0';
	//the character that ended the name must be whitespace
	if(!isWhitespace(status->chr)) {
		error(status, "Label is not followed by whitespace");
		return 0;
	}
	if(mode!=MODE_ACT) {
		return 1;
	}
	//log this
	info(status, "Found label \'");
	info(status, text);
	info(status, "\' at ");
	printPos(status);
	info(status, "\n");
	//only the hash of the name is stored, to save some space
	int slot=status->labelCount++;
	status->labels[slot].offset=status->offset;
	status->labels[slot].hash=strsumd(text);
	return 1;
}
//parses one instruction and hands it to pushOpcode
//accepted shape: [E<delim>]mnemonic[<delim>argument], followed by whitespace, where
//<delim> is a comma or semicolon and argument is one of
//  #reg or #reg<delim>#reg  (one or two register arguments, nArgs=1 or 2)
//  .label                   (label argument, nArgs=3)
//  anything else            (immediate argument, nArgs=3)
//a leading 'E' followed by a delimiter selects the extended form of the opcode
//mode is forwarded to pushOpcode/readLabelArg; in MODE_ACT the opcode is also logged
//returns 1 on success, 0 after reporting an error (or when pushOpcode fails)
int A_readOpcode(assembler_t *status, int mode) {
	char name[OPCODE_NAME_LEN];
	int pos=0;
	int ok;
	opcode_info_t op;
	//read the first char
	ok=readChar(status)&FLAG_LABEL;
	//check if we have an extension info
	if(status->chr=='E') {
		status->op.extended=1;
		//make sure the next character is a delimiter
		if(!(readChar(status)&FLAG_DELIMITER)) {
			error(status, "Illegal character between E and mnemonic");
			return 0;
		}
		//come back to where we left off
		ok=readChar(status)&FLAG_LABEL;
	} else {
		status->op.extended=0;
	}
	if(ok&&isLabelStart(status->chr)) {
		name[pos++]=status->chr;
	} else {
		error(status, "Illegal first character in opcode");
		return 0;
	}
	while((readChar(status)&FLAG_LABEL)&&pos<(OPCODE_NAME_LEN-1)) {
		name[pos++]=status->chr;
		if(isEOF(status)) {
			error(status, "Unexpected EOF while reading opcode");
			return 0;
		}
	}
	//null-terminate the string
	name[pos]='\0';
	//set the instruction number for the mnemonic we read
	if(!Opcode_getOpByName(name, status->op.extended, &op)) {
		error(status, "Invalid mnemonic");
		info(status, "Faulting mnemonic was \'");
		info(status, name);
		info(status, "\'\n");
		return 0;
	}
	status->op.code=op.code;
	if(mode==MODE_ACT) {
		//log this
		info(status, "Found opcode \'");
		info(status, op.name);
		info(status, "\' at ");
		printPos(status);
		info(status, "\n");
	}
	//no delimiter after the mnemonic: the opcode has no argument
	if(!isDelimiter(status->chr)) {
		//check if we're still in a legal state
		if(!isWhitespace(status->chr)) {
			error(status, "Opcode is not followed by whitespace");
			return 0;
		}
		//add the opcode
		status->op.nArgs=0;
		return pushOpcode(status, mode);
	}
	//read the first character of the argument
	if(readChar(status)==FLAG_EOF) {
		error(status, "Unexpected EOF after opcode");
		return 0;
	}
	if(status->chr=='#') {
		//we have at least one register
		status->op.nArgs=1;
		//on return status->chr holds the character that ended the register number
		if(!readRegisterArg(status, 0)) {
			error(status, "Unexpected EOF after argument");
			return 0;
		}
		//check if we have a second argument
		if(isDelimiter(status->chr)) {
			//we do indeed have one, it must be a register too
			readChar(status);
			if(status->chr!='#') {
				error(status, "Unexpected character after argument");
				//stop here instead of parsing garbage as a register
				return 0;
			}
			status->op.nArgs=2;
			//no extra readChar afterwards: the terminating character is already
			//in status->chr and is validated by the whitespace check below
			if(!readRegisterArg(status, 1)) {
				error(status, "Unexpected EOF after argument");
				return 0;
			}
		}
		//check if we're still legal
		if(!isWhitespace(status->chr)) {
			error(status, "Arguments are not followed by whitespace");
			return 0;
		}
		//add the opcode
		return pushOpcode(status, mode);
	} else if(status->chr=='.') {
		//we have a label (readLabelArg reports its own errors)
		status->op.nArgs=3;
		if(!readLabelArg(status, mode)) return 0;
		//check if we're still in a legal state
		if(!isWhitespace(status->chr)) {
			error(status, "Label argument is not followed by whitespace");
			return 0;
		}
		//add the opcode
		return pushOpcode(status, mode);
	} else {
		//we have an immediate argument
		status->op.nArgs=3;
		//readImmediateArg reads the first character itself, so give it back
		unreadChar(status);
		if(!readImmediateArg(status)) {
			error(status, "Unexpected EOF after argument");
			return 0;
		}
		//check if we're still in a legal state
		if(!isWhitespace(status->chr)) {
			error(status, "Immediate argument is not followed by whitespace");
			return 0;
		}
		//add the opcode
		return pushOpcode(status, mode);
	}
}
//accounts for the opcode currently held in status->op and, in MODE_ACT, emits it
//status->offset always advances by the encoded length, so that both passes
//compute the same offsets; only MODE_ACT appends the encoding to status->out
//returns 1 on success, 0 when appending to the output buffer fails
int pushOpcode(assembler_t *status, int mode) {
	opcode_any_t cur=status->op;
	int size=opcodeLen(cur);
	status->offset+=size;
	//mock mode: nothing to emit
	if(mode!=MODE_ACT) {
		return 1;
	}
	if(status->op.extended) {
		//extended encoding
		opcode_ext_t enc;
		enc.nArgs=cur.nArgs;
		enc.op=cur.code;
		enc.ext=OP_extend;
		if(cur.nArgs==3) {
			//nArgs==3 marks an immediate value
			enc.imm=cur.immediate;
		} else {
			for(int i=0; i<cur.nArgs; i++) {
				enc.args[i]=cur.args[i];
			}
		}
		return appendDataBuffer(status->out, &enc, size)?1:0;
	}
	//regular encoding
	opcode_t enc;
	enc.nArgs=cur.nArgs;
	enc.op=cur.code;
	if(cur.nArgs==3) {
		//nArgs==3 marks an immediate value
		enc.imm=cur.immediate;
	} else {
		for(int i=0; i<cur.nArgs; i++) {
			enc.args[i]=cur.args[i];
		}
	}
	return appendDataBuffer(status->out, &enc, size)?1:0;
}
//parses an immediate argument into status->op.immediate
//a leading 'x' selects hexadecimal, a leading 'b' selects binary, anything else
//is handed to dectoi starting at the first character
//on return status->chr holds the character that ended the number
//returns 1 when a terminating character was read, 0 when the input ended before
//any terminator was found
//NOTE(review): the conversion helpers receive a pointer into the raw input buffer;
//this assumes they stop at the first non-digit character - confirm
int readImmediateArg(assembler_t *status) {
	int exhausted;
	readChar(status);
	char* ptr=(char*) status->in->data+status->pos;
	if(status->chr=='x') {
		status->op.immediate=hextoi(ptr);
	} else if(status->chr=='b') {
		status->op.immediate=bintoi(ptr);
	} else {
		//the character we just read is the first digit (or sign), include it
		ptr--;
		status->op.immediate=dectoi(ptr);
	}
	//skip the digits; remember whether the input was already exhausted before the
	//last read, because consuming the terminator as the very last byte of the
	//input is legal and must not be mistaken for a premature end of file
	do {
		exhausted=isEOF(status);
	} while(readChar(status)&FLAG_HEX);
	return !exhausted;
}
//parses a register number (hexadecimal, the leading '#' has already been consumed)
//into status->op.args[no]
//on return status->chr holds the character that ended the number
//returns 1 when a terminating character was read, 0 when the input ended before
//any terminator was found
//NOTE(review): hextoi receives a pointer into the raw input buffer; this assumes
//it stops at the first non-hex character - confirm
int readRegisterArg(assembler_t *status, int no) {
	char* ptr=(char*) status->in->data+status->pos;
	int exhausted;
	status->op.args[no]=hextoi(ptr);
	//skip the digits; remember whether the input was already exhausted before the
	//last read, because consuming the terminator as the very last byte of the
	//input is legal and must not be mistaken for a premature end of file
	do {
		exhausted=isEOF(status);
	} while(readChar(status)&FLAG_HEX);
	return !exhausted;
}
//parses a label used as an argument (the leading '.' has already been consumed)
//the name follows the same rules as a label definition: letter or underscore first,
//then letters, digits or underscores, at most 20 characters, followed by whitespace
//in MODE_ACT the name is hashed and looked up in the label table; the offset of the
//first entry with the same hash is stored in status->op.immediate
//in any other mode the name is only parsed
//returns 1 on success, 0 after reporting an error
int readLabelArg(assembler_t *status, int mode) {
	char text[21];
	int len=0;
	//first character: must be a valid label start
	if(!(readChar(status)&FLAG_LABEL)||!isLabelStart(status->chr)) {
		error(status, "Illegal first character in label");
		return 0;
	}
	text[len++]=status->chr;
	//remaining characters
	for(;;) {
		if(!(readChar(status)&FLAG_LABEL)) break;
		if(len>=20) break;
		text[len++]=status->chr;
		if(isEOF(status)) {
			error(status, "Unexpected EOF while reading label");
			return 0;
		}
	}
	text[len]='\0';
	//the character that ended the name must be whitespace
	if(!isWhitespace(status->chr)) {
		error(status, "Label is not followed by whitespace");
		return 0;
	}
	//nothing to resolve when we are only parsing
	if(mode!=MODE_ACT) {
		return 1;
	}
	int wanted=strsumd(text);
	//look for a label with the same hash
	for(int idx=0; idx<status->labelCount; idx++) {
		if(status->labels[idx].hash!=wanted) continue;
		//use its offset as the argument
		status->op.immediate=status->labels[idx].offset;
		info(status, "Label is number 0x");
		info(status, itohex(idx, 2));
		info(status, " which points to offset 0x");
		info(status, itohex(status->labels[idx].offset, 8));
		info(status, "\n");
		return 1;
	}
	//we didn't find it
	error(status, "Didn't find label");
	info(status, "Label was \'");
	info(status, text);
	info(status, "\'\n");
	return 0;
}
//runs a single pass over the whole input
//whitespace and comments (from a comment starter to the end of the line) are skipped;
//a token starting with '.' is a label definition, anything else is an opcode
//labels are acted upon during PASS_LABELS and opcodes during PASS_OPCODES; the
//other kind is still parsed in mock mode so that offsets stay in sync
//returns 1 when the end of the input is reached, 0 as soon as a token fails to parse
int parseCode(assembler_t *status, int pass) {
	while(!isEOF(status)) {
		//skip all whitespace
		while(readChar(status)&FLAG_WHITESPACE);
		//skip comments
		if(isComment(status->chr)) {
			//also stop at the end of the input: readChar keeps returning FLAG_EOF
			//there, so a comment on the last line without a trailing new line
			//would otherwise loop forever
			while(!isEOF(status)&&!(readChar(status)&FLAG_NEWLINE));
			continue;
		}
		//check if we're done
		if(isEOF(status)) return 1;
		if(status->chr=='.') {
			//read label
			if(!readLabel(status, pass==PASS_LABELS?MODE_ACT:MODE_MOCK)) return 0;
		} else {
			//read opcode, giving back its first character
			unreadChar(status);
			if(!A_readOpcode(status, pass==PASS_OPCODES?MODE_ACT:MODE_MOCK)) return 0;
		}
	}
	return 1;
}
//assembles the input in two passes over the same source
//pass 0 collects the labels, pass 1 emits the opcodes; the read position, output
//offset and line/column counters are rewound between the two passes
//returns 1 on success, 0 after reporting which pass failed
//(the counters are not rewound when a pass fails)
int Assembler_assemble(assembler_t *status) {
	//remember where we start so that the second pass can begin at the same place
	const int savedPos=status->pos;
	const int savedOffset=status->offset;
	const int savedLine=status->line;
	const int savedCol=status->col;
	//first pass (0): read the labels
	info(status, "Doing first pass, reading labels\n");
	int firstOk=parseCode(status, 0);
	if(!firstOk) {
		error(status, "First pass failed");
		return 0;
	}
	//rewind
	status->col=savedCol;
	status->line=savedLine;
	status->offset=savedOffset;
	status->pos=savedPos;
	//second pass (1): read the opcodes
	info(status, "Doing second pass, reading opcodes\n");
	int secondOk=parseCode(status, 1);
	if(!secondOk) {
		error(status, "Second pass failed");
		return 0;
	}
	return 1;
}
//append a message to the info buffer of Assembler_doFile
#define msg(a) appendStringBuffer(&infoBuf, a)
//assemble the file directly
//reads the source from inFile (and the optional configuration from cfgFile, which
//may be NULL), assembles it and writes header, mappings and bytecode to outFile
//without a config file the code starts at offset 0x0000 and three default mappings
//are written (ROM at 0x0000, 1kB of RAM at 0x10000, stack at 0x20000)
//*infoBuffer receives a pointer to the buffer holding all log and error messages;
//that buffer has static storage so the pointer stays valid after the call, which
//also means the function is not reentrant and each call replaces the buffer contents
//NOTE(review): the caller presumably releases the messages with freeBuffer - confirm
//returns 0 on success, or one of EOPEN1/EOPEN2/EOPEN3, ELEN, EALLOC, EREAD, ECFG,
//EINIT, EASM, EWRITE, ETRUNC on failure
//every buffer and file opened here is released on every path
int Assembler_doFile(char* inFile, char* outFile, char* cfgFile, buffer_t** infoBuffer) {
	//static: a pointer to this buffer is handed back to the caller
	static buffer_t infoBuf;
	buffer_t inBuf, outBuf, cfgBuf;
	assembler_config_t config;
	source_file_t header;
	mapping_t mapping;
	assembler_t assembler;
	opcode_info_t op;
	//what currently needs to be released by the cleanup code
	int inFD=-1, outFD=-1, cfgFD=-1;
	int haveIn=0, haveOut=0, haveCfg=0;
	int srcLen;
	int cfgLen=-1;
	int outputPos=0;
	int writeOk=1;
	int result=0;
	//create the info buffer
	*infoBuffer=&infoBuf;
	if(!allocBuffer(&infoBuf, 100)) return EALLOC;
	infoBuf.size=0;
	//open the relevant files
	inFD=File_open(inFile, FILE_OPEN_read);
	outFD=File_open(outFile, FILE_OPEN_write|FILE_OPEN_create);
	if(cfgFile) cfgFD=File_open(cfgFile, FILE_OPEN_read);
	//check if anything went wrong
	if(inFD<0) {
		msg("Couldn't open input file for reading\n");
		result=EOPEN1;
		goto cleanup;
	}
	if(outFD<0) {
		msg("Couldn't open output file for writing\n");
		result=EOPEN2;
		goto cleanup;
	}
	if(cfgFile&&cfgFD<0) {
		msg("Couldn't open config file for reading\n");
		result=EOPEN3;
		goto cleanup;
	}
	//read source file
	srcLen=File_length(inFD);
	if(srcLen<=0) {
		msg("Couldn't get input file length\n");
		result=ELEN;
		goto cleanup;
	}
	if(!allocBuffer(&inBuf, srcLen)) {
		msg("Couldn't allocate input buffer\n");
		result=EALLOC;
		goto cleanup;
	}
	haveIn=1;
	if(File_read(inFD, 0, inBuf.data, srcLen)!=srcLen) {
		msg("Error reading source file\n");
		result=EREAD;
		goto cleanup;
	}
	File_close(inFD);
	inFD=-1;
	//read config file
	if(cfgFile) {
		cfgLen=File_length(cfgFD);
		if(cfgLen<=0) {
			msg("Couldn't get config file length\n");
			result=ELEN;
			goto cleanup;
		}
		if(!allocBuffer(&cfgBuf, cfgLen)) {
			msg("Couldn't allocate config buffer\n");
			result=EALLOC;
			goto cleanup;
		}
		haveCfg=1;
		if(File_read(cfgFD, 0, cfgBuf.data, cfgLen)!=cfgLen) {
			msg("Error reading config file\n");
			result=EREAD;
			goto cleanup;
		}
		//close the config file (not the input file, which is already closed)
		File_close(cfgFD);
		cfgFD=-1;
		if(!readConfig(&cfgBuf, &config)) {
			msg("Error while reading config file\n");
			result=ECFG;
			goto cleanup;
		}
		//the raw config text is not needed once it has been parsed
		freeBuffer(&cfgBuf);
		haveCfg=0;
	}
	//allocate output buffer
	if(!allocBuffer(&outBuf, 256)) {
		msg("Couldn't allocate output buffer\n");
		result=EALLOC;
		goto cleanup;
	}
	haveOut=1;
	outBuf.size=0;
	//prepare the struct
	if(cfgFile) assembler.offset=config.codeOffset;
	else assembler.offset=0x0000;
	assembler.pos=0;
	assembler.line=1;
	assembler.col=1;
	assembler.labelCount=0;
	assembler.in=&inBuf;
	assembler.out=&outBuf;
	assembler.stdout=&infoBuf;
	//load everything we need
	Opcode_registerOpcodes();
	//make sure opcodes are loaded correctly by looking up a known mnemonic
	if(!Opcode_getOpByName("add_i", 0, &op)) {
		msg("Cannot continue: uninitialized opcodes\n");
		result=EINIT;
		goto cleanup;
	}
	//assemble the code
	if(!Assembler_assemble(&assembler)) {
		msg("Failed to assemble the code\n");
		result=EASM;
		goto cleanup;
	}
	//compute the header
	header.magic=MAGIC;
	header.hash=strsuml((char*) inBuf.data, inBuf.size);
	header.version=SOURCE_VERSION;
	freeBuffer(&inBuf);
	haveIn=0;
	//write header and mappings to file
	if(cfgFile) {
		//add the header
		header.entryPoint=config.entryPoint;
		header.mappings=config.mappingCount;
		if(File_write(outFD, outputPos, &header, HEADER_LENGTH)<0) writeOk=0;
		outputPos+=HEADER_LENGTH;
		for(int i=0; i<config.mappingCount; i++) {
			//read the mapping and add it
			assembler_mapping_t map=config.mappings[i];
			mapping.offset=map.offset;
			mapping.length=map.length;
			mapping.type=map.type;
			mapping.access=map.access;
			if(File_write(outFD, outputPos, &mapping, MAPPING_LENGTH)<0) writeOk=0;
			outputPos+=MAPPING_LENGTH;
			//write the filename if we have one
			if(mapping.type==MAPPING_TYPE_file||mapping.type==MAPPING_TYPE_file_raw) {
				//name length
				big_endian_int_t bei={.value=map.filenameLen};
				if(File_write(outFD, outputPos, &bei, sizeof(bei))<0) writeOk=0;
				outputPos+=sizeof(bei);
				//name itself
				//NOTE(review): &map.filename is only the address of the text if
				//filename is an array member rather than a pointer - confirm
				if(File_write(outFD, outputPos, &map.filename, map.filenameLen)<0) writeOk=0;
				outputPos+=map.filenameLen;
			}
		}
	} else {
		//we start at 0 in a regular script, and have 3 mappings
		header.entryPoint=0x0000;
		header.mappings=3;
		if(File_write(outFD, outputPos, &header, HEADER_LENGTH)<0) writeOk=0;
		outputPos+=HEADER_LENGTH;
		//we map the ROM at offset 0x0000
		mapping.offset=0x0000;
		mapping.length=outBuf.size;
		mapping.type=MAPPING_TYPE_rom;
		mapping.access=MAPPING_ACCESS_r|MAPPING_ACCESS_x;
		if(File_write(outFD, outputPos, &mapping, MAPPING_LENGTH)<0) writeOk=0;
		outputPos+=MAPPING_LENGTH;
		//we map 1kB of RAM at offset 0x10000
		mapping.offset=0x10000;
		mapping.length=1024;
		mapping.type=MAPPING_TYPE_ram;
		mapping.access=MAPPING_ACCESS_r|MAPPING_ACCESS_w;
		if(File_write(outFD, outputPos, &mapping, MAPPING_LENGTH)<0) writeOk=0;
		outputPos+=MAPPING_LENGTH;
		//we map the stack at offset 0x20000
		mapping.offset=0x20000;
		mapping.length=-1;
		mapping.type=MAPPING_TYPE_stack;
		mapping.access=MAPPING_ACCESS_r|MAPPING_ACCESS_w;
		if(File_write(outFD, outputPos, &mapping, MAPPING_LENGTH)<0) writeOk=0;
		outputPos+=MAPPING_LENGTH;
	}
	if(!writeOk) {
		msg("Can not write header to file\n");
		result=EWRITE;
		goto cleanup;
	}
	//write bytecode to file
	if(File_write(outFD, outputPos, outBuf.data, outBuf.size)<0) {
		msg("Can not write bytecode to file\n");
		result=EWRITE;
		goto cleanup;
	}
	outputPos+=outBuf.size;
	//drop whatever a previous, longer output file may have left behind
	if(File_truncate(outFD, outputPos)) {
		msg("Cannot truncate output file\n");
		result=ETRUNC;
		goto cleanup;
	}
cleanup:
	//release only what is still held at this point
	if(haveCfg) freeBuffer(&cfgBuf);
	if(haveIn) freeBuffer(&inBuf);
	if(haveOut) freeBuffer(&outBuf);
	if(cfgFD>=0) File_close(cfgFD);
	if(inFD>=0) File_close(inFD);
	if(outFD>=0) File_close(outFD);
	return result;
}