initial system: instruction load, target creation

Contains basic stuff to start working:

* Loading files and assembly instruction descriptions
* Create a 64k-entry assembly decoding table
* Standard memory regions and information
* Create targets where files can be mapped at any addresses
* Load OSes and detect a few basic things
This commit is contained in:
Lephenixnoir 2019-12-14 22:33:57 +01:00
commit d78f7bca10
24 changed files with 2429 additions and 0 deletions

.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
# Build files
# Semantic exclude

Makefile Normal file
View File

@ -0,0 +1,124 @@
#! /usr/bin/make -f
# Tools
AR = ar
CC = gcc
CXX = g++
# Compiler flags, used by the compile rules and (through LDFLAGS) when linking.
# The value ends on this line: a trailing backslash here would splice the
# following line into the assignment.
CFLAGS = -Wall -Wextra -I include -D_GNU_SOURCE -std=c++17 -O3 -flto
# Linker flags: reuse the compiler flags and link against libfxos from bin/
LDFLAGS = $(CFLAGS) -Lbin -lfxos
# Dependency generation flags: emit, for target $@, a .d file named after the
# object file (foo.o -> foo.d)
DEPFLAGS = -MT $@ -MMD -MP -MF $(@:%.o=%.d)
# Default install prefix (beware of sudo...)
PREFIX ?= $(HOME)/.local
# Main targets
TARGETS := bin/fxos
# "all" builds every program listed in TARGETS
all: $(TARGETS)
# Aliases for building only the static library or only the command-line tool
all-lib: bin/libfxos.a
all-fxos: bin/fxos
# $(call obj,VAR): object paths for the sources listed in the variable named
# VAR; each word "x" becomes "build/x.o"
obj = $($1:%=build/%.o)
# $(call lex,VAR): object paths for the flex sources listed in the variable
# named VAR; each word "x.l" becomes "build/x.yy.c.o"
lex = $($1:%.l=build/%.yy.c.o)
## The library
# C++ and flex sources are both searched up to three directory levels below
# lib/, so that a lexer placed next to a nested source file is picked up too.
src-lib := $(wildcard lib/*.cpp lib/*/*.cpp lib/*/*/*.cpp)
lex-lib := $(wildcard lib/*.l lib/*/*.l lib/*/*/*.l)
obj-lib := $(call obj,src-lib) $(call lex,lex-lib)
# Static library archive. bin/ is an order-only prerequisite: it must exist,
# but its timestamp never forces the archive to be rebuilt.
bin/libfxos.a: $(obj-lib) | bin/
	$(AR) rcs $@ $^
## The command-line interface
src-fxos := $(wildcard fxos/*.cpp fxos/*/*.cpp fxos/*/*/*.cpp)
obj-fxos := $(call obj,src-fxos)
# The library is a prerequisite so that it is built first, but it is linked
# through -lfxos in LDFLAGS rather than listed on the command line.
bin/fxos: $(obj-fxos) bin/libfxos.a | bin/
	$(CXX) $(obj-fxos) -o $@ $(LDFLAGS)
# Source targets
# C++ sources
# Compiled with CFLAGS, which carries the include path and -std=c++17 for
# this project; CXXFLAGS is never defined in this Makefile.
build/%.cpp.o: %.cpp
	@mkdir -p $(dir $@)
	$(CXX) -c $< -o $@ $(CFLAGS) $(DEPFLAGS)
# C sources
# The object directory mirrors the source tree, so create it on demand.
build/%.c.o: %.c
	@mkdir -p $(dir $@)
	$(CC) -c $< -o $@ $(CFLAGS) $(DEPFLAGS)
# Flex lexers for the database
# Step 1: generate the scanner source. The output directory must exist before
# flex writes into it.
build/%.yy.c: %.l
	@mkdir -p $(dir $@)
	flex -o $@ -s $<
# Step 2: compile the generated scanner as C++. The directory is created
# before compiling (never at the path of the object file itself), and unused
# static helpers emitted by flex are not reported.
build/%.yy.c.o: build/%.yy.c
	@mkdir -p $(dir $@)
	$(CXX) -c $< -o $@ $(CFLAGS) -Wno-unused-function $(DEPFLAGS)
# Dependency generation
# Sources are collected up to three directory levels below lib/ and fxos/, so
# their .d files live one to three levels below build/<dir>/. Include all of
# them; the list is empty on a fresh tree, which include accepts.
include $(wildcard build/*/*.d build/*/*/*.d build/*/*/*/*.d)
.PHONY: all all-lib all-fxos clean clean-lib clean-fxos distclean
.PRECIOUS: build/%.d build/%.yy.c
# Installing
m644 := -m 644
m755 := -m 755
sed := -i -e '/^PREFIX=\\$$/ a \$(PREFIX)'
# Disable -m on Mac OS and use sed differently
ifeq "$(shell uname)" "Darwin"
m644 :=
m755 :=
sed := -i '' -e "$$(printf '/^PREFIX=/ a \\\n$(PREFIX)')"
endif

.PHONY: install uninstall

# Copy the programs listed in TARGETS into $(PREFIX)/bin
install: $(TARGETS)
	install -d $(PREFIX)/bin
	install -d $(PREFIX)/share/fxos
	install $(TARGETS) $(m755) $(PREFIX)/bin
	@echo "TODO: Install data files"
# install fxos/*.txt $(m644) $(PREFIX)/share/fxos

# Remove what install created. This must be a separate target: if these
# commands ran as part of install, they would delete the files just copied.
uninstall:
	rm -f $(TARGETS:%=$(PREFIX)/%)
	rm -rf $(PREFIX)/share/fxos

# Cleaning
# One target per name declared in .PHONY: library objects only, command-line
# tool objects only, all objects, and finally the binaries as well.
clean-lib:
	@rm -rf build/lib
clean-fxos:
	@rm -rf build/fxos
clean:
	@rm -rf build
distclean: clean
	@rm -rf bin

data/sh3.txt Normal file
View File

@ -0,0 +1,225 @@
type: assembly
name: sh-3
# Format: [01nmdi]{16}, followed by the mnemonic and the list of arguments.
# In each opcode, there should be at most one sequence of "m", "n", "d" and "i"
# each (representing the location of the argument).
# Possible argument strings are predefined and include:
# rn rm #imm
# jump8 jump12 disp pcdisp
# @rn @rm @rn+ @rm+ @-rn
# @(disp,rn) @(disp,rm) @(r0,rn) @(r0,rm) @(disp,gbr)
# The disassembler substitutes some elements as follows:
# rn -> value of the "n"-sequence
# rm -> value of the "m"-sequence
# #imm -> value of the "i"-sequence
# disp -> value of the "d"-sequence
# jump8 -> value of the 8-bit "d"-sequence x2 plus value of PC
# jump12 -> value of the 12-bit "d"-sequence x2 plus value of PC
# @(disp,pc) -> value of the 8-bit "d"-sequence x2 or x4, plus value of PC
# TODO: This list does not exactly reflect the behavior of the parser
0000000001001000 clrs
0000000000001000 clrt
0000000000101000 clrmac
0000000000011001 div0u
0000000000111000 ldtlb
0000000000001001 nop
0000000000101011 rte
0000000000001011 rts
0000000001011000 sets
0000000000011000 sett
0000000000011011 sleep
0100nnnn00010101 cmp/pl rn
0100nnnn00010001 cmp/pz rn
0100nnnn00010000 dt rn
0000nnnn00101001 movt rn
0100nnnn00000100 rotl rn
0100nnnn00000101 rotr rn
0100nnnn00100100 rotcl rn
0100nnnn00100101 rotcr rn
0100nnnn00100000 shal rn
0100nnnn00100001 shar rn
0100nnnn00000000 shll rn
0100nnnn00000001 shlr rn
0100nnnn00001000 shll2 rn
0100nnnn00001001 shlr2 rn
0100nnnn00011000 shll8 rn
0100nnnn00011001 shlr8 rn
0100nnnn00101000 shll16 rn
0100nnnn00101001 shlr16 rn
0011nnnnmmmm1100 add rm, rn
0011nnnnmmmm1110 addc rm, rn
0011nnnnmmmm1111 addv rm, rn
0010nnnnmmmm1001 and rm, rn
0011nnnnmmmm0000 cmp/eq rm, rn
0011nnnnmmmm0010 cmp/hs rm, rn
0011nnnnmmmm0011 cmp/ge rm, rn
0011nnnnmmmm0110 cmp/hi rm, rn
0011nnnnmmmm0111 cmp/gt rm, rn
0010nnnnmmmm1100 cmp/str rm, rn
0011nnnnmmmm0100 div1 rm, rn
0010nnnnmmmm0111 div0s rm, rn
0011nnnnmmmm1101 dmuls.l rm, rn
0011nnnnmmmm0101 dmulu.l rm, rn
0110nnnnmmmm1110 exts.b rm, rn
0110nnnnmmmm1111 exts.w rm, rn
0110nnnnmmmm1100 extu.b rm, rn
0110nnnnmmmm1101 extu.w rm, rn
0110nnnnmmmm0011 mov rm, rn
0000nnnnmmmm0111 mul.l rm, rn
0010nnnnmmmm1111 muls.w rm, rn
0010nnnnmmmm1110 mulu.w rm, rn
0110nnnnmmmm1011 neg rm, rn
0110nnnnmmmm1010 negc rm, rn
0110nnnnmmmm0111 not rm, rn
0010nnnnmmmm1011 or rm, rn
0100nnnnmmmm1100 shad rm, rn
0100nnnnmmmm1101 shld rm, rn
0011nnnnmmmm1000 sub rm, rn
0011nnnnmmmm1010 subc rm, rn
0011nnnnmmmm1011 subv rm, rn
0110nnnnmmmm1000 swap.b rm, rn
0110nnnnmmmm1001 swap.w rm, rn
0010nnnnmmmm1000 tst rm, rn
0010nnnnmmmm1010 xor rm, rn
0010nnnnmmmm1101 xtrct rm, rn
0100mmmm00001110 ldc rm, sr
0100mmmm00011110 ldc rm, gbr
0100mmmm00101110 ldc rm, vbr
0100mmmm00111110 ldc rm, ssr
0100mmmm01001110 ldc rm, spc
0100mmmm10001110 ldc rm, r0_bank
0100mmmm10011110 ldc rm, r1_bank
0100mmmm10101110 ldc rm, r2_bank
0100mmmm10111110 ldc rm, r3_bank
0100mmmm11001110 ldc rm, r4_bank
0100mmmm11011110 ldc rm, r5_bank
0100mmmm11101110 ldc rm, r6_bank
0100mmmm11111110 ldc rm, r7_bank
0100mmmm00001010 lds rm, mach
0100mmmm00011010 lds rm, macl
0100mmmm00101010 lds rm, pr
0000nnnn00000010 stc sr, rn
0000nnnn00010010 stc gbr, rn
0000nnnn00100010 stc vbr, rn
0000nnnn00110010 stc ssr, rn
0000nnnn01000010 stc spc, rn
0000nnnn10000010 stc r0_bank, rn
0000nnnn10010010 stc r1_bank, rn
0000nnnn10100010 stc r2_bank, rn
0000nnnn10110010 stc r3_bank, rn
0000nnnn11000010 stc r4_bank, rn
0000nnnn11010010 stc r5_bank, rn
0000nnnn11100010 stc r6_bank, rn
0000nnnn11110010 stc r7_bank, rn
0000nnnn00001010 sts mach, rn
0000nnnn00011010 sts macl, rn
0000nnnn00101010 sts pr, rn
0100nnnn00101011 jmp @rn
0100nnnn00001011 jsr @rn
0000nnnn10000011 pref @rn
0100nnnn00011011 tas.b @rn
0010nnnnmmmm0000 mov.b rm, @rn
0010nnnnmmmm0001 mov.w rm, @rn
0010nnnnmmmm0010 mov.l rm, @rn
0110nnnnmmmm0000 mov.b @rm, rn
0110nnnnmmmm0001 mov.w @rm, rn
0110nnnnmmmm0010 mov.l @rm, rn
0000nnnnmmmm1111 mac.l @rm+, @rn+
0100nnnnmmmm1111 mac.w @rm+, @rn+
0110nnnnmmmm0100 mov.b @rm+, rn
0110nnnnmmmm0101 mov.w @rm+, rn
0110nnnnmmmm0110 mov.l @rm+, rn
0100mmmm00000111 ldc.l @rm+, sr
0100mmmm00010111 ldc.l @rm+, gbr
0100mmmm00100111 ldc.l @rm+, vbr
0100mmmm00110111 ldc.l @rm+, ssr
0100mmmm01000111 ldc.l @rm+, spc
0100mmmm10000111 ldc.l @rm+, r0_bank
0100mmmm10010111 ldc.l @rm+, r1_bank
0100mmmm10100111 ldc.l @rm+, r2_bank
0100mmmm10110111 ldc.l @rm+, r3_bank
0100mmmm11000111 ldc.l @rm+, r4_bank
0100mmmm11010111 ldc.l @rm+, r5_bank
0100mmmm11100111 ldc.l @rm+, r6_bank
0100mmmm11110111 ldc.l @rm+, r7_bank
0100mmmm00000110 lds.l @rm+, mach
0100mmmm00010110 lds.l @rm+, macl
0100mmmm00100110 lds.l @rm+, pr
0010nnnnmmmm0100 mov.b rm, @-rn
0010nnnnmmmm0101 mov.w rm, @-rn
0010nnnnmmmm0110 mov.l rm, @-rn
0100nnnn00000011 stc.l sr, @-rn
0100nnnn00010011 stc.l gbr, @-rn
0100nnnn00100011 stc.l vbr, @-rn
0100nnnn00110011 stc.l ssr, @-rn
0100nnnn01000011 stc.l spc, @-rn
0100nnnn10000011 stc.l r0_bank, @-rn
0100nnnn10010011 stc.l r1_bank, @-rn
0100nnnn10100011 stc.l r2_bank, @-rn
0100nnnn10110011 stc.l r3_bank, @-rn
0100nnnn11000011 stc.l r4_bank, @-rn
0100nnnn11010011 stc.l r5_bank, @-rn
0100nnnn11100011 stc.l r6_bank, @-rn
0100nnnn11110011 stc.l r7_bank, @-rn
0100nnnn00000010 sts.l mach, @-rn
0100nnnn00010010 sts.l macl, @-rn
0100nnnn00100010 sts.l pr, @-rn
10000000nnnndddd mov.b r0, @(disp,rn)
10000001nnnndddd mov.w r0, @(disp,rn)
0001nnnnmmmmdddd mov.l rm, @(disp,rn)
10000100mmmmdddd mov.b @(disp,rm), r0
10000101mmmmdddd mov.w @(disp,rm), r0
0101nnnnmmmmdddd mov.l @(disp,rm), rn
0000nnnnmmmm0100 mov.b rm, @(r0,rn)
0000nnnnmmmm0101 mov.w rm, @(r0,rn)
0000nnnnmmmm0110 mov.l rm, @(r0,rn)
0000nnnnmmmm1100 mov.b @(r0,rm), rn
0000nnnnmmmm1101 mov.w @(r0,rm), rn
0000nnnnmmmm1110 mov.l @(r0,rm), rn
11000000dddddddd mov.b r0, @(disp,gbr)
11000001dddddddd mov.w r0, @(disp,gbr)
11000010dddddddd mov.l r0, @(disp,gbr)
11000100dddddddd mov.b @(disp,gbr), r0
11000101dddddddd mov.w @(disp,gbr), r0
11000110dddddddd mov.l @(disp,gbr), r0
11001101iiiiiiii and.b #imm, @(r0,gbr)
11001111iiiiiiii or.b #imm, @(r0,gbr)
11001100iiiiiiii tst.b #imm, @(r0,gbr)
11001110iiiiiiii xor.b #imm, @(r0,gbr)
1001nnnndddddddd mov.w @(disp,pc), rn
1101nnnndddddddd mov.l @(disp,pc), rn
11000111dddddddd mova.l @(disp,pc), r0
0000mmmm00100011 braf rm
0000mmmm00000011 bsrf rm
10001011dddddddd bf jump8
10001111dddddddd bf/s jump8
10001001dddddddd bt jump8
10001101dddddddd bt/s jump8
1010dddddddddddd bra jump12
1011dddddddddddd bsr jump12
0111nnnniiiiiiii add #imm, rn
11001001iiiiiiii and #imm, r0
10001000iiiiiiii cmp/eq #imm, r0
1110nnnniiiiiiii mov #imm, rn
11001011iiiiiiii or #imm, r0
11001000iiiiiiii tst #imm, r0
11001010iiiiiiii xor #imm, r0
11000011iiiiiiii trapa #imm

data/sh4.txt Normal file
View File

@ -0,0 +1,26 @@
type: assembly
name: sh-4a-extensions
0000nnnn01110011 movco.l r0, @rn
0000mmmm01100011 movli.l @rm, r0
0100mmmm10101001 movua.l @rm, r0
0100mmmm11101001 movua.l @rm+, r0
0000nnnn11000011 movca.l r0, @rn
0000nnnn11100011 icbi @rn
0000nnnn10010011 ocbi @rn
0000nnnn10100011 ocbp @rn
0000nnnn10110011 ocbwb @rn
0000nnnn11010011 prefi @rn
0000000010101011 synco
0100mmmm00111010 ldc rm, sgr
0100mmmm11111010 ldc rm, dbr
0100mmmm00110110 ldc.l @rm+, sgr
0100mmmm11110110 ldc.l @rm+, dbr
0000nnnn00111010 stc sgr, rn
0000nnnn11111010 stc dbr, rn
0100nnnn00110010 stc.l sgr, @-rn
0100nnnn11110010 stc.l dbr, @-rn

fxos/main.cpp Normal file
View File

@ -0,0 +1,79 @@
#include <fxos/lang.h>
#include <fxos/load.h>
#include <fxos/errors.h>
#include <fxos/os.h>
#include <cstdio>
using namespace FxOS;
char const *info_str =
"Header information:\n"
" Bootcode timestamp (DateA) (0x8000ffb0) : %s\n"
" Serial number (0x8000ffd0) : %s\n"
" Bootcode checksum (0x8000fffc) : 0x%s\n"
" OS version (0x80010020) : %s\n";
char const *footer_str =
"\nFooter information:\n"
" Detected footer address : 0x8%07x\n"
" Langdata entries found : %d\n"
" OS date (DateO) (0x8%07x)" " : %s\n"
" OS checksum (0x8%07x)" " : 0x%s\n";
char const *syscall_str =
"\nSyscall information:\n"
" Syscall table address (0x8001007c) : 0x%08x\n"
" Entries that point to valid memory : 0x%x\n"
" First seemingly invalid entry : 0x%08x\n"
" Syscall entries outside ROM:\n";
char const *syscall_nonrom_str =
" %%%03x -> 0x%08x (%s memory)\n";
void info(std::string path)
File file(path);
OS os(file);
Target t;
t.bind_region(MemoryRegion::ROM, file);
t.bind_region(MemoryRegion::ROM_P2, file);
uint32_t syscall_table = t.read_u32(0x8001007c);
uint32_t first_noncall = t.read_u32(syscall_table +
4 * os.syscall_count());
printf(syscall_str, syscall_table, os.syscall_count(), first_noncall);
int total = 0;
for(int i = 0; i < os.syscall_count(); i++)
uint32_t e = os.syscall(i);
MemoryRegion const *r = MemoryRegion::region_for(e);
if(!r || r->name == "ROM" || r->name == "ROM_P2") continue;
printf(syscall_nonrom_str, i, e, r->name.c_str());
if(!total) printf(" (none)\n");
int main(void)
catch(FxOS::SyntaxError &e)
std::cerr << e.file() << ":" << e.line() << ": " <<
e.what() << "\n" << std::flush;
return 1;
info("/home/lake/Documents/PC/Données/OS Graph 35+E II/3.10.bin");
return 0;

View File

@ -0,0 +1,22 @@
// fxos.disassembly: Disassembler
#include <fxos/lang.h>
#include <cstdint>
namespace FxOS {
/* Register an instruction.
@inst Instruction with [opcode] set to the binary pattern
Typically this is called by loader functions from data tables describing
instructions with parameters, not manually. See <fxos/load.h>. */
void register_instruction(Instruction ins);
} /* namespace FxOS */

include/fxos/endianness.h Normal file
View File

@ -0,0 +1,33 @@
// fxos.endianness. Somewhat cross-platform endianness conversion. (seriously?)
#if defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define htobe16(x) OSSwapHostToBigInt16(x)
#define htole16(x) OSSwapHostToLittleInt16(x)
#define be16toh(x) OSSwapBigToHostInt16(x)
#define le16toh(x) OSSwapLittleToHostInt16(x)
#define htobe32(x) OSSwapHostToBigInt32(x)
#define htole32(x) OSSwapHostToLittleInt32(x)
#define be32toh(x) OSSwapBigToHostInt32(x)
#define le32toh(x) OSSwapLittleToHostInt32(x)
#define htobe64(x) OSSwapHostToBigInt64(x)
#define htole64(x) OSSwapHostToLittleInt64(x)
#define be64toh(x) OSSwapBigToHostInt64(x)
#define le64toh(x) OSSwapLittleToHostInt64(x)
#elif defined(__linux__)
#include <sys/types.h>

include/fxos/errors.h Normal file
View File

@ -0,0 +1,40 @@
// fxos.errors: Exception specification
#include <exception>
#include <string>
namespace FxOS {
/* Syntax errors for fxos data files */
class SyntaxError: public std::exception
/* Specifies the file and line of the exception */
SyntaxError(char const *file, int line, char const *what):
m_file(file), m_line(line), m_what(what) {}
/* Provides access to these three objects */
char const *file() const noexcept {
return m_file;
int line() const noexcept {
return m_line;
char const *what() const noexcept override {
return m_what;
char const *m_file;
int m_line;
char const *m_what;
} /* namespace FxOS */
#endif /* LIBFXOS_ERRORS_H */

include/fxos/lang.h Normal file
View File

@ -0,0 +1,136 @@
// fxos.lang: Assembler language specification
#include <string>
#include <vector>
#include <cstdint>
namespace FxOS {
/* CPU register names, with a little meat for conversion to and from string */
class CpuRegister
enum CpuRegisterName {
/* Caller-saved general-purpose registers */
R0, R1, R2, R3, R4, R5, R6, R7,
/* Banked general-purpose registers. fxos does not account for
banking identities, these are just for naming and output. */
R0B, R1B, R2B, R3B, R4B, R5B, R6B, R7B,
/* Callee-saved general-purpose registers */
R8, R9, R10, R11, R12, R13, R14, R15,
/* System registers */
/* Control registers */
CpuRegister() = default;
/* Construction from CpuRegisterName */
constexpr CpuRegister(CpuRegisterName name): m_name(name) {}
/* Construction from string */
CpuRegister(std::string register_name);
/* Conversion to string */
std::string str() const noexcept;
/* Conversion to CpuRegisterName for switch statements */
constexpr operator CpuRegisterName() noexcept { return m_name; }
/* Comparison operators */
constexpr bool operator==(CpuRegister r) const {
return m_name == r.m_name;
constexpr bool operator!=(CpuRegister r) const {
return m_name != r.m_name;
CpuRegisterName m_name;
/* Addressing modes for arguments */
struct Argument
/* Various addressing modes in the language */
enum Kind {
Reg, /* rn */
Deref, /* @rn */
PostInc, /* @rn+ */
PreDec, /* @-rn */
StructDeref, /* @(disp,rn) or @(disp,gbr) */
ArrayDeref, /* @(r0,rn) or @(r0,gbr) */
PcRel, /* @(disp,pc) with 4-alignment correction */
PcJump, /* disp */
Imm, /* #imm */
Argument() = default;
/* String representation */
std::string str() const;
/* Addressing mode */
Kind kind;
/* Base register. Valid for all modes except Imm */
CpuRegister base;
/* Index register. Valid for ArrayDeref */
CpuRegister index;
/* Displacement in bytes. Valid for StructDeref, PcRel and PcJump */
int disp;
/* Operation size. Generally a multiplier for disp */
int opsize;
/* Immediate value. Valid for Imm */
int imm;
/* Argument constructors */
Argument Argument_Reg(CpuRegister base);
Argument Argument_Deref(CpuRegister base);
Argument Argument_PostInc(CpuRegister base);
Argument Argument_PreDec(CpuRegister base);
Argument Argument_StructDeref(int disp, int opsize, CpuRegister base);
Argument Argument_ArrayDeref(CpuRegister index, CpuRegister base);
Argument Argument_PcRel(int disp, int opsize);
Argument Argument_PcJump(int disp);
Argument Argument_Imm(int imm);
/* Assembler instruction */
struct Instruction
Instruction() = default;
/* Construct with one or several arguments */
Instruction(std::string mnemonic);
Instruction(std::string mnemonic, Argument arg);
Instruction(std::string mnemonic, Argument arg1, Argument arg2);
/* Original opcode. Initialized to 0 when unset, which is an invalid
instruction by design. */
uint16_t opcode;
/* Mnemonic **without the size indicator** */
std::string mnemonic;
/* Operation size (0, 1, 2 or 4) */
int opsize;
/* Arguments */
std::vector<Argument> args;
} /* namespace FxOS */
#endif /* LIBFXOS_LANG_H */

include/fxos/load.h Normal file
View File

@ -0,0 +1,46 @@
// fxos.load: Data file lexers and loaders
#include <fxos/util.h>
#include <string>
#include <map>
namespace FxOS {
using Header = std::map<std::string, std::string>;
/* Load any fxos data file.
@file Data file, assumed to follow the fxos header and data format.
This function reads the header with load_header() then calls the appropriate
lexer and loader depending on the type specified in the header. */
void load(std::string path);
/* Load the header of a data file.
@file Data file, assumed to start with an fxos header followed by type-specific contents
@offset Will be set to the byte offset where content starts
@line Will be set to the line number where content starts
This function is used when reading all data files for fxos. The header
indicates the file type, thus the syntax of the contents. Some metadata can
also be specified here.
The parameters [offset] and [line] are set to reflect the location in the
file where the raw content starts. These parameters are used to initialize
the lexers in all other load functions. */
Header load_header(File &file, size_t &offset, int &line);
/* Load an assembly instruction table for the disassembler.
@file Data file, presumably analyzed with load_header()
@start_offset Offset of assembly data in the file
@start_line Line where assembly data starts in the file (for errors) */
void load_asm(File &file, size_t start_offset, size_t start_line);
} /* namespace FxOS */
#endif /* LIBFXOS_LOAD_H */

include/fxos/memory.h Normal file
View File

@ -0,0 +1,98 @@
// fxos.memory: Standard memory regions
#include <string>
#include <cstdint>
namespace FxOS {
/* Memory area enumeration with a few tools */
class MemoryArea
enum MemoryAreaName {
/* Userspace seen from user and privileged mode */
U0, P0,
/* Second half of memory, only for privileged mode */
P1, P2, P3, P4,
MemoryArea() = default;
/* Construction from MemoryAreaName */
constexpr MemoryArea(MemoryAreaName name): m_name(name) {}
/* Start, end (last byte in area) and size of area */
uint32_t start() const noexcept;
uint32_t end() const noexcept;
uint32_t size() const noexcept;
/* Conversion to MemoryAreaName for switch */
constexpr operator MemoryAreaName() noexcept { return m_name; }
/* Comparison operators */
constexpr bool operator==(MemoryArea a) const {
return m_name == a.m_name;
constexpr bool operator!=(MemoryArea a) const {
return m_name != a.m_name;
MemoryAreaName m_name;
struct MemoryRegion
/* Address space regions that correspond to standard (ie. contiguous
multi-addressable) memory */
static MemoryRegion const &ROM;
static MemoryRegion const &RAM;
static MemoryRegion const &ROM_P2;
static MemoryRegion const &RAM_P2;
static MemoryRegion const &RS;
static MemoryRegion const &ILRAM;
static MemoryRegion const &XRAM;
static MemoryRegion const &YRAM;
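/* Determine if an address falls into one of the standard regions and return
a pointer to that region.
Throws std::out_of_range if none.
NOTE(review): callers such as fxos/main.cpp test the returned pointer
against null instead of catching an exception; confirm which of the two
the implementation does when no region matches. */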
static MemoryRegion const *region_for(uint32_t address);
/* Short constructor which calls guess_flags() */
MemoryRegion(std::string name, uint32_t start, uint32_t end,
bool writable);
/* Region name */
std::string name {};
/* Start address and end address. Generally the end address has one
additional byte. This is okay since no region is supposed to
extend to the very end of the memory. */
uint32_t start, end;
/* The region is writable under normal conditions */
bool writable;
/* The cache is active in that region (if enabled) */
bool cacheable;
/* The MMU is active in that region (if enabled) */
bool mappable;
/* Returns the size of the region */
uint32_t size() const noexcept;
/* Returns the area associated to the region (assuming it is fully
contained in one, which should always be the case) */
MemoryArea area() const noexcept;
/* Automatically guess the cacheable and mappable flags */
void guess_flags() noexcept;
} /* namespace FxOS */
#endif /* LIBFXOS_MEMORY_H */

include/fxos/operands.h Normal file
View File

@ -0,0 +1,108 @@
namespace FxOS {
/* Data types:
Registers longwords
Memory aligned u8, u16, u32
User-defined structs, ... */
/* Status register does not contain too much useful stuff:
(30)MD (29)RB -> Disassembler needs not understand them
(9)M (8)Q -> Idem, divisions are very rare
(7-4)IMASK (28)BL -> Interrupt system rarely used, even less explicitly
(1)S -> ?
(0)T -> Now THAT is important
Disassembler should name them but needs not understand their role, except T.
The T bit might just be stored outside. */
enum class DataKind {
/* Base types */
/* Bit fields over integers */
/* Structures (can only reside in memory) */
/* Arrays (can only reside in memory) */
class DataType
virtual DataKind kind() const noexcept = 0;
class IntegralType: public DataType
IntegralType(int bitsize);
DataKind kind() const noexcept override {
return DataKind::Integral;
int m_size;
class BitFieldType: public DataType
using Field = std::pair<int, std::string>;
BitFieldType(std::vector<Field> fields);
DataKind kind() const noexcept override {
return DataKind::BitField;
int m_size;
std::vector<Field> m_fields;
enum class OperandKind {
/* CPU-held registers accessed with instructions */
/* Standard randomly-addressable memory */
/* Memory-mapped module registers with specific access */
class Operand
/* Returns the operand kind (which is also the subclass identity) */
virtual OperandKind type() const noexcept = 0;
/* String representation */
virtual std::string str() const noexcept = 0;
class RegisterOperand: public Operand
RegisterOperand(std::string name): m_name(name) {}
OperandKind type() const noexcept override {
return OperandKind::CpuRegister;
std::string name() const noexcept {
return m_name;
std::string str() const noexcept override {
return this->name();
/* Register name for assembler listings */
std::string m_name;
} /* namespace FxOS */

include/fxos/os.h Normal file
View File

@ -0,0 +1,69 @@
// fxos.os: Operating system models and primitives
#ifndef LIBFXOS_OS_H
#define LIBFXOS_OS_H
#include <fxos/target.h>
#include <fxos/util.h>
#include <vector>
#include <map>
namespace FxOS {
class OS
/* Load an OS from a file. */
OS(File &file);
/* Get OS version */
std::string version() const noexcept;
/* Get number of syscalls */
int syscall_count() const noexcept;
/* Get a syscall entry */
uint32_t syscall(int id) const;
/* Find a syscall entry. Returns -1 if syscall is not found */
int find_syscall(uint32_t entry) const noexcept;
/* Get the footer address. Returns -1 if not found */
uint32_t footer() const noexcept;
/* Determine the OS version. This should be the first analysis function
to be called, because it determines the type of model (ie. fx9860g
versus fxcg50) thus the location of the syscall table and many more
important parameters. */
void parse_version();
/* Locate and parse the syscall table. */
void parse_syscall_table();
/* Locate the footer */
void parse_footer();
/* Working target which is a simulated memory with just the OS */
Target m_target;
// OS information
/* Version */
std::string m_version;
/* Syscall table, in order of syscall IDs */
std::vector<uint32_t> m_syscall_table;
/* Bimap converse, syscalls sorted by address */
std::map<uint32_t,int> m_syscall_addresses;
/* Footer address */
uint32_t m_footer;
} /* namespace FxOS */
#endif /* LIBFXOS_OS_H */

include/fxos/target.h Normal file
View File

@ -0,0 +1,121 @@
// The target which is being studied
#include <fxos/memory.h>
#include <fxos/util.h>
#include <vector>
#include <cstdint>
namespace FxOS {
class OS;
/* A common interface for simulated memory */
class AbstractMemory
/* Checks if an address or interval is simulated */
virtual bool covers(uint32_t addr, int size=1) const noexcept = 0;
/* Returns the data located at the provided virtual address. Throws
std::out_of_range if the interval is not entirely simulated */
virtual void const *translate(uint32_t addr, int size=1) const = 0;
/* Read data, with signed or unsigned extension. Virtual addresses
are used here, so they should be within the range of the region.
Throws std::out_of_range if this is not satisfied.
These functions do *not* check alignment because exceptionally there
are instructions which can read unaligned (movua.l). Check it
yourself! */
int32_t read_i8 (uint32_t addr) const;
uint32_t read_u8 (uint32_t addr) const;
int32_t read_i16(uint32_t addr) const;
uint32_t read_u16(uint32_t addr) const;
int32_t read_i32(uint32_t addr) const;
uint32_t read_u32(uint32_t addr) const;
/* Search a binary pattern in the specified area. Returns the virtual
address of the first occurrence if any is found, [end] otherwise. */
virtual uint32_t search(uint32_t start, uint32_t end,
void const *pattern, int size) const = 0;
/* A binding of a data buffer into a memory region of the target. */
struct Binding: public AbstractMemory
/* Constructor from file */
Binding(MemoryRegion const &region, File &file);
/* Targeted region, might overlap with other bindings */
MemoryRegion region;
/* Actual data. This buffer must have at least [size] bytes */
uint8_t *data;
/* Binding size, is the minimum of the region size and the data size */
uint32_t size;
/* Checks if an address is covered by the binding */
bool covers(uint32_t addr, int size=1) const noexcept override;
/* Returns this process' address (in [data]) corresponding to the
provided virtual address */
void const *translate(uint32_t addr, int size=1) const override;
/* Search a pattern */
uint32_t search(uint32_t start, uint32_t end, void const *pattern,
int size) const override;
class Target: public AbstractMemory
/* Create an empty target with no sections */
/* Bind an OS. This is used to either disassemble the OS itself, or
select the OS version for which code is being disassembled (typical
use is for add-ins). */
void bind_os(OS &os);
/* Bind a memory region from a file. The region can either be standard
(see <fxos/memory.h>) or custom.
If several loaded regions overlap on some addresses, *the last
loaded region will be used*. Thus, new regions can be loaded to
selectively override parts of the target.
Generally it is preferable to bind an OS image to the target's ROM
area using bind_os(), rather than using bind_region(). This is
because bind_os() will also enable OS-specific tasks such as
syscall resolution.
If the file is smaller than the region being bound, the region is
shrunk to fit the file. */
void bind_region(MemoryRegion const &region, File &file);
/* Check if an address is bound */
bool covers(uint32_t addr, int size=1) const noexcept override;
/* Returns the data at the provided virtual address */
void const *translate(uint32_t addr, int size=1) const override;
/* Search a pattern */
uint32_t search(uint32_t start, uint32_t end, void const *pattern,
int size) const override;
/* Bound OS image */
OS *m_os;
/* Bound regions (in order of binding) */
std::vector<Binding> m_bindings;
} /* namespace FxOS */
#endif /* LIBFXOS_TARGET_H */

include/fxos/util.h Normal file
View File

@ -0,0 +1,74 @@
// fxos.util: Utility functions
#include <memory>
#include <iostream>
#include <string>
#include <memory>
#include <cstdio>
/* Format a string with printf() syntax.
   @format  printf()-style format string
   @args    Values consumed by the conversion specifiers of [format]
   Returns the formatted text, or an empty string if snprintf() reports an
   error. */
template<typename ... Args>
std::string format(std::string const &format, Args ... args)
{
	/* First pass: measure the output. snprintf() signals an error with a
	   negative value, which must not be turned into a buffer size. */
	int length = snprintf(nullptr, 0, format.c_str(), args ...);
	if(length < 0) return std::string();

	/* Reserve space for snprintf() to put its NUL */
	size_t size = static_cast<size_t>(length) + 1;
	std::unique_ptr<char[]> buf(new char[size]);
	snprintf(buf.get(), size, format.c_str(), args ...);

	/* Remove the NUL from the string */
	return std::string(buf.get(), buf.get() + length);
}
/* An RAII contiguous memory buffer */
class Buffer
/* Empty buffer initialized with given byte */
Buffer(int size, int fill=0x00);
/* Buffer initialized from file, reading the given size and offset.
* Default offset is beginning of file.
* Default size (-1) is file size. If the specified region ends after
the end of the file, the buffer is padded. */
Buffer(std::string filepath, int size=-1, int offset=0, int fill=0x00);
/* Create a buffer by copying (and possibly resizing) another buffer */
Buffer(Buffer const &other, int new_size=-1);
/* Size */
int size() const noexcept;
/* A file abstraction that supports both direct load and memory mapping */
class File
/* Load a file, either by buffer or by memory mapping */
File(std::string path, bool mmap=false);
/* Get the path, size and loading address of the file */
std::string path() const noexcept;
size_t size() const noexcept;
char *data() const noexcept;
/* Free the allocated buffers */
/* Path to file */
std::string m_path;
/* Size of buffer, or mapping */
size_t m_size;
/* Whether mmap() was used on the file */
bool m_mmap;
/* Data buffer (m_mmap=false) or mapping address (m_mmap=true) */
char *m_addr;
#endif /* LIBFXOS_UTIL_H */

lib/disassembly.cpp Normal file
View File

@ -0,0 +1,24 @@
#include <fxos/disassembly.h>
#include <optional>
#include <array>
namespace FxOS {
/* Instruction map */
static std::array<std::optional<Instruction>,65536> insmap;
/* Register an instruction at a given opcode.
   @ins  Instruction whose [opcode] field selects the slot in the table
   Throws std::logic_error if an instruction is already registered for that
   opcode. */
void register_instruction(Instruction ins)
{
	uint16_t opcode = ins.opcode;

	/* Only an occupied slot is a collision; throwing unconditionally would
	   make the store below unreachable */
	if(insmap[opcode])
		throw std::logic_error("opcode collision");

	insmap[opcode] = ins;
}
} /* namespace FxOS */

lib/lang.cpp Normal file
View File

@ -0,0 +1,225 @@
#include <fxos/lang.h>
#include <fxos/util.h>
#include <stdexcept>
#include <string>
#include <map>
namespace FxOS {
// CPU registers
using Reg = CpuRegister::CpuRegisterName;
static std::map<Reg, std::string> regnames = {
{ Reg::R0, "r0" },
{ Reg::R1, "r1" },
{ Reg::R2, "r2" },
{ Reg::R3, "r3" },
{ Reg::R4, "r4" },
{ Reg::R5, "r5" },
{ Reg::R6, "r6" },
{ Reg::R7, "r7" },
{ Reg::R0B, "r0_bank" },
{ Reg::R1B, "r1_bank" },
{ Reg::R2B, "r2_bank" },
{ Reg::R3B, "r3_bank" },
{ Reg::R4B, "r4_bank" },
{ Reg::R5B, "r5_bank" },
{ Reg::R6B, "r6_bank" },
{ Reg::R7B, "r7_bank" },
{ Reg::R8, "r8" },
{ Reg::R9, "r9" },
{ Reg::R10, "r10" },
{ Reg::R11, "r11" },
{ Reg::R12, "r12" },
{ Reg::R13, "r13" },
{ Reg::R14, "r14" },
{ Reg::R15, "r15" },
{ Reg::MACH, "mach" },
{ Reg::MACL, "macl" },
{ Reg::PR, "pr" },
{ Reg::PC, "pc" },
{ Reg::SR, "sr" },
{ Reg::SSR, "ssr" },
{ Reg::SPC, "spc" },
{ Reg::GBR, "gbr" },
{ Reg::VBR, "vbr" },
{ Reg::DBR, "dbr" },
{ Reg::SGR, "sgr" },
/* Construction from string - pretty slow */
CpuRegister::CpuRegister(std::string name)
for(auto it = regnames.begin(); it != regnames.end(); it++)
if(it->second == name)
m_name = it->first;
throw std::invalid_argument("invalid CpuRegister name");
/* Conversion to string */
std::string CpuRegister::str() const noexcept
// Instruction arguments
/* External constructors */
/* Build an argument of kind Argument::Reg.
   @base  Register stored in the argument's [base] field */
Argument Argument_Reg(CpuRegister base)
{
	Argument arg;
	arg.kind = Argument::Reg;
	arg.base = base;
	return arg;
}
/* Build an argument of kind Argument::Deref (the "@rn" operand form).
   @base  Register stored in the argument's [base] field */
Argument Argument_Deref(CpuRegister base)
{
	Argument arg;
	arg.kind = Argument::Deref;
	arg.base = base;
	return arg;
}
/* Build an argument of kind Argument::PostInc (the "@rn+" operand form).
   @base  Register stored in the argument's [base] field */
Argument Argument_PostInc(CpuRegister base)
{
	Argument arg;
	arg.kind = Argument::PostInc;
	arg.base = base;
	return arg;
}
/* Build an argument of kind Argument::PreDec (the "@-rn" operand form).
   @base  Register stored in the argument's [base] field */
Argument Argument_PreDec(CpuRegister base)
{
	Argument arg;
	arg.kind = Argument::PreDec;
	arg.base = base;
	return arg;
}
/* Build an argument of kind Argument::StructDeref (the "@(disp,rn)" operand
   form).
   @disp    Displacement stored in [disp]
   @opsize  Value stored in [opsize] (presumably the access size in bytes;
            confirm against the callers)
   @base    Register stored in [base] */
Argument Argument_StructDeref(int disp, int opsize, CpuRegister base)
{
	Argument arg;
	arg.kind = Argument::StructDeref;
	arg.base = base;
	arg.disp = disp;
	arg.opsize = opsize;
	return arg;
}
/* Build an argument of kind Argument::ArrayDeref (the "@(r0,rn)" operand
   form).
   @index  Register stored in [index]
   @base   Register stored in [base] */
Argument Argument_ArrayDeref(CpuRegister index, CpuRegister base)
{
	Argument arg;
	arg.kind = Argument::ArrayDeref;
	arg.base = base;
	arg.index = index;
	return arg;
}
/* Build an argument of kind Argument::PcRel (the "@(disp,pc)" operand form).
   @disp    Displacement stored in [disp]
   @opsize  Value stored in [opsize] (presumably the access size in bytes;
            confirm against the callers) */
Argument Argument_PcRel(int disp, int opsize)
{
	Argument arg;
	arg.kind = Argument::PcRel;
	arg.disp = disp;
	arg.opsize = opsize;
	return arg;
}
/* Build an argument of kind Argument::PcJump.
   @disp  Displacement stored in [disp] */
Argument Argument_PcJump(int disp)
{
	Argument arg;
	arg.kind = Argument::PcJump;
	arg.disp = disp;
	return arg;
}
/* Build an argument of kind Argument::Imm (the "#imm" operand form).
   @imm  Immediate value stored in [imm] */
Argument Argument_Imm(int imm)
{
	Argument arg;
	arg.kind = Argument::Imm;
	arg.imm = imm;
	return arg;
}
/* String representation */
std::string Argument::str() const
case Argument::Reg:
return format("r%d", base);
case Argument::Deref:
return format("@r%d", base);
case Argument::PostInc:
return format("@r%d+", base);
case Argument::PreDec:
return format("@-%dr", base);
case Argument::StructDeref:
return format("@(%d,%s)", disp, base.str().c_str());
case Argument::ArrayDeref:
return format("@(%s,%s)", index.str().c_str(),
case Argument::PcRel:
return format("@(%d,pc)", disp);
case Argument::PcJump:
return format("pc+%d", disp);
return "(invalid)";
// Instruction creation
/* Build an instruction from its mnemonic alone.

   A trailing ".b", ".w" or ".l" suffix is removed from the mnemonic and
   recorded in [opsize] as 1, 2 or 4 respectively. Without such a suffix the
   mnemonic is kept whole and [opsize] stays 0. [opcode] starts at 0.

   @mn  Mnemonic, optionally ending with a size suffix (eg. "mov.l") */
Instruction::Instruction(std::string mn):
	opcode(0), opsize(0)
{
	size_t const len = mn.size();

	/* Only mnemonics of 2+ characters can carry a ".x" suffix */
	if(len >= 2 && mn[len - 2] == '.')
	{
		switch(mn[len - 1])
		{
		case 'b': opsize = 1; break;
		case 'w': opsize = 2; break;
		case 'l': opsize = 4; break;
		}

		/* Strip the suffix only when it was a recognized one */
		if(opsize != 0) mn.erase(len - 2, 2);
	}

	mnemonic = mn;
}
Instruction::Instruction(std::string mn, Argument arg):
Instruction::Instruction(std::string mn, Argument arg1, Argument arg2):
} /* namespace FxOS */

lib/load-asm.l Normal file
View File

@ -0,0 +1,332 @@
#include <fxos/load.h>
#include <fxos/lang.h>
#include <fxos/disassembly.h>
#include <fxos/errors.h>
#include <fxos/util.h>
#include <cstdarg>
/* Text value for parser */
static char *yylval;
/* Argument tokens */
enum Token {
/* Instruction pattern and mnemonic */
/* General-purpose registers */
R0, RN, RM,
/* Banked registers */
/* Control registers */
/* PC-relative jumps and displacements (with 4-alignment correction) */
/* Immediate operands */
/* Memory access with post-increment and pre-decrement */
/* Structure dereferencing */
/* Array dereferencing */
/* Instruction opcode pattern

   Describes a 16-bit opcode together with the location of up to four
   argument fields, named n, m, d and i after the letters used in the
   textual patterns. For a field x, the value is recovered from a concrete
   opcode as (opcode >> x_sh) & ((1 << x_size) - 1). */
struct Pattern {
/* 16-bit opcode, bits corresponding to arguments are clear */
uint16_t bits;
/* Position of the arguments: right-shift amount applied to the opcode to
   bring each field down to bit 0 */
uint8_t n_sh, m_sh, d_sh, i_sh;
/* Length of arguments, in bits */
uint16_t n_size, m_size, d_size, i_size;
/* Current file name */
static std::string filename;
/* Error messages and exceptions */
static void err(char const *format, ...)
static char buf[256];
va_list args;
va_start(args, format);
vsnprintf(buf, 256, format, args);
throw FxOS::SyntaxError(filename.c_str(), yylineno, buf);
/* Use "asm" instead of "yy" as the prefix of the generated scanner's global
   symbols */
%option prefix="asm"
/* No yywrap(): the scanner stops at the end of its input */
%option noyywrap
/* Do not generate the unused unput() function */
%option nounput
/* Opcode pattern: exactly 16 characters among 0, 1, n, m, d and i, at the
   start of a line */
pattern ^[01nmdi]{16}
/* Mnemonic: letters, digits, dots and slashes */
mnemonic [a-zA-Z0-9./]+
/* Run of spaces and tabs */
space [ \t]+
	/* Comment lines (starting with #), blanks and commas produce no token */
^#[^\n]* ;
{space} ;
, ;
	/* Line counting is done by hand; err() reports yylineno */
[\n] yylineno++;
	/* Opcode pattern at the start of a line; the text is passed in yylval
	   as a heap copy made by strdup() */
{pattern} { yylval = strdup(yytext); return PATTERN; }
	/* Anything else at the start of a line is rejected. Longer matches and,
	   on ties, the rules listed above take precedence over this one. */
^.{0,16} { err("invalid opcode at start of line"); }
	/* Immediate, general-purpose register and jump operands */
"#imm" { return IMM; }
"rn" { return RN; }
"rm" { return RM; }
"jump8" { return JUMP8; }
"jump12" { return JUMP12; }
	/* Memory operands; spaces are allowed after the comma in the
	   parenthesized forms */
"@(disp,"[ ]*"pc)" { return AT_DPC; }
"@rn" { return AT_RN; }
"@rm" { return AT_RM; }
"@rm+" { return AT_RMP; }
"@rn+" { return AT_RNP; }
"@-rn" { return AT_MRN; }
"@(disp,"[ ]*"rn)" { return AT_DRN; }
"@(disp,"[ ]*"rm)" { return AT_DRM; }
"@(r0,"[ ]*"rn)" { return AT_R0RN; }
"@(r0,"[ ]*"rm)" { return AT_R0RM; }
"@(disp,"[ ]*"gbr)" { return AT_DGBR; }
"@(r0,"[ ]*"gbr)" { return AT_R0GBR; }
	/* Registers referred to by their exact name */
"r0" { return R0; }
"sr" { return SR; }
"pr" { return PR; }
"gbr" { return GBR; }
"vbr" { return VBR; }
"ssr" { return SSR; }
"spc" { return SPC; }
"sgr" { return SGR; }
"dbr" { return DBR; }
"r0_bank" { return R0_BANK; }
"r1_bank" { return R1_BANK; }
"r2_bank" { return R2_BANK; }
"r3_bank" { return R3_BANK; }
"r4_bank" { return R4_BANK; }
"r5_bank" { return R5_BANK; }
"r6_bank" { return R6_BANK; }
"r7_bank" { return R7_BANK; }
"mach" { return MACH; }
"macl" { return MACL; }
	/* Any other word is a mnemonic, passed in yylval as a heap copy made by
	   strdup(). The keywords above win ties because they are listed first. */
{mnemonic} { yylval = strdup(yytext); return MNEMONIC; }
	/* Any character not matched so far is a lexing error */
. { err("lex error near '%s'", yytext); }
	/* End of input is reported as -1 */
<<EOF>> { return -1; }
namespace FxOS {
/* Build a pattern for an opcode.
@code 16-byte string using characters from "01mndi"
Returns a logical pattern representing the opcode and its arguments.
Each argument has two parameters: shift ("sh") and size ("size"). From an
instance of the instruction, the value of parameter x can be recovered as:
x = (opcode >> x_sh) & ((1 << x_size) - 1);
(Originally, the second parameter was named x_mask and was equal to the rhs
of the [&] operator. But this decoding method is now unused.) */
static Pattern make_pattern(char const *code)
Pattern p {};
for(int i = 0; i < 16; i++)