commit d78f7bca1020b0db7917fd1d0dcf71eed742e85a Author: Lephenixnoir Date: Sat Dec 14 22:33:57 2019 +0100 initial system: instruction load, target creation Contains basic stuff to start working: * Loading files and assembly instruction descriptions * Create a 64k-entry assembly decoding table * Standard memory regions and information * Create targets where files can be mapped at any addresses * Load OSes and detect a few basic things diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..92f6ab5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +# Build files +/build +/bin + +# Semantic exclude +/exclude diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5662bef --- /dev/null +++ b/Makefile @@ -0,0 +1,124 @@ +#! /usr/bin/make -f + +# Tools +AR = ar +CC = gcc +CXX = g++ + +# Compiler flags +CFLAGS = -Wall -Wextra -I include -D_GNU_SOURCE -std=c++17 -O3 -flto \ + -fno-diagnostics-show-line-numbers +CXXFLAGS = $(CFLAGS) +# Linker flags +LDFLAGS = $(CFLAGS) -Lbin -lfxos +# Dependency generation flags +DEPFLAGS = -MT $@ -MMD -MP -MF $(@:%.o=%.d) + +# Default install prefix (beware of sudo...) 
+PREFIX ?= $(HOME)/.local + +# +# Main targets +# + +TARGETS := bin/fxos + +all: $(TARGETS) + +all-lib: bin/libfxos.a +all-fxos: bin/fxos + +obj = $($1:%=build/%.o) +lex = $($1:%.l=build/%.yy.c.o) + +## The library + +src-lib := $(wildcard lib/*.cpp lib/*/*.cpp lib/*/*/*.cpp) +lex-lib := $(wildcard lib/*.l lib/*/*.l lib/*/*.l) + +obj-lib := $(call obj,src-lib) $(call lex,lex-lib) + +bin/libfxos.a: $(obj-lib) | bin/ + $(AR) rcs $@ $^ + +## The command-line interface + +src-fxos := $(wildcard fxos/*.cpp fxos/*/*.cpp fxos/*/*/*.cpp) +obj-fxos := $(call obj,src-fxos) + +bin/fxos: $(obj-fxos) bin/libfxos.a | bin/ + $(CXX) $(obj-fxos) -o $@ $(LDFLAGS) + +# +# Source targets +# + +# C++ sources +build/%.cpp.o: %.cpp + @mkdir -p $(dir $@) + $(CXX) -c $< -o $@ $(CXXFLAGS) $(DEPFLAGS) + +# C sources +build/%.c.o: %.c + @mkdir -p $(dir $@) + $(CC) -c $< -o $@ $(CFLAGS) $(DEPFLAGS) + +# Flex lexers for the database +build/%.yy.c: %.l + flex -o $@ -s $< +build/%.yy.c.o: build/%.yy.c + $(CXX) -c $< -o $@ $(CFLAGS) -Wno-unused-function $(DEPFLAGS) + +bin/: + @mkdir -p $@ + +# +# Dependency generation +# + +include $(wildcard build/*/*.d) + +.PHONY: all all-lib all-fxos clean clean-lib clean-fxos distclean + +.PRECIOUS: build/%.d build/%.yy.c + +# +# Installing +# + +m644 := -m 644 +m755 := -m 755 +sed := -i -e '/^PREFIX=\\$$/ a \$(PREFIX)' + +# Disable -m on Mac OS and use sed differently +ifeq "$(shell uname)" "Darwin" +m644 := +m755 := +sed := -i '' -e "$$(printf '/^PREFIX=/ a \\\n$(PREFIX)')" +endif + +install: $(TARGETS) + install -d $(PREFIX)/bin + install -d $(PREFIX)/share/fxos + install $(TARGETS) $(m755) $(PREFIX)/bin + @echo "TODO: Install data files" + @false +# install fxos/*.txt $(m644) $(PREFIX)/share/fxos + +uninstall: + rm -f $(TARGETS:%=$(PREFIX)/%) + rm -rf $(PREFIX)/share/fxos + +# +# Cleaning +# + +clean-lib: + @rm -rf build/lib +clean-fxos: + @rm -rf build/fxos + +clean: + @rm -rf build +distclean: clean + @rm -rf bin diff --git a/data/sh3.txt b/data/sh3.txt 
new file mode 100644 index 0000000..066cef1 --- /dev/null +++ b/data/sh3.txt @@ -0,0 +1,225 @@ +type: assembly +name: sh-3 +--- + +# Format: [01nmdi]{16}, followed by the mnemonic and the list of arguments. +# In each opcode, there should be at most one sequence of "m", "n", "d" and "i" +# each (representing the location of the argument). +# +# Possible argument strings are predefined and include: +# rn rm #imm +# jump8 jump12 disp pcdisp +# @rn @rm @rn+ @rm+ @-rn +# @(disp,rn) @(disp,rm) @(r0,rn) @(r0,rm) @(disp,gbr) +# +# The disassembler substitutes some elements as follows: +# rn -> value of the "n"-sequence +# rm -> value of the "m"-sequence +# #imm -> value of the "i"-sequence +# disp -> value of the "d"-sequence +# jump8 -> value of the 8-bit "d"-sequence x2 plus value of PC +# jump12 -> value of the 12-bit "d"-sequence x2 plus value of PC +# @(disp,pc) -> value of the 8-bit "d"-sequence x2 or x4, plus value of PC +# TODO: This list does not exactly reflect the behavior of the parser + +0000000001001000 clrs +0000000000001000 clrt +0000000000101000 clrmac +0000000000011001 div0u +0000000000111000 ldtlb +0000000000001001 nop +0000000000101011 rte +0000000000001011 rts +0000000001011000 sets +0000000000011000 sett +0000000000011011 sleep + +0100nnnn00010101 cmp/pl rn +0100nnnn00010001 cmp/pz rn +0100nnnn00010000 dt rn +0000nnnn00101001 movt rn +0100nnnn00000100 rotl rn +0100nnnn00000101 rotr rn +0100nnnn00100100 rotcl rn +0100nnnn00100101 rotcr rn +0100nnnn00100000 shal rn +0100nnnn00100001 shar rn +0100nnnn00000000 shll rn +0100nnnn00000001 shlr rn +0100nnnn00001000 shll2 rn +0100nnnn00001001 shlr2 rn +0100nnnn00011000 shll8 rn +0100nnnn00011001 shlr8 rn +0100nnnn00101000 shll16 rn +0100nnnn00101001 shlr16 rn + +0011nnnnmmmm1100 add rm, rn +0011nnnnmmmm1110 addc rm, rn +0011nnnnmmmm1111 addv rm, rn +0010nnnnmmmm1001 and rm, rn +0011nnnnmmmm0000 cmp/eq rm, rn +0011nnnnmmmm0010 cmp/hs rm, rn +0011nnnnmmmm0011 cmp/ge rm, rn +0011nnnnmmmm0110 cmp/hi rm, rn 
+0011nnnnmmmm0111 cmp/gt rm, rn +0010nnnnmmmm1100 cmp/str rm, rn +0011nnnnmmmm0100 div1 rm, rn +0010nnnnmmmm0111 div0s rm, rn +0011nnnnmmmm1101 dmuls.l rm, rn +0011nnnnmmmm0101 dmulu.l rm, rn +0110nnnnmmmm1110 exts.b rm, rn +0110nnnnmmmm1111 exts.w rm, rn +0110nnnnmmmm1100 extu.b rm, rn +0110nnnnmmmm1101 extu.w rm, rn +0110nnnnmmmm0011 mov rm, rn +0000nnnnmmmm0111 mul.l rm, rn +0010nnnnmmmm1111 muls.w rm, rn +0010nnnnmmmm1110 mulu.w rm, rn +0110nnnnmmmm1011 neg rm, rn +0110nnnnmmmm1010 negc rm, rn +0110nnnnmmmm0111 not rm, rn +0010nnnnmmmm1011 or rm, rn +0100nnnnmmmm1100 shad rm, rn +0100nnnnmmmm1101 shld rm, rn +0011nnnnmmmm1000 sub rm, rn +0011nnnnmmmm1010 subc rm, rn +0011nnnnmmmm1011 subv rm, rn +0110nnnnmmmm1000 swap.b rm, rn +0110nnnnmmmm1001 swap.w rm, rn +0010nnnnmmmm1000 tst rm, rn +0010nnnnmmmm1010 xor rm, rn +0010nnnnmmmm1101 xtrct rm, rn + +0100mmmm00001110 ldc rm, sr +0100mmmm00011110 ldc rm, gbr +0100mmmm00101110 ldc rm, vbr +0100mmmm00111110 ldc rm, ssr +0100mmmm01001110 ldc rm, spc +0100mmmm10001110 ldc rm, r0_bank +0100mmmm10011110 ldc rm, r1_bank +0100mmmm10101110 ldc rm, r2_bank +0100mmmm10111110 ldc rm, r3_bank +0100mmmm11001110 ldc rm, r4_bank +0100mmmm11011110 ldc rm, r5_bank +0100mmmm11101110 ldc rm, r6_bank +0100mmmm11111110 ldc rm, r7_bank +0100mmmm00001010 lds rm, mach +0100mmmm00011010 lds rm, macl +0100mmmm00101010 lds rm, pr +0000nnnn00000010 stc sr, rn +0000nnnn00010010 stc gbr, rn +0000nnnn00100010 stc vbr, rn +0000nnnn00110010 stc ssr, rn +0000nnnn01000010 stc spc, rn +0000nnnn10000010 stc r0_bank, rn +0000nnnn10010010 stc r1_bank, rn +0000nnnn10100010 stc r2_bank, rn +0000nnnn10110010 stc r3_bank, rn +0000nnnn11000010 stc r4_bank, rn +0000nnnn11010010 stc r5_bank, rn +0000nnnn11100010 stc r6_bank, rn +0000nnnn11110010 stc r7_bank, rn +0000nnnn00001010 sts mach, rn +0000nnnn00011010 sts macl, rn +0000nnnn00101010 sts pr, rn + +0100nnnn00101011 jmp @rn +0100nnnn00001011 jsr @rn +0000nnnn10000011 pref @rn +0100nnnn00011011 tas.b @rn 
+0010nnnnmmmm0000 mov.b rm, @rn +0010nnnnmmmm0001 mov.w rm, @rn +0010nnnnmmmm0010 mov.l rm, @rn +0110nnnnmmmm0000 mov.b @rm, rn +0110nnnnmmmm0001 mov.w @rm, rn +0110nnnnmmmm0010 mov.l @rm, rn +0000nnnnmmmm1111 mac.l @rm+, @rn+ +0100nnnnmmmm1111 mac.w @rm+, @rn+ + +0110nnnnmmmm0100 mov.b @rm+, rn +0110nnnnmmmm0101 mov.w @rm+, rn +0110nnnnmmmm0110 mov.l @rm+, rn + +0100mmmm00000111 ldc.l @rm+, sr +0100mmmm00010111 ldc.l @rm+, gbr +0100mmmm00100111 ldc.l @rm+, vbr +0100mmmm00110111 ldc.l @rm+, ssr +0100mmmm01000111 ldc.l @rm+, spc +0100mmmm10000111 ldc.l @rm+, r0_bank +0100mmmm10010111 ldc.l @rm+, r1_bank +0100mmmm10100111 ldc.l @rm+, r2_bank +0100mmmm10110111 ldc.l @rm+, r3_bank +0100mmmm11000111 ldc.l @rm+, r4_bank +0100mmmm11010111 ldc.l @rm+, r5_bank +0100mmmm11100111 ldc.l @rm+, r6_bank +0100mmmm11110111 ldc.l @rm+, r7_bank +0100mmmm00000110 lds.l @rm+, mach +0100mmmm00010110 lds.l @rm+, macl +0100mmmm00100110 lds.l @rm+, pr + +0010nnnnmmmm0100 mov.b rm, @-rn +0010nnnnmmmm0101 mov.w rm, @-rn +0010nnnnmmmm0110 mov.l rm, @-rn + +0100nnnn00000011 stc.l sr, @-rn +0100nnnn00010011 stc.l gbr, @-rn +0100nnnn00100011 stc.l vbr, @-rn +0100nnnn00110011 stc.l ssr, @-rn +0100nnnn01000011 stc.l spc, @-rn +0100nnnn10000011 stc.l r0_bank, @-rn +0100nnnn10010011 stc.l r1_bank, @-rn +0100nnnn10100011 stc.l r2_bank, @-rn +0100nnnn10110011 stc.l r3_bank, @-rn +0100nnnn11000011 stc.l r4_bank, @-rn +0100nnnn11010011 stc.l r5_bank, @-rn +0100nnnn11100011 stc.l r6_bank, @-rn +0100nnnn11110011 stc.l r7_bank, @-rn +0100nnnn00000010 sts.l mach, @-rn +0100nnnn00010010 sts.l macl, @-rn +0100nnnn00100010 sts.l pr, @-rn + +10000000nnnndddd mov.b r0, @(disp,rn) +10000001nnnndddd mov.w r0, @(disp,rn) +0001nnnnmmmmdddd mov.l rm, @(disp,rn) +10000100mmmmdddd mov.b @(disp,rm), r0 +10000101mmmmdddd mov.w @(disp,rm), r0 +0101nnnnmmmmdddd mov.l @(disp,rm), rn +0000nnnnmmmm0100 mov.b rm, @(r0,rn) +0000nnnnmmmm0101 mov.w rm, @(r0,rn) +0000nnnnmmmm0110 mov.l rm, @(r0,rn) +0000nnnnmmmm1100 mov.b 
@(r0,rm), rn +0000nnnnmmmm1101 mov.w @(r0,rm), rn +0000nnnnmmmm1110 mov.l @(r0,rm), rn +11000000dddddddd mov.b r0, @(disp,gbr) +11000001dddddddd mov.w r0, @(disp,gbr) +11000010dddddddd mov.l r0, @(disp,gbr) +11000100dddddddd mov.b @(disp,gbr), r0 +11000101dddddddd mov.w @(disp,gbr), r0 +11000110dddddddd mov.l @(disp,gbr), r0 + +11001101iiiiiiii and.b #imm, @(r0,gbr) +11001111iiiiiiii or.b #imm, @(r0,gbr) +11001100iiiiiiii tst.b #imm, @(r0,gbr) +11001110iiiiiiii xor.b #imm, @(r0,gbr) + +1001nnnndddddddd mov.w @(disp,pc), rn +1101nnnndddddddd mov.l @(disp,pc), rn +11000111dddddddd mova.l @(disp,pc), r0 + +0000mmmm00100011 braf rm +0000mmmm00000011 bsrf rm +10001011dddddddd bf jump8 +10001111dddddddd bf/s jump8 +10001001dddddddd bt jump8 +10001101dddddddd bt/s jump8 +1010dddddddddddd bra jump12 +1011dddddddddddd bsr jump12 + +0111nnnniiiiiiii add #imm, rn +11001001iiiiiiii and #imm, r0 +10001000iiiiiiii cmp/eq #imm, r0 +1110nnnniiiiiiii mov #imm, rn +11001011iiiiiiii or #imm, r0 +11001000iiiiiiii tst #imm, r0 +11001010iiiiiiii xor #imm, r0 +11000011iiiiiiii trapa #imm diff --git a/data/sh4.txt b/data/sh4.txt new file mode 100644 index 0000000..09f076a --- /dev/null +++ b/data/sh4.txt @@ -0,0 +1,26 @@ +type: assembly +name: sh-4a-extensions +--- + +0000nnnn01110011 movco.l r0, @rn +0000mmmm01100011 movli.l @rm, r0 +0100mmmm10101001 movua.l @rm, r0 +0100mmmm11101001 movua.l @rm+, r0 +0000nnnn11000011 movca.l r0, @rn + +0000nnnn11100011 icbi @rn +0000nnnn10010011 ocbi @rn +0000nnnn10100011 ocbp @rn +0000nnnn10110011 ocbwb @rn + +0000nnnn11010011 prefi @rn +0000000010101011 synco + +0100mmmm00111010 ldc rm, sgr +0100mmmm11111010 ldc rm, dbr +0100mmmm00110110 ldc.l @rm+, sgr +0100mmmm11110110 ldc.l @rm+, dbr +0000nnnn00111010 stc sgr, rn +0000nnnn11111010 stc dbr, rn +0100nnnn00110010 stc.l sgr, @-rn +0100nnnn11110010 stc.l dbr, @-rn diff --git a/fxos/main.cpp b/fxos/main.cpp new file mode 100644 index 0000000..735cfd5 --- /dev/null +++ b/fxos/main.cpp @@ -0,0 +1,79 @@ 
+#include
+#include
+#include
+#include
+#include
+
+using namespace FxOS;
+
+char const *info_str =
+"Header information:\n"
+" Bootcode timestamp (DateA) (0x8000ffb0) : %s\n"
+" Serial number (0x8000ffd0) : %s\n"
+" Bootcode checksum (0x8000fffc) : 0x%s\n"
+" OS version (0x80010020) : %s\n";
+
+char const *footer_str =
+"\nFooter information:\n"
+" Detected footer address : 0x8%07x\n"
+" Langdata entries found : %d\n"
+" OS date (DateO) (0x8%07x)" " : %s\n"
+" OS checksum (0x8%07x)" " : 0x%s\n";
+
+char const *syscall_str =
+"\nSyscall information:\n"
+" Syscall table address (0x8001007c) : 0x%08x\n"
+" Entries that point to valid memory : 0x%x\n"
+" First seemingly invalid entry : 0x%08x\n"
+" Syscall entries outside ROM:\n";
+
+char const *syscall_nonrom_str =
+" %%%03x -> 0x%08x (%s memory)\n";
+
+/* Print the syscall summary of the OS image stored at [path]: address of the
+   syscall table, number of entries, the word that follows the last entry,
+   and every entry that does not point into ROM or ROM_P2. */
+void info(std::string path)
+{
+    File file(path);
+    OS os(file);
+
+    /* Bind the same image to both ROM regions of a fresh target */
+    Target t;
+    t.bind_region(MemoryRegion::ROM, file);
+    t.bind_region(MemoryRegion::ROM_P2, file);
+
+    /* The entry count does not change while printing; query it once */
+    int const count = os.syscall_count();
+
+    uint32_t syscall_table = t.read_u32(0x8001007c);
+    uint32_t first_noncall = t.read_u32(syscall_table + 4 * count);
+
+    printf(syscall_str, syscall_table, count, first_noncall);
+
+    int total = 0;
+    for(int i = 0; i < count; i++)
+    {
+        uint32_t e = os.syscall(i);
+        MemoryRegion const *r = MemoryRegion::region_for(e);
+        /* Skip entries with no region, and those that stay in ROM */
+        if(!r || r->name == "ROM" || r->name == "ROM_P2") continue;
+
+        printf(syscall_nonrom_str, i, e, r->name.c_str());
+        total++;
+    }
+
+    if(!total) printf(" (none)\n");
+}
+
+/* Entry point. Loads the two instruction tables, then prints the syscall
+   summary of an OS image. The image path is the first command-line argument;
+   without one, the path that used to be hard-coded here is used.
+   Returns 1 if a data file has a syntax error, 0 otherwise. */
+int main(int argc, char **argv)
+{
+    try
+    {
+        FxOS::load("data/sh3.txt");
+        FxOS::load("data/sh4.txt");
+    }
+    catch(FxOS::SyntaxError const &e)
+    {
+        std::cerr << e.file() << ":" << e.line() << ": " <<
+            e.what() << "\n" << std::flush;
+        return 1;
+    }
+
+    char const *default_image =
+        "/home/lake/Documents/PC/DonnĂ©es/OS Graph 35+E II/3.10.bin";
+    info(argc > 1 ? argv[1] : default_image);
+
+    return 0;
+}
diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h
new file mode 100644
index 0000000..ca1770d
--- /dev/null
+++ b/include/fxos/disassembly.h
@@
-0,0 +1,22 @@
+//---
+// fxos.disassembly: Disassembler
+//---
+
+#ifndef LIBFXOS_DISASSEMBLY_H
+#define LIBFXOS_DISASSEMBLY_H
+
+#include
+#include
+
+namespace FxOS {
+
+/* Register an instruction.
+   @ins Instruction with [opcode] set to the binary pattern
+
+   Typically this is called by loader functions from data tables describing
+   instructions with parameters, not manually. See . */
+void register_instruction(Instruction ins);
+
+} /* namespace FxOS */
+
+#endif /* LIBFXOS_DISASSEMBLY_H */
diff --git a/include/fxos/endianness.h b/include/fxos/endianness.h
new file mode 100644
index 0000000..cf2cf82
--- /dev/null
+++ b/include/fxos/endianness.h
@@ -0,0 +1,33 @@
+//---
+// fxos.endianness. Somewhat cross-platform endianness conversion. (seriously?)
+//---
+
+#ifndef LIBFXOS_ENDIANNESS_H
+#define LIBFXOS_ENDIANNESS_H
+
+#if defined(__APPLE__)
+
+    #include
+
+    #define htobe16(x) OSSwapHostToBigInt16(x)
+    #define htole16(x) OSSwapHostToLittleInt16(x)
+    #define be16toh(x) OSSwapBigToHostInt16(x)
+    #define le16toh(x) OSSwapLittleToHostInt16(x)
+
+    #define htobe32(x) OSSwapHostToBigInt32(x)
+    #define htole32(x) OSSwapHostToLittleInt32(x)
+    #define be32toh(x) OSSwapBigToHostInt32(x)
+    #define le32toh(x) OSSwapLittleToHostInt32(x)
+
+    #define htobe64(x) OSSwapHostToBigInt64(x)
+    #define htole64(x) OSSwapHostToLittleInt64(x)
+    #define be64toh(x) OSSwapBigToHostInt64(x)
+    #define le64toh(x) OSSwapLittleToHostInt64(x)
+
+#elif defined(__linux__)
+
+    #include
+
+#endif
+
+#endif /* LIBFXOS_ENDIANNESS_H */
diff --git a/include/fxos/errors.h b/include/fxos/errors.h
new file mode 100644
index 0000000..a6ce84f
--- /dev/null
+++ b/include/fxos/errors.h
@@ -0,0 +1,46 @@
+//---
+// fxos.errors: Exception specification
+//---
+
+#ifndef LIBFXOS_ERRORS_H
+#define LIBFXOS_ERRORS_H
+
+#include <exception>
+#include <string>
+
+namespace FxOS {
+
+/* Syntax errors for fxos data files. The file name and message are copied
+   into the exception, so it stays valid after the caller's buffers (for
+   instance the c_str() of a temporary std::string) are gone. */
+class SyntaxError: public std::exception
+{
+public:
+    /* Specifies the file and line of the exception, and the message
+       returned by what(). Null pointers are stored as empty strings. */
+    SyntaxError(char const *file, int line, char const *what):
+        m_file(file ? file : ""), m_line(line),
+        m_what(what ? what : "") {}
+
+    /* Name of the data file given at construction */
+    char const *file() const noexcept {
+        return m_file.c_str();
+    }
+    /* Line number given at construction */
+    int line() const noexcept {
+        return m_line;
+    }
+    /* Error message; the pointer is valid as long as this object */
+    char const *what() const noexcept override {
+        return m_what.c_str();
+    }
+
+private:
+    std::string m_file;
+    int m_line;
+    std::string m_what;
+};
+
+} /* namespace FxOS */
+
+#endif /* LIBFXOS_ERRORS_H */
diff --git a/include/fxos/lang.h b/include/fxos/lang.h
new file mode 100644
index 0000000..e6583d6
--- /dev/null
+++ b/include/fxos/lang.h
@@ -0,0 +1,136 @@
+//---
+// fxos.lang: Assembler language specification
+//---
+
+#ifndef LIBFXOS_LANG_H
+#define LIBFXOS_LANG_H
+
+#include
+#include
+#include
+
+namespace FxOS {
+
+/* CPU register names, with a little meat for conversion to and from string */
+class CpuRegister
+{
+public:
+    enum CpuRegisterName {
+        /* Caller-saved general-purpose registers */
+        R0, R1, R2, R3, R4, R5, R6, R7,
+        /* Banked general-purpose registers. fxos does not account for
+           banking identities, these are just for naming and output.
*/ + R0B, R1B, R2B, R3B, R4B, R5B, R6B, R7B, + /* Callee-saved general-purpose registers */ + R8, R9, R10, R11, R12, R13, R14, R15, + /* System registers */ + MACH, MACL, PR, PC, + /* Control registers */ + SR, SSR, SPC, GBR, VBR, DBR, SGR, + }; + + CpuRegister() = default; + + /* Construction from CpuRegisterName */ + constexpr CpuRegister(CpuRegisterName name): m_name(name) {} + + /* Construction from string */ + CpuRegister(std::string register_name); + + /* Conversion to string */ + std::string str() const noexcept; + + /* Conversion to CpuRegisterName for switch statements */ + constexpr operator CpuRegisterName() noexcept { return m_name; } + + /* Comparison operators */ + constexpr bool operator==(CpuRegister r) const { + return m_name == r.m_name; + } + constexpr bool operator!=(CpuRegister r) const { + return m_name != r.m_name; + } + +private: + CpuRegisterName m_name; +}; + +/* Addressing modes for arguments */ +struct Argument +{ + /* Various addressing modes in the language */ + enum Kind { + Reg, /* rn */ + Deref, /* @rn */ + PostInc, /* @rn+ */ + PreDec, /* @-rn */ + StructDeref, /* @(disp,rn) or @(disp,gbr) */ + ArrayDeref, /* @(r0,rn) or @(r0,gbr) */ + PcRel, /* @(disp,pc) with 4-alignment correction */ + PcJump, /* disp */ + Imm, /* #imm */ + }; + + Argument() = default; + + /* String representation */ + std::string str() const; + + /* Addressing mode */ + Kind kind; + + /* Base register. Valid for all modes except Imm */ + CpuRegister base; + + /* Index register. Valid for ArrayDeref */ + CpuRegister index; + + /* Displacement in bytes. Valid for StructDeref, PcRel and PcJump */ + int disp; + + /* Operation size. Generally a multiplier for disp */ + int opsize; + + /* Immediate value. 
Valid for Imm */ + int imm; +}; + +/* Argument constructors */ + +Argument Argument_Reg(CpuRegister base); +Argument Argument_Deref(CpuRegister base); +Argument Argument_PostInc(CpuRegister base); +Argument Argument_PreDec(CpuRegister base); +Argument Argument_StructDeref(int disp, int opsize, CpuRegister base); +Argument Argument_ArrayDeref(CpuRegister index, CpuRegister base); +Argument Argument_PcRel(int disp, int opsize); +Argument Argument_PcJump(int disp); +Argument Argument_Imm(int imm); + +/* Assembler instruction */ +struct Instruction +{ + Instruction() = default; + + /* Construct with one or several arguments */ + Instruction(std::string mnemonic); + Instruction(std::string mnemonic, Argument arg); + Instruction(std::string mnemonic, Argument arg1, Argument arg2); + + /* Original opcode. Initialized to 0 when unset, which is an invalid + instruction by design. */ + uint16_t opcode; + + /* Mnemonic **without the size indicator** */ + std::string mnemonic; + + /* Operation size (0, 1, 2 or 4) */ + int opsize; + + /* Arguments */ + std::vector args; +}; + +} /* namespace FxOS */ + +#endif /* LIBFXOS_LANG_H */ diff --git a/include/fxos/load.h b/include/fxos/load.h new file mode 100644 index 0000000..423af1a --- /dev/null +++ b/include/fxos/load.h @@ -0,0 +1,46 @@ +//--- +// fxos.load: Data file lexers and loaders +//--- + +#ifndef LIBFXOS_LOAD_H +#define LIBFXOS_LOAD_H + +#include + +#include +#include + +namespace FxOS { + +using Header = std::map; + +/* Load any fxos data file. + @file Data file, assumed to follow the fxos header and data format. + + This function reads the header with load_header() then calls the appropriate + lexer and loader depending on the type specified in the header. */ +void load(std::string path); + +/* Load the header of a data file. 
+ @file Data file, assumed with an fxos header type-specific contents + @offset Will be set to the byte offset where content starts + @line Will be set to the liner number where content starts + + This function is used when reading all data files for fxos. The header + indicates the file type, thus the syntax of the contents. Some metadata can + also be specified here. + + The parameters [offset] and [line] are set to reflect the location in the + file where the raw content starts. These parameters are used to initialize + the lexers in all other load functions. */ +Header load_header(File &file, size_t &offset, int &line); + +/* Load an assembly instruction table for the disassembler. + @file Data file, presumably analyzed with lex_header() + @start_offset Offset of assembly data in the file + @start_line Line where assembly data starts in the file (for errors) */ +void load_asm(File &file, size_t start_offset, size_t start_line); + +} /* namespace FxOS */ + +#endif /* LIBFXOS_LOAD_H */ diff --git a/include/fxos/memory.h b/include/fxos/memory.h new file mode 100644 index 0000000..33ba3d3 --- /dev/null +++ b/include/fxos/memory.h @@ -0,0 +1,98 @@ +//--- +// fxos.memory: Standard memory regions +//--- + +#ifndef LIBFXOS_MEMORY_H +#define LIBFXOS_MEMORY_H + +#include +#include + +namespace FxOS { + +/* Memory area enumeration with a few tools */ +class MemoryArea +{ +public: + enum MemoryAreaName { + /* Userspace seen from user and privileged mode */ + U0, P0, + /* Second half of memory, only for privileged mode */ + P1, P2, P3, P4, + }; + + MemoryArea() = default; + + /* Construction from MemoryAreaName */ + constexpr MemoryArea(MemoryAreaName name): m_name(name) {} + + /* Start, end (last byte in area) and size of area */ + uint32_t start() const noexcept; + uint32_t end() const noexcept; + uint32_t size() const noexcept; + + /* Conversion to MemoryAreaName for switch */ + constexpr operator MemoryAreaName() noexcept { return m_name; } + + /* Comparison operators 
*/ + constexpr bool operator==(MemoryArea a) const { + return m_name == a.m_name; + } + constexpr bool operator!=(MemoryArea a) const { + return m_name != a.m_name; + } + +private: + MemoryAreaName m_name; +}; + +struct MemoryRegion +{ + /* Address space regions that correspond to standard (ie. contiguous + multi-addressable) memory */ + static MemoryRegion const &ROM; + static MemoryRegion const &RAM; + static MemoryRegion const &ROM_P2; + static MemoryRegion const &RAM_P2; + static MemoryRegion const &RS; + static MemoryRegion const &ILRAM; + static MemoryRegion const &XRAM; + static MemoryRegion const &YRAM; + + /* Determine if an address falls into one of the standard regions. + Throws std::out_of_range if none. */ + static MemoryRegion const *region_for(uint32_t address); + + /* Short constructor which calls guess_flags() */ + MemoryRegion(std::string name, uint32_t start, uint32_t end, + bool writable); + + /* Region name */ + std::string name {}; + + /* Start address and end address. Generally the end address has one + additionnaly byte. This is okay since no region is supposed to + extend to the very end of the memory. 
*/
+    uint32_t start, end;
+
+    /* The region is writable under normal conditions */
+    bool writable;
+    /* The cache is active in that region (if enabled) */
+    bool cacheable;
+    /* The MMU is active in that region (if enabled) */
+    bool mappable;
+
+    /* Returns the size of the region */
+    uint32_t size() const noexcept;
+
+    /* Returns the area associated to the region (assuming it is fully
+       contained in one, which should always be the case) */
+    MemoryArea area() const noexcept;
+
+    /* Automatically guess the cacheable and mappable flags */
+    void guess_flags() noexcept;
+};
+
+} /* namespace FxOS */
+
+#endif /* LIBFXOS_MEMORY_H */
diff --git a/include/fxos/operands.h b/include/fxos/operands.h
new file mode 100644
index 0000000..e2f576a
--- /dev/null
+++ b/include/fxos/operands.h
@@ -0,0 +1,128 @@
+#ifndef LIBFXOS_OPERANDS_H
+#define LIBFXOS_OPERANDS_H
+
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace FxOS {
+
+/* Data types:
+   Registers longwords
+   Memory aligned u8, u16, u32
+   User-defined structs, ... */
+
+/* Status register does not contain too much useful stuff:
+   (30)MD (29)RB -> Disassembler needs not understand them
+   (9)M (8)Q -> Idem, divisions are very rare
+   (7-4)IMASK (28)BL -> Interrupt system rarely used, even less explicitly
+   (1)S -> ?
+   (0)T -> Now THAT is important
+   Disassembler should name them but needs not understand their role, except T.
+   The T bit might just be stored outside. */
+
+/* Discriminator returned by DataType::kind() */
+enum class DataKind {
+    /* Base types */
+    Integral,
+    /* Bit fields over integers */
+    BitField,
+    /* Structures (can only reside in memory) */
+    Struct,
+    /* Arrays (can only reside in memory) */
+    Array,
+};
+
+/* Abstract base for data type descriptions */
+class DataType
+{
+public:
+    /* Virtual destructor, so that subclasses can be destroyed through a
+       DataType pointer or reference */
+    virtual ~DataType() = default;
+
+    /* Returns the kind of data type implemented by the subclass */
+    virtual DataKind kind() const noexcept = 0;
+};
+
+/* Integral type; the constructor is defined outside this header */
+class IntegralType: public DataType
+{
+public:
+    IntegralType(int bitsize);
+
+    DataKind kind() const noexcept override {
+        return DataKind::Integral;
+    }
+
+private:
+    int m_size;
+};
+
+/* Bit field type built from a list of fields */
+class BitFieldType: public DataType
+{
+    using Field = std::pair;
+
+public:
+    BitFieldType(std::vector fields);
+
+    DataKind kind() const noexcept override {
+        return DataKind::BitField;
+    }
+
+private:
+    int m_size;
+    std::vector m_fields;
+};
+
+/* Discriminator returned by Operand::type() */
+enum class OperandKind {
+    /* CPU-held registers accessed with instructions */
+    CpuRegister,
+    /* Standard randomly-addressable memory */
+    Memory,
+    /* Memory-mapped module registers with specific access */
+    MappedModule,
+};
+
+/* Abstract base for operands */
+class Operand
+{
+public:
+    /* Virtual destructor, so that subclasses can be destroyed through an
+       Operand pointer or reference */
+    virtual ~Operand() = default;
+
+    /* Returns the operand kind (which is also the subclass identity) */
+    virtual OperandKind type() const noexcept = 0;
+    /* String representation */
+    virtual std::string str() const noexcept = 0;
+};
+
+/* Operand that designates a CPU register by name */
+class RegisterOperand: public Operand
+{
+public:
+    RegisterOperand(std::string name): m_name(std::move(name)) {}
+
+    OperandKind type() const noexcept override {
+        return OperandKind::CpuRegister;
+    }
+
+    std::string name() const noexcept {
+        return m_name;
+    }
+
+    std::string str() const noexcept override {
+        return this->name();
+    }
+
+private:
+    /* Register name for assembler listings */
+    std::string m_name;
+};
+
+} /* namespace FxOS */
+
+#endif /* LIBFXOS_OPERANDS_H */
diff --git a/include/fxos/os.h b/include/fxos/os.h
new file mode 100644
index 0000000..8030cea
--- /dev/null
+++ b/include/fxos/os.h
@@ -0,0 +1,69 @@
+//---
+// fxos.os: Operating system models and primitives
+//---
+
+#ifndef LIBFXOS_OS_H
+#define LIBFXOS_OS_H
+
+#include
+#include
+
+#include
+#include
+
+namespace FxOS {
+
+class OS +{ +public: + /* Load an OS from a file. */ + OS(File &file); + + /* Get OS version */ + std::string version() const noexcept; + + /* Get number of syscalls */ + int syscall_count() const noexcept; + /* Get a syscall entry */ + uint32_t syscall(int id) const; + /* Find a syscall entry. Returns -1 if syscall is not found */ + int find_syscall(uint32_t entry) const noexcept; + + /* Get the footer address. Returns -1 if not found */ + uint32_t footer() const noexcept; + +private: + /* Determine the OS version. This should be the first analysis function + to be called, because it determines the type of model (ie. fx9860g + versus fxcg50) thus the location of the syscall table and many more + important parameters. */ + void parse_version(); + + /* Locate and parse the syscall table. */ + void parse_syscall_table(); + + /* Locate the footer */ + void parse_footer(); + + /* Working target which is a simulated memory with just the OS */ + Target m_target; + + //--- + // OS information + //--- + + /* Version */ + std::string m_version; + + /* Syscall table, in order of syscall IDs */ + std::vector m_syscall_table; + /* Bimap converse, syscalls sorted by address */ + std::map m_syscall_addresses; + + /* Footer address */ + uint32_t m_footer; +}; + +} /* namespace FxOS */ + +#endif /* LIBFXOS_OS_H */ diff --git a/include/fxos/target.h b/include/fxos/target.h new file mode 100644 index 0000000..6478502 --- /dev/null +++ b/include/fxos/target.h @@ -0,0 +1,121 @@ +//--- +// fxos.target: The target which is being studied +//--- + +#ifndef LIBFXOS_TARGET_H +#define LIBFXOS_TARGET_H + +#include +#include + +#include +#include + +namespace FxOS { + +class OS; + +/* A common interface for simulated memory */ +class AbstractMemory +{ +public: + /* Checks if an address or interval is simulated */ + virtual bool covers(uint32_t addr, int size=1) const noexcept = 0; + + /* Returns the data located at the provided virtual address. 
Throws + std::out_of_range if the interval is not entirely simulated */ + virtual void const *translate(uint32_t addr, int size=1) const = 0; + + /* Read data, with signed or unsigned extension. Virtual addresses + are used here, so they should be within the range of the region. + Throws std::out_of_range if this is not satisfied. + + These functions do *not* check alignment because exceptionally there + are instructions which can read unaligned (movua.l). Check it + yourself! */ + int32_t read_i8 (uint32_t addr) const; + uint32_t read_u8 (uint32_t addr) const; + int32_t read_i16(uint32_t addr) const; + uint32_t read_u16(uint32_t addr) const; + int32_t read_i32(uint32_t addr) const; + uint32_t read_u32(uint32_t addr) const; + + /* Search a binary pattern in the specified area. Returns the virtual + address of the first occurrence if any is found, [end] otherwise. */ + virtual uint32_t search(uint32_t start, uint32_t end, + void const *pattern, int size) const = 0; +}; + +/* A binding of a data buffer into a memory region of the target. */ +struct Binding: public AbstractMemory +{ + /* Constructor from file */ + Binding(MemoryRegion const ®ion, File &file); + + /* Targeted region, might overlap with other bindings */ + MemoryRegion region; + /* Actual data. This buffer must have at least [size] bytes */ + uint8_t *data; + /* Binding size, is the minimum of the region size and the data size */ + uint32_t size; + + /* Checks if an address is covered by the binding */ + bool covers(uint32_t addr, int size=1) const noexcept override; + + /* Returns this process' address (in [data]) corresponding to the + provided virtual address */ + void const *translate(uint32_t addr, int size=1) const override; + + /* Search a pattern */ + uint32_t search(uint32_t start, uint32_t end, void const *pattern, + int size) const override; +}; + +class Target: public AbstractMemory +{ +public: + /* Create an empty target with no sections */ + Target(); + + /* Bind an OS. 
This is used to either disassemble the OS itself, or
+       select the OS version for which code is being disassembled (typical
+       use is for add-ins). */
+    void bind_os(OS &os);
+
+    /* Bind a memory region from a file. The region can either be standard
+       (see ) or custom.
+
+       If several loaded regions overlap on some addresses, *the last
+       loaded region will be used*. Thus, new regions can be loaded to
+       selectively override parts of the target.
+
+       Generally it is preferable to bind an OS image to the target's ROM
+       area using bind_os(), rather than using bind_region(). This is
+       because bind_os() will also enable OS-specific tasks such as
+       syscall resolution.
+
+       If the file is smaller than the region being bound, the region is
+       shrunk to fit the file. */
+    void bind_region(MemoryRegion const &region, File &file);
+
+    /* Check if an address is bound */
+    bool covers(uint32_t addr, int size=1) const noexcept override;
+
+    /* Returns the data at the provided virtual address */
+    void const *translate(uint32_t addr, int size=1) const override;
+
+    /* Search a pattern */
+    uint32_t search(uint32_t start, uint32_t end, void const *pattern,
+        int size) const override;
+
+private:
+    /* Bound OS image */
+    OS *m_os;
+
+    /* Bound regions (in order of binding) */
+    std::vector m_bindings;
+};
+
+} /* namespace FxOS */
+
+#endif /* LIBFXOS_TARGET_H */
diff --git a/include/fxos/util.h b/include/fxos/util.h
new file mode 100644
index 0000000..937c18a
--- /dev/null
+++ b/include/fxos/util.h
@@ -0,0 +1,81 @@
+//---
+// fxos.util: Utility functions
+//---
+
+#ifndef LIBFXOS_UTIL_H
+#define LIBFXOS_UTIL_H
+
+#include
+#include
+#include
+#include
+#include
+#include <stdexcept>
+
+/* Format a string with printf() syntax.
+   Throws std::runtime_error if snprintf() fails to format the arguments. */
+template<typename ... Args>
+std::string format(std::string const &format, Args ... args)
+{
+    /* Measuring pass; snprintf() returns a negative value on error, which
+       must not be turned into a buffer size */
+    int length = snprintf(nullptr, 0, format.c_str(), args ...);
+    if(length < 0)
+        throw std::runtime_error("format: snprintf() failed");
+
+    /* Writing pass, straight into the result; snprintf() puts its NUL in
+       the terminator slot that std::string keeps after its last character */
+    std::string result(static_cast<size_t>(length), '\0');
+    snprintf(result.data(), result.size() + 1, format.c_str(), args ...);
+    return result;
+}
+
+/* An RAII contiguous memory buffer */
+class Buffer
+{
+public:
+    /* Empty buffer initialized with given byte */
+    Buffer(int size, int fill=0x00);
+
+    /* Buffer initialized from file, reading the given size and offset.
+       * Default offset is beginning of file.
+       * Default size (-1) is file size. If the specified region ends after
+         the end of the file, the buffer is padded. */
+    Buffer(std::string filepath, int size=-1, int offset=0, int fill=0x00);
+
+    /* Create a buffer by copying (and possibly resizing) another buffer */
+    Buffer(Buffer const &other, int new_size=-1);
+
+    /* Size */
+    int size() const noexcept;
+};
+
+/* A file abstraction that supports both direct load and memory mapping.
+   NOTE(review): per its comment the destructor frees the buffers, so a copied
+   File would presumably free them twice; consider deleting the copies. */
+class File
+{
+public:
+    /* Load a file, either by buffer or by memory mapping */
+    File(std::string path, bool mmap=false);
+
+    /* Get the path, size and loading address of the file */
+    std::string path() const noexcept;
+    size_t size() const noexcept;
+    char *data() const noexcept;
+
+    /* Free the allocated buffers */
+    ~File();
+
+private:
+    /* Path to file */
+    std::string m_path;
+    /* Size of buffer, or mapping */
+    size_t m_size;
+    /* Whether mmap() was used on the file */
+    bool m_mmap;
+    /* Data buffer (m_mmap=false) or mapping address (m_mmap=true) */
+    char *m_addr;
+};
+
+#endif /* LIBFXOS_UTIL_H */
diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp
new file mode 100644
index 0000000..c411ac2
--- /dev/null
+++ b/lib/disassembly.cpp
@@ -0,0 +1,24 @@
+#include
+#include
+#include
+
+namespace FxOS {
+
+/* Instruction map */
+static std::array,65536> insmap;
+
+/* Register an instruction at a given opcode.
*/ + +void register_instruction(Instruction ins) +{ + uint16_t opcode = ins.opcode; + + if(insmap[opcode]) + { + throw std::logic_error("opcode collision"); + } + + insmap[opcode] = ins; +} + +} /* namespace FxOS */ diff --git a/lib/lang.cpp b/lib/lang.cpp new file mode 100644 index 0000000..7468b2e --- /dev/null +++ b/lib/lang.cpp @@ -0,0 +1,225 @@ +#include +#include +#include +#include +#include + +namespace FxOS { + +//--- +// CPU registers +//--- + +using Reg = CpuRegister::CpuRegisterName; + +static std::map regnames = { + { Reg::R0, "r0" }, + { Reg::R1, "r1" }, + { Reg::R2, "r2" }, + { Reg::R3, "r3" }, + { Reg::R4, "r4" }, + { Reg::R5, "r5" }, + { Reg::R6, "r6" }, + { Reg::R7, "r7" }, + { Reg::R0B, "r0_bank" }, + { Reg::R1B, "r1_bank" }, + { Reg::R2B, "r2_bank" }, + { Reg::R3B, "r3_bank" }, + { Reg::R4B, "r4_bank" }, + { Reg::R5B, "r5_bank" }, + { Reg::R6B, "r6_bank" }, + { Reg::R7B, "r7_bank" }, + { Reg::R8, "r8" }, + { Reg::R9, "r9" }, + { Reg::R10, "r10" }, + { Reg::R11, "r11" }, + { Reg::R12, "r12" }, + { Reg::R13, "r13" }, + { Reg::R14, "r14" }, + { Reg::R15, "r15" }, + { Reg::MACH, "mach" }, + { Reg::MACL, "macl" }, + { Reg::PR, "pr" }, + { Reg::PC, "pc" }, + { Reg::SR, "sr" }, + { Reg::SSR, "ssr" }, + { Reg::SPC, "spc" }, + { Reg::GBR, "gbr" }, + { Reg::VBR, "vbr" }, + { Reg::DBR, "dbr" }, + { Reg::SGR, "sgr" }, +}; + +/* Construction from string - pretty slow */ +CpuRegister::CpuRegister(std::string name) +{ + for(auto it = regnames.begin(); it != regnames.end(); it++) + { + if(it->second == name) + { + m_name = it->first; + return; + } + } + + throw std::invalid_argument("invalid CpuRegister name"); +} + +/* Conversion to string */ +std::string CpuRegister::str() const noexcept +{ + return regnames.at(m_name); +} + +//--- +// Instruction arguments +//--- + +/* External constructors */ + +Argument Argument_Reg(CpuRegister base) +{ + Argument arg; + arg.kind = Argument::Reg; + arg.base = base; + return arg; +} + +Argument Argument_Deref(CpuRegister 
base)
+{
+    Argument arg;
+    arg.kind = Argument::Deref;
+    arg.base = base;
+    return arg;
+}
+
+Argument Argument_PostInc(CpuRegister base)
+{
+    Argument arg;
+    arg.kind = Argument::PostInc;
+    arg.base = base;
+    return arg;
+}
+
+Argument Argument_PreDec(CpuRegister base)
+{
+    Argument arg;
+    arg.kind = Argument::PreDec;
+    arg.base = base;
+    return arg;
+}
+
+Argument Argument_StructDeref(int disp, int opsize, CpuRegister base)
+{
+    Argument arg;
+    arg.kind = Argument::StructDeref;
+    arg.base = base;
+    arg.disp = disp;
+    arg.opsize = opsize;
+    return arg;
+}
+
+Argument Argument_ArrayDeref(CpuRegister index, CpuRegister base)
+{
+    Argument arg;
+    arg.kind = Argument::ArrayDeref;
+    arg.base = base;
+    arg.index = index;
+    return arg;
+}
+
+Argument Argument_PcRel(int disp, int opsize)
+{
+    Argument arg;
+    arg.kind = Argument::PcRel;
+    arg.disp = disp;
+    arg.opsize = opsize;
+    return arg;
+}
+
+Argument Argument_PcJump(int disp)
+{
+    Argument arg;
+    arg.kind = Argument::PcJump;
+    arg.disp = disp;
+    return arg;
+}
+
+Argument Argument_Imm(int imm)
+{
+    Argument arg;
+    arg.kind = Argument::Imm;
+    arg.imm = imm;
+    return arg;
+}
+
+/* String representation of the argument, selected by [kind].
+
+   Registers are always printed through CpuRegister::str(). format()
+   forwards its arguments to snprintf(), so a CpuRegister object must not
+   be passed for a %d conversion; its name is passed for %s instead. This
+   also gives banked and control registers their proper names.
+
+   Returns "(invalid)" for any kind that has no case below. */
+std::string Argument::str() const
+{
+    switch(kind)
+    {
+    case Argument::Reg:
+        return base.str();
+    case Argument::Deref:
+        return format("@%s", base.str().c_str());
+    case Argument::PostInc:
+        return format("@%s+", base.str().c_str());
+    case Argument::PreDec:
+        /* Pre-decrement is written @-rn (was misspelled "@-%dr") */
+        return format("@-%s", base.str().c_str());
+    case Argument::StructDeref:
+        return format("@(%d,%s)", disp, base.str().c_str());
+    case Argument::ArrayDeref:
+        return format("@(%s,%s)", index.str().c_str(),
+            base.str().c_str());
+    case Argument::PcRel:
+        return format("@(%d,pc)", disp);
+    case Argument::PcJump:
+        return format("pc+%d", disp);
+    case Argument::Imm:
+        /* Immediates built by Argument_Imm() previously fell through to
+           "(invalid)" */
+        return format("#%d", (int)imm);
+    default:
+        return "(invalid)";
+    }
+}
+
+//---
+// Instruction creation
+//---
+
+Instruction::Instruction(std::string mn):
+    opcode(0), opsize(0)
+{
+    int pos = std::max(0, (int)mn.size() - 2);
+
+    if(mn.substr(pos, 2) == ".b")
+    {
+        opsize = 1;
+ mn.erase(pos, 2); + } + else if(mn.substr(pos, 2) == ".w") + { + opsize = 2; + mn.erase(pos, 2); + } + else if(mn.substr(pos, 2) == ".l") + { + opsize = 4; + mn.erase(pos, 2); + } + + mnemonic = mn; +} + +Instruction::Instruction(std::string mn, Argument arg): + Instruction(mn) +{ + args.push_back(arg); +} + +Instruction::Instruction(std::string mn, Argument arg1, Argument arg2): + Instruction(mn) +{ + args.push_back(arg1); + args.push_back(arg2); +} + + +} /* namespace FxOS */ diff --git a/lib/load-asm.l b/lib/load-asm.l new file mode 100644 index 0000000..5fa4c2f --- /dev/null +++ b/lib/load-asm.l @@ -0,0 +1,332 @@ +%{ +#include +#include +#include +#include +#include + +#include + +/* Text value for parser */ +static char *yylval; + +/* Argument tokens */ +enum Token { + /* Instruction pattern and mnemonic */ + PATTERN = 1, MNEMONIC, + /* General-purpose registers */ + R0, RN, RM, + /* Banked registers */ + R0_BANK, R1_BANK, R2_BANK, R3_BANK, R4_BANK, R5_BANK, R6_BANK, R7_BANK, + /* Control registers */ + SR, PR, GBR, VBR, DBR, SSR, SPC, SGR, MACH, MACL, + /* PC-relative jumps and displacements (with 4-alignment correction) */ + JUMP8, JUMP12, AT_DPC, + /* Immediate operands */ + IMM, + /* Memory access with post-increment and pre-decrement */ + AT_RN, AT_RM, AT_RMP, AT_RNP, AT_MRN, + /* Structure dereferencing */ + AT_DRN, AT_DRM, AT_DGBR, + /* Array dereferencing */ + AT_R0RN, AT_R0RM, AT_R0GBR, +}; + +/* Instruction opcode pattern */ +struct Pattern { + /* 16-bit opcode, bits corresponding to arguments are clear */ + uint16_t bits; + /* Position of the arguments */ + uint8_t n_sh, m_sh, d_sh, i_sh; + /* Length of arguments, in bits */ + uint16_t n_size, m_size, d_size, i_size; +}; + +/* Current file name */ +static std::string filename; + +/* Error messages and exceptions */ +static void err(char const *format, ...) 
+{ + static char buf[256]; + + va_list args; + va_start(args, format); + vsnprintf(buf, 256, format, args); + va_end(args); + + throw FxOS::SyntaxError(filename.c_str(), yylineno, buf); +} + +%} + +%option prefix="asm" +%option noyywrap +%option nounput + +pattern ^[01nmdi]{16} +mnemonic [a-zA-Z0-9./]+ +space [ \t]+ + +%% + +^#[^\n]* ; +{space} ; +, ; +[\n] yylineno++; + +{pattern} { yylval = strdup(yytext); return PATTERN; } +^.{0,16} { err("invalid opcode at start of line"); } + +"#imm" { return IMM; } +"rn" { return RN; } +"rm" { return RM; } +"jump8" { return JUMP8; } +"jump12" { return JUMP12; } +"@(disp,"[ ]*"pc)" { return AT_DPC; } +"@rn" { return AT_RN; } +"@rm" { return AT_RM; } +"@rm+" { return AT_RMP; } +"@rn+" { return AT_RNP; } +"@-rn" { return AT_MRN; } +"@(disp,"[ ]*"rn)" { return AT_DRN; } +"@(disp,"[ ]*"rm)" { return AT_DRM; } +"@(r0,"[ ]*"rn)" { return AT_R0RN; } +"@(r0,"[ ]*"rm)" { return AT_R0RM; } +"@(disp,"[ ]*"gbr)" { return AT_DGBR; } +"@(r0,"[ ]*"gbr)" { return AT_R0GBR; } + +"r0" { return R0; } +"sr" { return SR; } +"pr" { return PR; } +"gbr" { return GBR; } +"vbr" { return VBR; } +"ssr" { return SSR; } +"spc" { return SPC; } +"sgr" { return SGR; } +"dbr" { return DBR; } +"r0_bank" { return R0_BANK; } +"r1_bank" { return R1_BANK; } +"r2_bank" { return R2_BANK; } +"r3_bank" { return R3_BANK; } +"r4_bank" { return R4_BANK; } +"r5_bank" { return R5_BANK; } +"r6_bank" { return R6_BANK; } +"r7_bank" { return R7_BANK; } +"mach" { return MACH; } +"macl" { return MACL; } + +{mnemonic} { yylval = strdup(yytext); return MNEMONIC; } + +. { err("lex error near '%s'", yytext); } +<> { return -1; } + +%% + +namespace FxOS { + +/* Build a pattern for an opcode. + @code 16-byte string using characters from "01mndi" + Returns a logical pattern representing the opcode and its arguments. + + Each argument has two parameters: shift ("sh") and size ("size"). 
From an + instance of the instruction, the value of parameter x can be recovered as: + + x = (opcode >> x_sh) & ((1 << x_size) - 1); + + (Originally, the second parameter was named x_mask and was equal to the rhs + of the [&] operator. But this decoding method is now unused.) */ +static Pattern make_pattern(char const *code) +{ + Pattern p {}; + + for(int i = 0; i < 16; i++) + { + int c = code[i]; + + /* Constant bits */ + if(c == '0' || c == '1') + { + p.bits = (p.bits << 1) | (c - '0'); + continue; + } + + /* Argument bits */ + p.bits <<= 1; + + if(c == 'n') p.n_sh = 15 - i, p.n_size++; + if(c == 'm') p.m_sh = 15 - i, p.m_size++; + if(c == 'd') p.d_sh = 15 - i, p.d_size++; + if(c == 'i') p.i_sh = 15 - i, p.i_size++; + } + + return p; +} + +/* Instantiate an argument token as an fxos language structure. + @token Argument token (tokens for other objets will be rejected) + @opsize Operation size indicated in the mnemonic + @m @n @d @i Instruction instance + Returns a semantic FxOS::Argument. */ +static Argument make_arg(int token, int opsize, int m, int n, int d, int i) +{ + /* TODO: This function is too slow for the ~100k times it is called. */ + + /* Registers rn and rm */ + CpuRegister Rn(format("r%d", n & 0xf)); + CpuRegister Rm(format("r%d", m & 0xf)); + /* Sign extensions of d to 8 and 12 bits */ + int32_t d8 = (int8_t)d; + int32_t d12 = (d & 0x800) ? 
(int32_t)(d | 0xfffff000) : (d); + /* Sign extension of i to 8 bits */ + int32_t i8 = (int8_t)i; + + using Reg = CpuRegister; + + switch(token) + { + case R0: return Argument_Reg(Reg::R0); + case RN: return Argument_Reg(Rn); + case RM: return Argument_Reg(Rm); + case R0_BANK: return Argument_Reg(Reg::R0B); + case R1_BANK: return Argument_Reg(Reg::R1B); + case R2_BANK: return Argument_Reg(Reg::R2B); + case R3_BANK: return Argument_Reg(Reg::R3B); + case R4_BANK: return Argument_Reg(Reg::R4B); + case R5_BANK: return Argument_Reg(Reg::R5B); + case R6_BANK: return Argument_Reg(Reg::R6B); + case R7_BANK: return Argument_Reg(Reg::R7B); + case SR: return Argument_Reg(Reg::SR); + case PR: return Argument_Reg(Reg::PR); + case GBR: return Argument_Reg(Reg::GBR); + case VBR: return Argument_Reg(Reg::VBR); + case DBR: return Argument_Reg(Reg::DBR); + case SSR: return Argument_Reg(Reg::SSR); + case SPC: return Argument_Reg(Reg::SPC); + case SGR: return Argument_Reg(Reg::SGR); + case MACH: return Argument_Reg(Reg::MACH); + case MACL: return Argument_Reg(Reg::MACL); + case JUMP8: return Argument_PcJump(d8 * 2); + case JUMP12: return Argument_PcJump(d12 * 2); + case IMM: return Argument_Imm(i8); + case AT_RN: return Argument_Deref(Rn); + case AT_RM: return Argument_Deref(Rm); + case AT_RMP: return Argument_PostInc(Rm); + case AT_RNP: return Argument_PostInc(Rn); + case AT_MRN: return Argument_PreDec(Rn); + case AT_DRN: return Argument_StructDeref(d, opsize, Rn); + case AT_DRM: return Argument_StructDeref(d, opsize, Rm); + case AT_DGBR: return Argument_StructDeref(d, opsize, Reg::GBR); + case AT_R0RN: return Argument_ArrayDeref(Reg::R0, Rn); + case AT_R0RM: return Argument_ArrayDeref(Reg::R0, Rm); + case AT_R0GBR: return Argument_ArrayDeref(Reg::R0, Reg::GBR); + + case AT_DPC: + if(!opsize) err("@(disp,pc) must have a size (.w, .l)"); + return Argument_PcRel(d, opsize); + } + + throw std::logic_error("lex asm builds args from bad tokens"); +} + +/* Record all the instances of an 
instruction in the disassembly table. + @p Instruction binary pattern + @mnemonic Mnemonic (especially important for operation size suffixes) + @argtoken1 Token corresponding to the first argument (0 if no argument) + @argtoken2 Token corresponding to the second argument (0 if unused) + + Generates all the instances of the instruction, then sends them to the + disassembler for fast lookup. */ +static void instantiate(struct Pattern p, std::string mnemonic, int argtoken1, + int argtoken2) +{ + for(int n = 0; n < (1 << p.n_size); n++) + for(int m = 0; m < (1 << p.m_size); m++) + for(int d = 0; d < (1 << p.d_size); d++) + for(int i = 0; i < (1 << p.i_size); i++) + { + uint16_t opcode = p.bits; + opcode |= (n << p.n_sh); + opcode |= (m << p.m_sh); + opcode |= (d << p.d_sh); + opcode |= (i << p.i_sh); + + Instruction ins(mnemonic); + ins.opcode = opcode; + + if(argtoken1) ins.args.push_back( + make_arg(argtoken1, ins.opsize, m, n, d, i)); + if(argtoken2) ins.args.push_back( + make_arg(argtoken2, ins.opsize, m, n, d, i)); + + register_instruction(ins); + } +} + +/* Load an assembly instruction table for the disassembler. 
*/ +void load_asm(File &file, size_t start_offset, size_t start_line) +{ + /* Lex all instructions and fill in the general assembly table */ + + YY_BUFFER_STATE buf = yy_scan_bytes(file.data() + start_offset, + file.size() - start_offset); + yylineno = start_line; + filename = file.path(); + + /* Insruction information */ + char *code=nullptr, *mnemonic=nullptr; + int argtoken1=0, argtoken2=0; + + /* Current line */ + int line = -1; + + while(1) + { + int t = yylex(); + + if(line >= 0 && (yylineno != line || t == PATTERN || t == -1)) + { + /* Finalize current instruction */ + if(!mnemonic) err("%d: missing mnemonic", line); + + /* TODO: Generate all parameters and fill */ + Pattern p = make_pattern(code); + + instantiate(p, mnemonic, argtoken1, argtoken2); + + if(code) free(code); + if(mnemonic) free(mnemonic); + } + if(t == -1) break; + + if(t == PATTERN) + { + code = yylval; + line = yylineno; + + mnemonic = nullptr; + argtoken1 = 0; + argtoken2 = 0; + } + else if(t == MNEMONIC && !mnemonic) + { + mnemonic = yylval; + } + else if(!mnemonic) + { + err("%d: missing mnemonic", line); + } + else if(!argtoken1) + { + argtoken1 = t; + } + else if(!argtoken2) + { + argtoken2 = t; + } + } + + yy_delete_buffer(buf); +} + +} /* namespace FxOS */ diff --git a/lib/load-header.l b/lib/load-header.l new file mode 100644 index 0000000..82d4195 --- /dev/null +++ b/lib/load-header.l @@ -0,0 +1,132 @@ +%{ +#include +#include +#include + +#include +#include +#include +#include + +/* Text value for parser */ +static char *yylval; + +/* Tokens */ +#define LITERAL 1 +#define COLON 2 +#define HEADER_END 3 + +/* Current file name and number of characters lexed */ +static std::string filename; +static int lexed; + +/* Automatically count lexed characters */ +#define YY_USER_ACTION lexed += yyleng; + +/* Error messages and exceptions */ +static void err(char const *format, ...) 
+{ + static char buf[256]; + + va_list args; + va_start(args, format); + vsnprintf(buf, 256, format, args); + va_end(args); + + throw FxOS::SyntaxError(filename.c_str(), yylineno, buf); +} + +%} + +%option prefix="header" +%option noyywrap +%option nounput + +literal [a-zA-Z0-9_-]+ +space [ \t]+ + +%% + +^#[^\n]* ; +{space} ; +[\n] yylineno++; + +{literal} { yylval = strdup(yytext); return LITERAL; } +^[ ]*-{3,}[ ]*$ { return HEADER_END; } +^. { err("invalid header line"); } + +":" { return COLON; } + +. { err("lex error near '%s'", yytext); } +<> { err("EOF reached before header ends"); } + +%% + +namespace FxOS { + +/* Load the header of a data file. */ +Header load_header(File &file, size_t &offset_ref, int &line_ref) +{ + /* Build a map of properties */ + FxOS::Header header; + + YY_BUFFER_STATE buf = yy_scan_bytes(file.data(), file.size()); + filename = file.path(); + yylineno = 1; + lexed = 0; + + /* Current line */ + int line = -1; + + /* Property name and value */ + char const *name = nullptr; + char const *value = nullptr; + + while(1) + { + int t = yylex(); + + if(line >= 0 && (yylineno != line || t == HEADER_END)) + { + /* Finalize current line */ + if(!name || !value) { + yylineno = line; + err("incomplete header line"); + } + + /* Fill in the map */ + header[name] = value; + + name = nullptr; + value = nullptr; + } + if(t == HEADER_END) break; + + line = yylineno; + + if(t == COLON) + { + if(!name || value) err("misplaced colon in header"); + } + else if(!name) + { + name = yylval; + } + else if(!value) + { + value = yylval; + } + else + { + err("unexpected stuff after header line"); + } + } + + offset_ref = lexed; + line_ref = yylineno; + + yy_delete_buffer(buf); + return header; +} + +} /* namespace FxOS */ diff --git a/lib/load.cpp b/lib/load.cpp new file mode 100644 index 0000000..e81ad16 --- /dev/null +++ b/lib/load.cpp @@ -0,0 +1,34 @@ +#include +#include + +namespace FxOS { + +/* Load any fxos data file. 
*/ +void load(std::string path) +{ + File file(path); + size_t offset; + int line; + +// std::cerr << "[fxos] loading resource file '" << path << "'...\n"; + + Header h = load_header(file, offset, line); + if(h.find("type") == h.end()) + { + throw std::runtime_error(format("no type in header of '%s'", + path.c_str())); + } + + std::string type = h["type"]; + + if(type == "assembly") + { + load_asm(file, offset, line); + return; + } + + throw std::runtime_error(format("unknown file type '%s' in '%s'", + type.c_str(), path.c_str())); +} + +} /* namespace FxOS */ diff --git a/lib/memory.cpp b/lib/memory.cpp new file mode 100644 index 0000000..3082429 --- /dev/null +++ b/lib/memory.cpp @@ -0,0 +1,126 @@ +#include +#include + +namespace FxOS { + +//--- +// Overview of the memory areas +//--- + +uint32_t MemoryArea::start() const noexcept +{ + switch(m_name) + { + case U0: return 0x00000000; + case P0: return 0x00000000; + case P1: return 0x80000000; + case P2: return 0xa0000000; + case P3: return 0xc0000000; + case P4: return 0xe0000000; + } + + return 0; +} + +uint32_t MemoryArea::end() const noexcept +{ + switch(m_name) + { + case U0: return 0x7fffffff; + case P0: return 0x7fffffff; + case P1: return 0x9fffffff; + case P2: return 0xbfffffff; + case P3: return 0xdfffffff; + case P4: return 0xffffffff; + } + + return -1; +} + +uint32_t MemoryArea::size() const noexcept +{ + return this->end() - this->start() + 1; +} + +//--- +// Fine memory region management +//--- + +MemoryRegion::MemoryRegion(std::string name, uint32_t start, uint32_t end, + bool writable): + name(name), start(start), end(end), writable(writable) +{ + this->guess_flags(); +} + +uint32_t MemoryRegion::size() const noexcept +{ + return end - start; +} + +MemoryArea MemoryRegion::area() const noexcept +{ + using Area = MemoryArea; + Area areas[5] = { Area::P4, Area::P3, Area::P2, Area::P1, Area::P0 }; + + for(int i = 0; i < 5; i++) + { + if(start >= areas[i].start()) return areas[i]; + } + + return 
Area::P0;
+}
+
+/* Derive the cacheable/mappable flags from the area returned by area() */
+void MemoryRegion::guess_flags() noexcept
+{
+    switch(this->area())
+    {
+    case MemoryArea::U0:
+    case MemoryArea::P0:
+    case MemoryArea::P3:
+        cacheable = true;
+        mappable = true;
+        break;
+
+    case MemoryArea::P1:
+        cacheable = true;
+        mappable = false;
+        break;
+
+    case MemoryArea::P2:
+    case MemoryArea::P4:
+        cacheable = false;
+        mappable = false;
+        break;
+    }
+}
+
+using R = MemoryRegion;
+
+/* Basic memory regions.
+
+   Bounds are half-open, [start, end): MemoryRegion::size() is computed as
+   end - start, region_for() below tests address < end, and
+   Binding::covers() treats [end] as exclusive as well. Each [end] is thus
+   the address one past the last byte. Regions formerly declared with
+   their last address (0x807fffff and the like) lost their final byte. */
+R const &R::ROM = MemoryRegion("ROM", 0x80000000, 0x80800000, false);
+R const &R::RAM = MemoryRegion("RAM", 0x88000000, 0x88040000, true);
+R const &R::ROM_P2 = MemoryRegion("ROM_P2", 0xa0000000, 0xa0800000, false);
+R const &R::RAM_P2 = MemoryRegion("RAM_P2", 0xa8000000, 0xa8040000, true);
+R const &R::RS = MemoryRegion("RS", 0xfd800000, 0xfd800800, true);
+R const &R::ILRAM = MemoryRegion("ILRAM", 0xe5200000, 0xe5204000, true);
+R const &R::XRAM = MemoryRegion("XRAM", 0xe5007000, 0xe5009000, true);
+R const &R::YRAM = MemoryRegion("YRAM", 0xe5017000, 0xe5019000, true);
+
+/* Find the standard region containing [address]. Returns a pointer to one
+   of the regions declared above (the first match in declaration order),
+   or nullptr if the address is in none of them. */
+MemoryRegion const *MemoryRegion::region_for(uint32_t address)
+{
+    MemoryRegion const *regs[8] = {
+        &R::ROM, &R::RAM, &R::ROM_P2, &R::RAM_P2,
+        &R::RS, &R::ILRAM, &R::XRAM, &R::YRAM
+    };
+
+    for(int i = 0; i < 8; i++)
+    {
+        if(regs[i]->start <= address && address < regs[i]->end)
+            return regs[i];
+    }
+
+    return nullptr;
+}
+
+} /* namespace FxOS */
diff --git a/lib/os.cpp b/lib/os.cpp
new file mode 100644
index 0000000..5953565
--- /dev/null
+++ b/lib/os.cpp
@@ -0,0 +1,110 @@
+#include
+#include
+#include
+
+#include
+
+namespace FxOS {
+
+OS::OS(File &file): m_target()
+{
+    /* OS files are all at least 1 MB large */
+    if(file.size() < 1000000)
+        throw std::runtime_error("OS files cannot be < 1MB");
+
+    /* Bind the given file to the internal analysis target */
+    m_target.bind_region(MemoryRegion::ROM, file);
+    m_target.bind_region(MemoryRegion::ROM_P2, file);
+
+    parse_version();
+    parse_syscall_table();
+//    parse_footer();
+}
+
+void OS::parse_version()
+{
+
/* Extract the 10-byte version string at ROM offset 0x10020, read
+       through its P1 address 0x80010020 */
+    Target &t = m_target;
+
+    char const *version = (char const *)t.translate(0x80010020, 10);
+    m_version = std::string(version, 10);
+}
+
+//---
+// Syscall resolution
+//---
+
+/* Number of entries collected by parse_syscall_table() */
+int OS::syscall_count() const noexcept
+{
+    return m_syscall_table.size();
+}
+
+/* Address of syscall number [id]. Throws std::out_of_range if [id] is not
+   a valid index in the table (negative, or >= syscall_count()) instead of
+   reading outside of it. */
+uint32_t OS::syscall(int id) const
+{
+    return m_syscall_table.at(id);
+}
+
+/* Reverse lookup: number of the syscall whose table entry is [entry], or
+   -1 if no syscall has this address. */
+int OS::find_syscall(uint32_t entry) const noexcept
+{
+    /* A plain lookup; a miss is an ordinary result here, not an error */
+    auto it = m_syscall_addresses.find(entry);
+    return (it != m_syscall_addresses.end()) ? it->second : -1;
+}
+
+/* Fill m_syscall_table (id -> address) and m_syscall_addresses (address ->
+   id). The table address is the 32-bit value stored at 0x8001007c.
+   Entries are read until one does not belong to any standard region
+   (MemoryRegion::region_for() returns nullptr). If several ids share an
+   address, the reverse map keeps the last one. read_u32() goes through
+   translate(), which throws std::out_of_range for unbound addresses. */
+void OS::parse_syscall_table()
+{
+    Target &t = m_target;
+
+    /* Traverse the syscall table */
+    uint32_t syscall_table = t.read_u32(0x8001007c);
+    int id = 0;
+
+    while(1)
+    {
+        uint32_t entry = t.read_u32(syscall_table + 4 * id);
+
+        MemoryRegion const *r = MemoryRegion::region_for(entry);
+        if(!r) break;
+
+        m_syscall_table.push_back(entry);
+        m_syscall_addresses[entry] = id;
+
+        id++;
+    }
+}
+
+//---
+// Footer search
+//---
+
+uint32_t OS::footer() const noexcept
+{
+    return m_footer;
+}
+
+/* NOTE(review): unfinished. The result of search() is discarded and
+   m_footer is not assigned in this function; the block under #if 0 is the
+   earlier C implementation kept for reference. */
+void OS::parse_footer()
+{
+    /* Find the footer address (last occurrence of "CASIOABSLangdata") */
+    uint32_t start = MemoryRegion::ROM.start;
+    uint32_t end = MemoryRegion::ROM.end;
+
+    m_target.search(start, end, "CASIOABSLangdata", 16);
+#if 0
+    char const *signature = "CASIOABSLangdata";
+    void *occ = NULL, *next = memmem(os->data, os->len, signature, 16);
+    void *end = os->data + os->len;
+
+    while(next)
+    {
+        occ = next;
+        next = memmem(next + 1, end - (next + 1), signature, 16);
+    }
+
+    os->footer = (occ) ?
(occ - os->data) : (uint32_t)-1;
+#endif
+}
+
+} /* namespace FxOS */
diff --git a/lib/target.cpp b/lib/target.cpp
new file mode 100644
index 0000000..11c1fa4
--- /dev/null
+++ b/lib/target.cpp
@@ -0,0 +1,163 @@
+#include
+#include
+
+namespace FxOS {
+
+//---
+// Simulated memory primitives
+//---
+
+/* All readers below fetch their bytes through translate(), so they throw
+   whatever translate() throws when the access is not covered. Multi-byte
+   values are assembled most significant byte first (big-endian). */
+
+int32_t AbstractMemory::read_i8(uint32_t addr) const
+{
+    int8_t *i8 = (int8_t *)translate(addr, 1);
+    return *i8;
+}
+
+uint32_t AbstractMemory::read_u8(uint32_t addr) const
+{
+    uint8_t *u8 = (uint8_t *)translate(addr, 1);
+    return *u8;
+}
+
+int32_t AbstractMemory::read_i16(uint32_t addr) const
+{
+    uint8_t *i16 = (uint8_t *)translate(addr, 2);
+    int16_t v = (i16[0] << 8) | i16[1];
+    return v;
+}
+
+uint32_t AbstractMemory::read_u16(uint32_t addr) const
+{
+    uint8_t *u16 = (uint8_t *)translate(addr, 2);
+    uint16_t v = (u16[0] << 8) | u16[1];
+    return v;
+}
+
+int32_t AbstractMemory::read_i32(uint32_t addr) const
+{
+    uint8_t *i32 = (uint8_t *)translate(addr, 4);
+    int32_t v = (i32[0] << 24) | (i32[1] << 16) | (i32[2] << 8) | i32[3];
+    return v;
+}
+
+uint32_t AbstractMemory::read_u32(uint32_t addr) const
+{
+    uint8_t *u32 = (uint8_t *)translate(addr, 4);
+    uint32_t v = (u32[0] << 24) | (u32[1] << 16) | (u32[2] << 8) | u32[3];
+    return v;
+}
+
+//---
+// Bindings of data buffers into memory regions
+//---
+
+/* Bind the contents of [file] at the start of [source_region]. The binding
+   keeps its own copy of the region; if the file is smaller than the
+   region, that copy is shrunk so that it ends where the file data ends.
+   [data] points into the buffer returned by file.data(). */
+Binding::Binding(MemoryRegion const &source_region, File &file):
+    region(source_region)
+{
+    data = reinterpret_cast(file.data());
+    size = region.size();
+
+    if(file.size() < region.size())
+    {
+        region.end = region.start + file.size();
+        size = file.size();
+    }
+}
+
+/* Check that the [size] bytes starting at [addr] all lie within
+   [region.start, region.end).
+
+   The remaining room (region.end - addr) is compared with the length
+   rather than computing addr + size: that sum wraps around 2^32 for
+   addresses near the top of the address space, which used to make such
+   ranges pass the test. [size] is reinterpreted as unsigned, which keeps
+   lengths of 2^31 and more (passed as uint32_t by search()) working. */
+bool Binding::covers(uint32_t addr, int size) const noexcept
+{
+    uint32_t length = (uint32_t)size;
+
+    if(addr < region.start || addr > region.end)
+        return false;
+    return length <= region.end - addr;
+}
+
+/* Pointer, inside [data], to the byte bound at virtual address [addr].
+   Throws std::out_of_range if the [size] bytes at [addr] are not entirely
+   covered by this binding. */
+void const *Binding::translate(uint32_t addr, int size) const
+{
+    if(!covers(addr, size))
+    {
+        throw std::out_of_range("Out of binding range");
+    }
+
+    return (void *)(data + (addr - region.start));
+}
+
+uint32_t Binding::search(uint32_t start, uint32_t end, void
const *pattern, + int size) const +{ + if(end < start || !covers(start, end - start)) + { + throw std::out_of_range("Out of binding range"); + } + if(start + size > end) return end; + + void const *data = translate(start); + void const *occurrence = memmem(data, end - start, pattern, size); + + if(!occurrence) return end; + return start + ((char *)occurrence - (char *)data); +} + +//--- +// Composite memory targets +//--- + +Target::Target(): + m_os(nullptr), m_bindings {} +{ +} + +void Target::bind_os(OS &os) +{ + m_os = &os; +} + +void Target::bind_region(MemoryRegion const ®ion, File &file) +{ + Binding b(region, file); + m_bindings.push_back(b); +} + +bool Target::covers(uint32_t addr, int size) const noexcept +{ + for(auto it = m_bindings.crbegin(); it != m_bindings.crend(); it++) + { + if(it->covers(addr, size)) return true; + } + + return false; +} + +void const *Target::translate(uint32_t addr, int size) const +{ + for(auto it = m_bindings.crbegin(); it != m_bindings.crend(); it++) + { + try + { + return it->translate(addr, size); + } + catch(std::out_of_range &e) {} + } + + throw std::out_of_range("Out of target bindings"); +} + +uint32_t Target::search(uint32_t start, uint32_t end, void const *pattern, + int size) const +{ + uint32_t occurrence; + if(end < start || !covers(start, end - start)) + { + throw std::out_of_range("Out of target bindings"); + } + + for(auto it = m_bindings.crbegin(); it != m_bindings.crend(); it++) + { + if(it->covers(start, end - start)) + { + occurrence = it->search(start, end, pattern, size); + if(occurrence != end) return occurrence; + } + } + + return end; +} + +} /* namespace FxOS */ diff --git a/lib/util.cpp b/lib/util.cpp new file mode 100644 index 0000000..b7d5ede --- /dev/null +++ b/lib/util.cpp @@ -0,0 +1,76 @@ +#include + +#include +#include + +#include +#include + +/* A file abstraction that supports both direct load and memory mapping */ +File::File(std::string file_path, bool use_mmap): + m_path(file_path), 
m_mmap(use_mmap)
+{
+    /* Loads the whole file at [file_path], either by mapping it read-only
+       (use_mmap=true) or by reading it into a heap buffer. Throws
+       std::runtime_error if the file cannot be opened, stat'd, mapped or
+       read in full; no descriptor or buffer is left behind in that case. */
+    char const *path = file_path.c_str();
+
+    /* open() reports failure with -1; 0 is a valid descriptor */
+    int fd = open(path, O_RDONLY);
+    if(fd < 0) throw std::runtime_error(format("cannot open '%s'", path));
+
+    struct stat statbuf;
+    ssize_t x = fstat(fd, &statbuf);
+
+    if(x < 0)
+    {
+        close(fd);
+        throw std::runtime_error(format("cannot stat '%s'", path));
+    }
+
+    m_size = statbuf.st_size;
+
+    if(use_mmap)
+    {
+        m_addr = (char *)mmap(nullptr, m_size, PROT_READ, MAP_SHARED,
+            fd, 0);
+        close(fd);
+
+        if(m_addr == (char *)MAP_FAILED)
+        {
+            throw std::runtime_error(format(
+                "cannot map '%s'", path));
+        }
+    }
+    else
+    {
+        m_addr = new char [m_size];
+
+        x = read(fd, m_addr, m_size);
+        close(fd);
+
+        if(x != statbuf.st_size)
+        {
+            /* The destructor does not run when the constructor
+               throws, so the buffer must be released here */
+            delete[] m_addr;
+            throw std::runtime_error(format(
+                "error while reading '%s'", path));
+        }
+    }
+}
+
+std::string File::path() const noexcept
+{
+    return m_path;
+}
+
+size_t File::size() const noexcept
+{
+    return m_size;
+}
+
+char *File::data() const noexcept
+{
+    return m_addr;
+}
+
+
+/* Release the mapping or the heap buffer, depending on how the file was
+   loaded */
+File::~File()
+{
+    if(m_mmap) munmap(m_addr, m_size);
+    else delete[] m_addr;
+}