From 468495856d89c11339b68977c9e20ef1acebb27b Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Sat, 28 Dec 2019 17:18:13 +0100 Subject: [PATCH] implement more of the meat of the tool * Separate OS and Target conceptually; now an OS is created on an existing target which must have ROM bound. * Add a configuration file with a data library and description files which are automatically loaded at startup. * As a first application, implement target descriptions. It is now possible (given the proper library) to type [fxos info fx@3.10] to get information on the fx OS version 3.10. * Set up the pass infrastructure and the first few easy passes. This is still a Work In Progress and not yet called from the command-line. * Improve the copy/move behavior of classes (C++ concerns). * Add instruction metadata, which will make it easier to write actual useful analysis passes. --- Makefile | 18 +-- data/sh3.txt | 4 +- fxos/fxos-cli.h | 3 +- fxos/info.cpp | 12 +- fxos/main.cpp | 288 +++++++++++++++++++++++++++++++++---- include/fxos/disassembly.h | 95 ++++++++++-- include/fxos/lang.h | 15 +- include/fxos/load.h | 24 ++-- include/fxos/memory.h | 4 + include/fxos/os.h | 11 +- include/fxos/target.h | 31 ++-- include/fxos/util.h | 35 ++--- lib/disassembly.cpp | 66 ++++++++- lib/lang.cpp | 45 +++++- lib/load-asm.l | 6 +- lib/load-header.l | 13 +- lib/load-target.l | 156 ++++++++++++++++++++ lib/load-target.y | 52 +++++++ lib/load.cpp | 34 ----- lib/memory.cpp | 26 +++- lib/os.cpp | 19 ++- lib/passes/cfg.cpp | 37 +++++ lib/passes/pcrel.cpp | 19 +++ lib/passes/print.cpp | 20 +++ lib/target.cpp | 23 ++- lib/util.cpp | 108 ++++++++------ 26 files changed, 946 insertions(+), 218 deletions(-) create mode 100644 lib/load-target.l create mode 100644 lib/load-target.y delete mode 100644 lib/load.cpp create mode 100644 lib/passes/cfg.cpp create mode 100644 lib/passes/pcrel.cpp create mode 100644 lib/passes/print.cpp diff --git a/Makefile b/Makefile index 5662bef..7a89fbf 100644 --- a/Makefile 
+++ b/Makefile @@ -16,6 +16,7 @@ DEPFLAGS = -MT $@ -MMD -MP -MF $(@:%.o=%.d) # Default install prefix (beware of sudo...) PREFIX ?= $(HOME)/.local +CFLAGS += -D FXOS_INSTALL_PREFIX='"$(PREFIX)"' # # Main targets @@ -29,12 +30,12 @@ all-lib: bin/libfxos.a all-fxos: bin/fxos obj = $($1:%=build/%.o) -lex = $($1:%.l=build/%.yy.c.o) +lex = $($1:%.l=build/%.yy.cpp.o) ## The library src-lib := $(wildcard lib/*.cpp lib/*/*.cpp lib/*/*/*.cpp) -lex-lib := $(wildcard lib/*.l lib/*/*.l lib/*/*.l) +lex-lib := $(wildcard lib/*.l lib/*/*.l lib/*/*/*.l) obj-lib := $(call obj,src-lib) $(call lex,lex-lib) @@ -46,7 +47,7 @@ bin/libfxos.a: $(obj-lib) | bin/ src-fxos := $(wildcard fxos/*.cpp fxos/*/*.cpp fxos/*/*/*.cpp) obj-fxos := $(call obj,src-fxos) -bin/fxos: $(obj-fxos) bin/libfxos.a | bin/ +bin/fxos: bin/libfxos.a $(obj-fxos) | bin/ $(CXX) $(obj-fxos) -o $@ $(LDFLAGS) # @@ -64,9 +65,10 @@ build/%.c.o: %.c $(CC) -c $< -o $@ $(CFLAGS) $(DEPFLAGS) # Flex lexers for the database -build/%.yy.c: %.l +build/%.yy.cpp: %.l + @mkdir -p $(dir $@) flex -o $@ -s $< -build/%.yy.c.o: build/%.yy.c +build/%.yy.cpp.o: build/%.yy.cpp $(CXX) -c $< -o $@ $(CFLAGS) -Wno-unused-function $(DEPFLAGS) bin/: @@ -80,7 +82,7 @@ include $(wildcard build/*/*.d) .PHONY: all all-lib all-fxos clean clean-lib clean-fxos distclean -.PRECIOUS: build/%.d build/%.yy.c +.PRECIOUS: build/%.d build/%.yy.cpp # # Installing @@ -101,9 +103,7 @@ install: $(TARGETS) install -d $(PREFIX)/bin install -d $(PREFIX)/share/fxos install $(TARGETS) $(m755) $(PREFIX)/bin - @echo "TODO: Install data files" - @false -# install fxos/*.txt $(m644) $(PREFIX)/share/fxos + cp -ra data/* $(PREFIX)/share/fxos uninstall: rm -f $(TARGETS:%=$(PREFIX)/%) diff --git a/data/sh3.txt b/data/sh3.txt index 066cef1..228c578 100644 --- a/data/sh3.txt +++ b/data/sh3.txt @@ -209,9 +209,9 @@ name: sh-3 0000mmmm00100011 braf rm 0000mmmm00000011 bsrf rm 10001011dddddddd bf jump8 -10001111dddddddd bf/s jump8 +10001111dddddddd bf.s jump8 10001001dddddddd bt 
jump8 -10001101dddddddd bt/s jump8 +10001101dddddddd bt.s jump8 1010dddddddddddd bra jump12 1011dddddddddddd bsr jump12 diff --git a/fxos/fxos-cli.h b/fxos/fxos-cli.h index ed269fd..900da19 100644 --- a/fxos/fxos-cli.h +++ b/fxos/fxos-cli.h @@ -5,9 +5,10 @@ #ifndef FXOS_CLI_H #define FXOS_CLI_H +#include #include /* Print general information on an OS file */ -void os_info(std::string path); +void os_info(FxOS::Target &target); #endif /* FXOS_CLI_H */ diff --git a/fxos/info.cpp b/fxos/info.cpp index da5f87b..61c6a1b 100644 --- a/fxos/info.cpp +++ b/fxos/info.cpp @@ -2,6 +2,7 @@ #include #include #include +#include using namespace FxOS; @@ -29,14 +30,11 @@ static char const *syscall_str = static char const *syscall_nonrom_str = " %%%03x -> 0x%08x (%s memory)\n"; -void os_info(std::string path) +void os_info(Target &t) { - /* Create an 8M buffer and load the ROM there */ - Buffer romfile(path, MemoryRegion::ROM.size()); - OS os(romfile); - - /* There is some stuff we want to read directly */ - Target &t = os.target; + /* Create an OS analysis over t. If t does not have a ROM mapped, this + will fail with an exception */ + OS os(t); printf(info_str, &os.bootcode_timestamp, os.bootcode_timestamp.value.c_str(), diff --git a/fxos/main.cpp b/fxos/main.cpp index 46aa914..4d5978b 100644 --- a/fxos/main.cpp +++ b/fxos/main.cpp @@ -1,18 +1,29 @@ #include "fxos-cli.h" #include #include +#include +#include #include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; using namespace FxOS; static char const *help_string = R"( usage: fxos info - fxos disasm (-a
| -s ) [options...] + fxos disasm [options...] fxos disasm -b [options...] fxos analyze [-f] [-s] [-a] [-r] [options...] -fxos is a reverse-engineering tool to disassemble and analyze fx9860g-like -OS dumps, providing efficient annotations through an editable database. +fxos is a reverse-engineering tool that disassembles and analyzes SuperH +programs and OS dumps for fx9860g and fxcg50-like CASIO calculators, using an +editable database of platforms, syscalls, and OS knowledge. Commands: info Identify an OS image: version, platform, date, checksums... @@ -31,20 +42,25 @@ Database extensions: --load Read documentation from --load Read documentation recursively from +Disassembly file selection: + Disassemble this target from the database (eg. "fx@3.10") + -f Disassemble this file as standalone ROM + Disassembly options: - -a
Start disassembling at this address - -s Start disassembling at this syscall's address - -l Length of region - --passes= Execute the specified comma-separated list of passes +
Start disassembling at this address (hexa) +
: Disassemble exactly the specified region. is an + hexadecimal number optionnally followed by k, M, or G. + % Start disassembling at this syscall's address (hexa) + -p Execute the specified comma-separated list of passes Available passes: + cfg Build the control flow graph (always required) pcrel Resolve PC-relative references as their target address - cfg Build the control flow graph (uses pcrel) - cstprop Propagate constants by abstract interpretation (uses cfg) + cstprop Propagate constants by abstract interpretation syscall Annotate code with reverse syscalls regs Annotate code with peripheral register addresses -The default sequence of passes is pcrel,cfg,cstprop,syscall,regs. When +The default sequence of passes is cfg,pcrel,cstprop,syscall,regs. When disassembling a function (ie. no size specified on the command-line), the pcrel and cfg passes are always executed to explore the function. @@ -56,13 +72,121 @@ Analysis modes: Analysis options: --occurrences Show at most occurrences (integer or "all") - -All numbers support base prefixes "0" (octal) and "0x" (hexadecimal). 
)"+1; +//--- +// Configuration +//--- + +std::map targets; +std::vector library { FXOS_INSTALL_PREFIX "/share/fxos" }; + +/* Load any fxos data file */ +void load(std::string path) +{ + Buffer file(path); + size_t offset; + int line; + +// std::cerr << "[fxos] loading resource file '" << path << "'...\n"; + + Header h = load_header(file, offset, line); + if(h.find("type") == h.end()) + { + std::cerr << "error: no type in header of '" << path << "'\n"; + return; + } + + std::string type = h["type"]; + + if(type == "assembly") + { + try { + load_asm(file, offset, line); + } + catch(FxOS::SyntaxError &e) { + std::cerr << e.file() << ":" << e.line() << ": " << + e.what() << "\n" << std::flush; + } + return; + } + else if(type == "target") + { + if(!h.count("name")) + { + std::cerr << "error: no name specified in '" << path + << "'\n"; + return; + } + + try { + targets[h["name"]] = load_target(file, offset, line); + } + catch(FxOS::SyntaxError &e) { + std::cerr << e.file() << ":" << e.line() << ": " << + e.what() << "\n" << std::flush; + } + return; + } + + std::cerr << "unknown file type '" << type << "' in '" << path <<"'\n"; +} + +/* Load a whole folder into the database */ +void loadfolder(std::string path) +{ + try + { + fs::recursive_directory_iterator it(path); + for(auto &file: it) load(file.path()); + } + catch(fs::filesystem_error &e) + { + if(e.code().value() == ENOENT) + { + std::cerr << "warning: directory '" << path << "' does" + " not exist\n"; + } + else throw e; + } +} + +void loadconfig(void) +{ + std::string home = getenv("HOME"); + fs::path configpath = home + "/.config/fxos/config"; + + if(!fs::exists(configpath)) return; + + std::ifstream stream(configpath); + + /* Read line by line and register paths for the library or load files + from the database */ + while(stream) + { + char path[256]; + std::string line; + std::getline(stream, line); + + if(std::sscanf(line.c_str(), "library: %256s", path) == 1) + { + library.push_back(path); + } + else 
if(std::sscanf(line.c_str(), "load: %256s", path) == 1) + { + loadfolder(path); + } + } +} + +//--- +// Main routines +//--- + int main_info(int argc, char **argv) { - int error=0, option=0, mpu=0; + int error=0, option=0, mpu='4'; + std::string path; struct option const longs[] = { { "help", no_argument, NULL, 'h' }, @@ -71,35 +195,60 @@ int main_info(int argc, char **argv) }; while(option >= 0 && option != '?') - switch((option = getopt_long(argc, argv, "h34", longs, NULL))) + switch((option = getopt_long(argc, argv, "h34f:p:", longs, NULL))) { case 'h': std::cerr << help_string; break; case '3': case '4': - /* TODO: Use sh3/sh4 information in [fxos info)? */ + /* TODO: Use sh3/sh4 information in [fxos info]? */ mpu = option; break; + case 'f': + path = optarg; + break; case '?': error = 1; } if(error) return 1; - char const *path = argv[optind + 1]; - if(!path) + /* Load from path if one is specified */ + if(path.size()) { - std::cerr << help_string; - return 1; + try { + /* Load the file in ROM over 8M */ + Buffer romfile(path, MemoryRegion::ROM.size()); + Target t; + t.bind_region(MemoryRegion::ROM, romfile); + t.bind_region(MemoryRegion::ROM_P2, romfile); + os_info(t); + } + catch(std::exception &e) { + std::cerr << "error: " << e.what() << "\n"; + return 1; + } } + /* Load from target otherwise */ + else + { + if(!argv[optind + 1]) + { + std::cerr << help_string; + return 1; + } - try { - os_info(path); - } - catch(std::exception &e) { - std::cerr << "error: " << e.what() << "\n"; - return 1; + std::string targetname = argv[optind + 1]; + if(!targets.count(targetname)) + { + std::cerr << "error: no target '" << targetname + << "' in library\n"; + return 1; + } + + Target t(targets[targetname], library); + os_info(t); } return 0; @@ -107,22 +256,100 @@ int main_info(int argc, char **argv) int main_disassembly(int argc, char **argv) { - std::cerr << "doing main_disasm, which is incomplete x_x\n"; + int error=0, option=0, mpu='4'; + std::vector passes { + 
"cfg", "pcrel", "constprop", "syscall", "regs" + }; + std::string file; - try { + struct option const longs[] = { + { "help", no_argument, NULL, 'h' }, + { "sh3", no_argument, NULL, '3' }, + { "sh4", no_argument, NULL, '4' }, + }; + + while(option >= 0 && option != '?') + switch((option = getopt_long(argc, argv, "h34p:f:", longs, NULL))) + { + case 'h': + std::cerr << help_string; + break; + case '3': + case '4': + mpu = option; + break; + case 'p': + passes.clear(); + + { + std::istringstream in(optarg); + std::string pass; + + while(std::getline(in, pass, ',')) { + passes.push_back(pass); + } + } + + break; + case 'f': + file = optarg; + break; + case '?': + error = 1; + } + + int remaining_args = (file.size() ? 1 : 2); + + if(argc < optind + remaining_args + 1) + { + std::cerr << "error: missing file or address\n"; + error = 1; + } + else if(argc > optind + remaining_args + 1) + { + std::cerr << "error: excess arguments\n"; + error = 1; + } + if(error) return 1; + +/* try { FxOS::load("data/sh3.txt"); - FxOS::load("data/sh4.txt"); + if(mpu == '4') FxOS::load("data/sh4.txt"); } catch(FxOS::SyntaxError &e) { std::cerr << e.file() << ":" << e.line() << ": " << e.what() << "\n" << std::flush; return 1; + } */ + + if(!file.size()) + { + std::string targetname = argv[optind + 1]; + + if(!targets.count(targetname)) + { + std::cerr << "error: no target '" << targetname + << "' in library\n"; + return 1; + } + + Target t(targets[targetname], library); + + char const *ref = argv[optind + 2]; + + std::cout << "disassembling target:" << targetname << " ref:" << ref << "\n"; + } + else + { + char const *ref = argv[optind + 1]; + + std::cout << "disassembling file:" << file << " ref:" << ref << "\n"; } return 0; } -int main_analyze(int argc, char **argv) +int main_analyze(__attribute__((unused)) int argc, __attribute__((unused)) char **argv) { std::cerr << "doing main_analyze, which is incomplete x_x\n"; return 0; @@ -139,6 +366,9 @@ int main(int argc, char **argv) 
std::string cmd = argv[1]; argv[1] = (char *)""; + /* Load the configuration file if it exists */ + loadconfig(); + if(cmd == "info") return main_info(argc, argv); else if(cmd == "disasm") diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index d94c6ad..c33650c 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -9,9 +9,11 @@ #include #include +#include +#include +#include #include #include -#include namespace FxOS { @@ -51,7 +53,6 @@ struct ConcreteInstructionArg struct ConcreteInstruction { ConcreteInstruction(Instruction &inst); - ConcreteInstruction(ConcreteInstruction const &other) = default; /* What instruction it is */ Instruction &inst; @@ -63,19 +64,32 @@ struct ConcreteInstruction // Data set by the pcrel pass //--- - /* Jump targets, used for jump instructions only. The first jmp is for - unconditional jumps; jmpt and jmpf are for conditional jumps. In - many situations the jump is forced on a general instruction by a - preceding branch due to the delay slot mechanism. */ - union { uint32_t jmp, jmpt; }; - uint32_t jmpf; + /* Jump targets, used for jump instructions only. The target might + either be that of an unconditional jump, or the non-trivial target + of a conditional jump. In many situations the jump is forced on a + general instruction by a preceding delayed branch. */ + uint32_t jmptarget; + /* Whether the instruction is terminal. Be careful, as this attribute + is often forced onto delayed slot instructions. It is thus NOT the + same as isterminal(), which tells whether the mnemonic implies a + function exit. Said exit is generally delayed. 
*/ + bool terminal; - //--- - // Data set by the cfg pass - //--- + //--- + // Data set by the cfg pass + //--- - /* Whether this instruction is a basic block leader */ - bool leader; + /* Whether this instruction is a basic block leader */ + bool leader; + + //--- + // Methods and utilities + //--- + + bool isterminal() const noexcept { return inst.isterminal(); } + bool isjump() const noexcept { return inst.isjump(); } + bool iscondjump() const noexcept { return inst.iscondjump(); } + bool isdelayed() const noexcept { return inst.isdelayed(); } }; /* Short aliases */ @@ -99,6 +113,61 @@ private: std::map m_instructions; }; +//--- +// Disassembler passes +//--- + +class DisassemblyPass +{ +public: + DisassemblyPass(Disassembly &disasm); + + /* Analyze a single instruction, probably updating the annotations and + the state of the pass itself. Should return true if the state of the + instruction changed. */ + virtual void analyze(uint32_t pc, ConcreteInstruction &inst) = 0; + + /* Run the pass from the given entry point */ + void run(uint32_t entry_pc); + +protected: + /* Add an instruction to the queue to analyze next */ + void enqueue(uint32_t pc); + /* Enqueue the unseen successors of this instruction */ + void enqueue_unseen_successors(uint32_t pc, ConcreteInstruction &inst); + /* Enqueue all the successors of this instruction */ + void enqueue_all_successors(uint32_t pc, ConcreteInstruction &inst); + + /* Underlying disassembly */ + Disassembly &m_disasm; + +private: + /* Blocks to visit next, ordered for uniqueness */ + std::set m_next; + std::priority_queue m_queue; + + /* Visited blocks */ + std::set m_seen; +}; + +class CfgPass: public DisassemblyPass +{ + CfgPass(Disassembly &disasm); + void analyze(uint32_t pc, ConcreteInstruction &inst) override; +}; + +class PcrelPass: public DisassemblyPass +{ + PcrelPass(Disassembly &disasm); + void analyze(uint32_t pc, ConcreteInstruction &inst) override; +}; + +class PrintPass: public DisassemblyPass +{ + 
PrintPass(Disassembly &disasm); + void analyze(uint32_t pc, ConcreteInstruction &inst) override; +}; + } /* namespace FxOS */ #endif /* LIBFXOS_DISASSEMBLY_H */ diff --git a/include/fxos/lang.h b/include/fxos/lang.h index 61b4181..d5e6c29 100644 --- a/include/fxos/lang.h +++ b/include/fxos/lang.h @@ -120,12 +120,23 @@ struct Instruction /* Mnemonic **without the size indicator** */ std::string mnemonic; - /* Operation size (0, 1, 2 or 4) */ int opsize; - /* Arguments */ std::vector args; + + //--- + // Instruction classes + //--- + + /* Check whether instruction terminates the function */ + bool isterminal() const noexcept; + /* Check whether instruction is an unconditional jump */ + bool isjump() const noexcept; + /* Check whether it's a conditional jump */ + bool iscondjump() const noexcept; + /* Check whether instruction has a delay slot */ + bool isdelayed() const noexcept; }; } /* namespace FxOS */ diff --git a/include/fxos/load.h b/include/fxos/load.h index 7c4d6d3..09912e6 100644 --- a/include/fxos/load.h +++ b/include/fxos/load.h @@ -6,6 +6,7 @@ #define LIBFXOS_LOAD_H #include +#include #include #include @@ -14,13 +15,6 @@ namespace FxOS { using Header = std::map; -/* Load any fxos data file. - @file Data file, assumed to follow the fxos header and data format. - - This function reads the header with load_header() then calls the appropriate - lexer and loader depending on the type specified in the header. */ -void load(std::string path); - /* Load the header of a data file. @file Data file, assumed with an fxos header type-specific contents @offset Will be set to the byte offset where content starts @@ -35,11 +29,17 @@ void load(std::string path); the lexers in all other load functions. */ Header load_header(Buffer const &file, size_t &offset, int &line); -/* Load an assembly instruction table for the disassembler. 
- @file Data file, presumably analyzed with lex_header() - @start_offset Offset of assembly data in the file - @start_line Line where assembly data starts in the file (for errors) */ -void load_asm(Buffer const &file, size_t start_offset, size_t start_line); +/* Load an assembly instruction table for the disassembler. This function + directly feeds the disassembler and does not return anything. + + @file Data file, presumably analyzed with lex_header() + @offset Offset of assembly data in the file (as set by load_header) + @line Line where assembly data starts in the file (idem) */ +void load_asm(Buffer const &file, size_t offset, size_t line); + +/* Load a target description into the target database. This function returns + the complete target description */ +TargetDescription load_target(Buffer const &file, size_t offset, size_t line); } /* namespace FxOS */ diff --git a/include/fxos/memory.h b/include/fxos/memory.h index 63496ae..a6e136c 100644 --- a/include/fxos/memory.h +++ b/include/fxos/memory.h @@ -63,9 +63,13 @@ struct MemoryRegion Throws std::out_of_range if none. */ static MemoryRegion const *region_for(uint32_t address); + /* Empty region at 0 */ + MemoryRegion(); /* Short constructor which calls guess_flags() */ MemoryRegion(std::string name, uint32_t start, uint32_t end, bool writable); + /* Short constructor for standard regions only */ + MemoryRegion(std::string standard_region_name); /* Region name */ std::string name {}; diff --git a/include/fxos/os.h b/include/fxos/os.h index 7bca245..fe773ff 100644 --- a/include/fxos/os.h +++ b/include/fxos/os.h @@ -16,11 +16,9 @@ namespace FxOS { class OS { public: - /* Load an OS from a buffer. */ - OS(Buffer &buffer); - /* This target contains just the OS in ROM and ROM_P2. It can be used - freely to extract data which is not already available here. */ - Target target; + /* Create an OS interface for this target. If the target does not have + data loaded in ROM, this raises an exception. 
*/ + OS(Target &target); /* Bootcode timestamp and checksum */ Addressable bootcode_timestamp; @@ -45,6 +43,9 @@ public: int langdata; private: + /* Target being analyzed */ + Target &m_target; + /* Parse the OS header. This should be the first analysis function to be called, because it determines the type of model (ie. fx9860g vs fxcg50) thus the location of the syscall table and many more diff --git a/include/fxos/target.h b/include/fxos/target.h index c6a12b0..9a15962 100644 --- a/include/fxos/target.h +++ b/include/fxos/target.h @@ -8,13 +8,12 @@ #include #include +#include #include #include namespace FxOS { -class OS; - /* A common interface for simulated memory */ class AbstractMemory { @@ -65,7 +64,7 @@ struct Binding: public AbstractMemory /* Constructor from data buffer. An error is raised if the buffer is not at least of the size of the region. In this case, a new buffer can be constructed with the required size. */ - Binding(MemoryRegion const ®ion, Buffer const &buffer); + Binding(MemoryRegion region, Buffer const &buffer); /* Targeted region, might overlap with other bindings */ MemoryRegion region; @@ -86,6 +85,14 @@ struct Binding: public AbstractMemory int size) const override; }; +/* A target description in the database; loadable, but not loaded yet */ +struct TargetDescription +{ + /* Just a list of bindings to be formed */ + using Binding = std::pair; + std::vector bindings; +}; + /* A composite target where regions can be bound dynamically */ class Target: public AbstractMemory { @@ -93,10 +100,10 @@ public: /* Create an empty target with no regions */ Target(); - /* Bind an OS. This is used to either disassemble the OS itself, or - select the OS version for which code is being disassembled (for - instance for add-ins). */ - void bind_os(OS &os); + /* Create a target from a target description by loading files from a + library. */ + Target(TargetDescription const &description, + std::vector folders); /* Bind a memory region from a buffer. 
The region can either be standard (see ) or custom. @@ -105,11 +112,6 @@ public: loaded region will be used*. Thus, new regions can be loaded to selectively override parts of the target. - Generally it is preferable to bind an OS image to the target's ROM - area using bind_os(), rather than using bind_region(). This is - because bind_os() which will also enable OS-specific tasks such as - syscall resolution. - An error is raised if the buffer is smaller than the region being bound. */ void bind_region(MemoryRegion const ®ion, Buffer const &buffer); @@ -125,11 +127,10 @@ public: int size) const override; private: - /* Bound OS image */ - OS *m_os; - /* Bound regions (in order of binding) */ std::vector m_bindings; + /* Buffers owned by the target (when loaded from description) */ + std::vector m_buffers; }; } /* namespace FxOS */ diff --git a/include/fxos/util.h b/include/fxos/util.h index acdd1c0..7e9386a 100644 --- a/include/fxos/util.h +++ b/include/fxos/util.h @@ -10,6 +10,7 @@ #include #include #include +#include /* Format a string with printf() syntax */ template @@ -49,9 +50,17 @@ struct Addressable class Buffer { public: + /* Empty buffer with size 0 and no pointer */ + Buffer(); + /* Empty buffer initialized with given byte */ Buffer(size_t size, int fill=0x00); + /* Object is movable and move assignable but not copyable */ + Buffer(Buffer const &other) = delete; + Buffer(Buffer &&other); + Buffer & operator=(Buffer &&other); + /* Buffer initialized from file, reading the given size from the beginning of the file. If the file is smaller than the specified size, the buffer is padded. @@ -59,8 +68,10 @@ public: If this constructor is used, the file path is remembered. */ Buffer(std::string filepath, ssize_t size=-1, int fill=0x00); - /* Create a buffer by copying another buffer */ - Buffer(Buffer const &other); + /* Buffer initialized from file by looking in one of the specified + directories only. 
*/ + Buffer(std::string filepath, std::vector &folders, + ssize_t size=-1, int fill=0x00); /* Create a buffer by copying and resizing another buffer */ Buffer(Buffer const &other, size_t new_size, int fill=0x00); @@ -68,20 +79,12 @@ public: /* Free allocated data, obviously */ ~Buffer(); - /* Get buffer size */ - size_t size() const noexcept; - - /* Get data */ - char *data() noexcept; - char const *data() const noexcept; - - /* Get file path, when constructed from file */ - std::string path() const noexcept; - -private: - void *m_data; - size_t m_size; - std::string m_path; + /* Buffer size */ + size_t size; + /* Data */ + char *data; + /* File path, when constructed from file */ + std::string path; }; #endif /* LIBFXOS_UTIL_H */ diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index 4a7f997..6c03cf6 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -22,7 +22,7 @@ void register_instruction(Instruction ins) } //--- -// Concrete (instanciated) arguments and instructions +// Concrete (instantiated) arguments and instructions //--- ConcreteInstructionArg::ConcreteInstructionArg(): @@ -32,7 +32,7 @@ ConcreteInstructionArg::ConcreteInstructionArg(): } ConcreteInstruction::ConcreteInstruction(Instruction &inst): - inst {inst}, jmpt {}, jmpf {}, leader {false} + inst {inst}, jmptarget {}, leader {false} { } @@ -64,10 +64,68 @@ ConcreteInstruction &Disassembly::readins(uint32_t pc) Instruction &inst = *insmap[opcode]; ConcreteInstruction ci(inst); -// std::pair p(pc, ci); -// m_instructions.emplace(std::make_pair(pc, ci)); + m_instructions.emplace(std::make_pair(pc, ci)); return m_instructions.at(pc); } } +//--- +// Base pass +//--- + +DisassemblyPass::DisassemblyPass(Disassembly &disasm): + m_disasm(disasm) +{ +} + +void DisassemblyPass::enqueue(uint32_t pc) +{ + if(m_next.count(pc)) return; + + m_next.insert(pc); + m_queue.push(pc); +} + +void DisassemblyPass::enqueue_unseen_successors(uint32_t pc, + ConcreteInstruction &inst) +{ + if(!inst.isterminal() 
&& !inst.isjump()) + { + if(!m_seen.count(pc + 2)) enqueue(pc + 2); + } + if(inst.isjump() || inst.iscondjump()) + { + if(!m_seen.count(inst.jmptarget)) enqueue(inst.jmptarget); + } +} + +void DisassemblyPass::enqueue_all_successors(uint32_t pc, + ConcreteInstruction &inst) +{ + if(!inst.isterminal() && !inst.isjump()) + { + enqueue(pc + 2); + } + if(inst.isjump() || inst.iscondjump()) + { + enqueue(inst.jmptarget); + } +} + +void DisassemblyPass::run(uint32_t entry_pc) +{ + enqueue(entry_pc); + + while(m_queue.size()) + { + uint32_t pc = m_queue.top(); + + m_queue.pop(); + m_next.erase(m_next.find(pc)); + + ConcreteInstruction &ci = m_disasm.readins(pc); + analyze(pc, ci); + } +} + } /* namespace FxOS */ diff --git a/lib/lang.cpp b/lib/lang.cpp index 7468b2e..7eb2f14 100644 --- a/lib/lang.cpp +++ b/lib/lang.cpp @@ -12,7 +12,7 @@ namespace FxOS { using Reg = CpuRegister::CpuRegisterName; -static std::map regnames = { +static std::map regnames = { { Reg::R0, "r0" }, { Reg::R1, "r1" }, { Reg::R2, "r2" }, @@ -181,7 +181,7 @@ std::string Argument::str() const } //--- -// Instruction creation +// Instruction management //--- Instruction::Instruction(std::string mn): @@ -221,5 +221,46 @@ Instruction::Instruction(std::string mn, Argument arg1, Argument arg2): args.push_back(arg2); } +//--- +// Instruction classes +//--- + +bool Instruction::isterminal() const noexcept +{ + if(mnemonic == "rte" || mnemonic == "rts") return true; + + /* Also jmp @rn which is regarded as a terminal call */ + if(mnemonic == "jmp" && args[0].kind == Argument::Deref) return true; + /* Same for braf because we can't analyse further */ + if(mnemonic == "braf") return true; + + return false; +} + +bool Instruction::isjump() const noexcept +{ + return (mnemonic == "bra"); +} + +bool Instruction::iscondjump() const noexcept +{ + std::vector v { + "bf", "bf.s", "bf/s", "bt", "bt.s", "bt/s" + }; + + for(auto el: v) if(mnemonic == el) return true; + return false; +} + +bool Instruction::isdelayed() 
const noexcept
+{
+	std::vector v {	/* mnemonics for which isdelayed() answers true */
+		"rte", "rts", "jmp", "jsr", "bra", "braf", "bsr", "bsrf",
+		"bf.s", "bf/s", "bt.s", "bt/s",
+	};
+
+	for(auto el: v) if(mnemonic == el) return true;
+	return false;
+}
 
 } /* namespace FxOS */
diff --git a/lib/load-asm.l b/lib/load-asm.l
index 531f034..734333e 100644
--- a/lib/load-asm.l
+++ b/lib/load-asm.l
@@ -268,10 +268,10 @@ void load_asm(Buffer const &file, size_t start_offset, size_t start_line)
 {
 	/* Lex all instructions and fill in the general assembly table */
 
-	YY_BUFFER_STATE buf = yy_scan_bytes(file.data() + start_offset,
-		file.size() - start_offset);
+	YY_BUFFER_STATE buf = yy_scan_bytes(file.data + start_offset,
+		file.size - start_offset);
 	yylineno = start_line;
-	filename = file.path();
+	filename = file.path;	/* data, size and path are read as plain members here */
 
 	/* Instruction information */
 	char *code=nullptr, *mnemonic=nullptr;
diff --git a/lib/load-header.l b/lib/load-header.l
index fd5cdcb..8cb2622 100644
--- a/lib/load-header.l
+++ b/lib/load-header.l
@@ -42,7 +42,7 @@ static void err(char const *format, ...)
%option noyywrap
 %option nounput
 
-literal [a-zA-Z0-9_-]+
+literal [^ \t\n:]+
 space [ \t]+
 
 %%
@@ -70,8 +70,8 @@ Header load_header(Buffer const &file, size_t &offset_ref, int &line_ref)
 	/* Build a map of properties */
 	FxOS::Header header;
 
-	YY_BUFFER_STATE buf = yy_scan_bytes(file.data(), file.size());
-	filename = file.path();
+	YY_BUFFER_STATE buf = yy_scan_bytes(file.data, file.size);
+	filename = file.path;
 	yylineno = 1;
 	lexed = 0;
 
@@ -79,8 +79,8 @@ Header load_header(Buffer const &file, size_t &offset_ref, int &line_ref)
 	int line = -1;
 
 	/* Property name and value */
-	char const *name = nullptr;
-	char const *value = nullptr;
+	char *name = nullptr;	/* no longer const so that it can be passed to free() */
+	char *value = nullptr;
 
 	while(1)
 	{
@@ -97,6 +97,9 @@ Header load_header(Buffer const &file, size_t &offset_ref, int &line_ref)
 		/* Fill in the map */
 		header[name] = value;
 
+		free(name);	/* presumably header keeps its own copies — TODO confirm */
+		free(value);
+
 		name = nullptr;
 		value = nullptr;
 	}
diff --git a/lib/load-target.l b/lib/load-target.l
new file mode 100644
index 0000000..7991fde
--- /dev/null
+++ b/lib/load-target.l
@@ -0,0 +1,156 @@
+%{
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+/* Tokens (small integer codes defined for the lexer of this file) */
+#define NAME 0
+#define ADDRESS 1
+#define SIZE 2
+#define PATH 3
+
+/* Value for parser (one member per token kind listed above) */
+static union {
+	char *name;
+	uint32_t address;
+	uint32_t size;
+	char *path;
+} yylval;
+
+/* Current file name */
+static std::string filename;
+
+/* Error messages and exceptions */
+__attribute__((noreturn))
+static void err(char const *format, ...)
+{
+	static char buf[256];
+
+	va_list args;
+	va_start(args, format);
+	vsnprintf(buf, 256, format, args);	/* message is truncated to fit buf */
+	va_end(args);
+
+	throw FxOS::SyntaxError(filename.c_str(), yylineno, buf);
+}
+
+%}
+
+%option prefix="target"
+%option noyywrap
+%option nounput
+
+name [a-zA-Z_][a-zA-Z0-9_]*
+hexa [0-9a-fA-F]+
+path [^: \t\n]+$
+space [ \t]+
+
+%%
+
+^#[^\n]* ;
+{space} ;
+[\n] yylineno++;
+
+{hexa} { sscanf(yytext, "%x", &yylval.address); return ADDRESS; }
+"("{hexa}")" { sscanf(yytext, "(%x)", &yylval.size); return SIZE; }
+{name} { yylval.name = strdup(yytext); return NAME; }
+":" { return ':'; }
+{path} { yylval.path = strdup(yytext); return PATH; }
+
+. { err("lex error near '%s'", yytext); }
+<<EOF>> { return -1; }
+
+%%
+
+namespace FxOS {
+
+/* Read one token; return it if listed in types, else raise a syntax error */
+static int expect(std::vector types)
+{
+	std::array description {
+		"region name", "address", "size", "file path" };
+
+	int t = yylex();
+	for(int allowed: types) if(t == allowed) return t;
+
+	std::string errmsg = "expected";
+	for(int allowed: types)
+	{
+		if(allowed >= 0 && allowed < (int)description.size())
+		{
+			errmsg += " ";
+			errmsg += description[allowed];	/* name the accepted token, not the one read */
+			errmsg += ",";
+		}
+		else if(allowed == -1)
+		{
+			errmsg += " end of file,";
+		}
+		else if(allowed == ':')
+		{
+			errmsg += " colon,";
+		}
+	}
+	err("%s", errmsg.c_str());	/* never use the built message as a format string */
+}
+
+/* Single-token convenience wrapper around expect() */
+static int expect(int type)
+{
+	std::vector types { type };
+	return expect(types);
+}
+
+/* Load a target description into the target database. */
+TargetDescription load_target(Buffer const &file, size_t offset, size_t line)
+{
+	/* Build a target description without actually loading the files.
*/
+	TargetDescription descr;
+
+	YY_BUFFER_STATE buf = yy_scan_bytes(file.data + offset,
+		file.size - offset);
+	yylineno = line;
+	filename = file.path;
+
+	while(1)
+	{
+		MemoryRegion reg;
+
+		/* One iteration per line */
+		int t = expect({ NAME, ADDRESS, -1 });
+		if(t == -1) break;	/* -1 ends the loop */
+
+		if(t == NAME)
+		{
+			reg = MemoryRegion(yylval.name);
+			free(yylval.name);
+		}
+		else if(t == ADDRESS)
+		{
+			uint32_t start = yylval.address;
+
+			expect(SIZE);
+			uint32_t size = yylval.size;
+
+			reg = MemoryRegion("(anonymous)", start, start + size,
+				true);
+		}
+
+		expect(':');
+		expect(PATH);
+
+		std::string path = yylval.path;	/* copy before freeing the token text */
+		free(yylval.path);
+
+		TargetDescription::Binding b = std::make_pair(reg, path);
+		descr.bindings.push_back(b);
+	}
+
+	/* NOTE(review): buf is only released on this normal exit path — confirm */
+	yy_delete_buffer(buf);
+	return descr;
+}
+
+} /* namespace FxOS */
diff --git a/lib/load-target.y b/lib/load-target.y
new file mode 100644
index 0000000..5653a65
--- /dev/null
+++ b/lib/load-target.y
@@ -0,0 +1,52 @@
+%{
+
+#include
+#include
+#include
+#include
+
+/* The target description to be filled */
+FxOS::TargetDescription t;
+
+using BindingDescription = std::pair;
+
+struct ValueType
+{
+	BindingDescription binding;
+	FxOS::MemoryRegion region;
+};
+
+%}
+
+/* Tokens */
+%token NAME
+%token ADDRESS
+%token SIZE
+%token PATH
+
+%define api.value.type {ValueType}
+
+%type binding
+%type region
+
+%%
+
+main:
+	init target {}
+init:
+	%empty { t = FxOS::TargetDescription(); }
+
+target:
+	%empty {}
+	| binding target { t.bindings.push_back($1); }
+
+binding:
+	region ':' PATH { $$ = std::make_pair($1, std::string($3));
+		free($3); }
+
+region:
+	NAME { $$ = FxOS::MemoryRegion($1); free($1); }
+	| ADDRESS { $$ = FxOS::MemoryRegion("anon", $1, $1, true); }
+	| ADDRESS '(' SIZE ')' { $$ = FxOS::MemoryRegion("anon", $1, $1+$3, true); }
+
+%%
diff --git a/lib/load.cpp b/lib/load.cpp
deleted file mode 100644
index 247bb91..0000000
--- a/lib/load.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-#include
-#include
-
-namespace FxOS {
-
-/* Load
any fxos data file. */
-void load(std::string path)
-{
-	Buffer file(path);
-	size_t offset;
-	int line;
-
-//	std::cerr << "[fxos] loading resource file '" << path << "'...\n";
-
-	Header h = load_header(file, offset, line);
-	if(h.find("type") == h.end())
-	{
-		throw std::runtime_error(format("no type in header of '%s'",
-			path.c_str()));
-	}
-
-	std::string type = h["type"];
-
-	if(type == "assembly")
-	{
-		load_asm(file, offset, line);
-		return;
-	}
-
-	throw std::runtime_error(format("unknown file type '%s' in '%s'",
-		type.c_str(), path.c_str()));
-}
-
-} /* namespace FxOS */
diff --git a/lib/memory.cpp b/lib/memory.cpp
index 3082429..0d6c2cb 100644
--- a/lib/memory.cpp
+++ b/lib/memory.cpp
@@ -46,9 +46,14 @@ uint32_t MemoryArea::size() const noexcept
 // Fine memory region management
 //---
 
+MemoryRegion::MemoryRegion():	/* placeholder region: named "null", zero range, read-only */
+	name {"null"}, start {0x00000000}, end {0x00000000}, writable {false}
+{
+}
+
 MemoryRegion::MemoryRegion(std::string name, uint32_t start, uint32_t end,
 	bool writable):
-	name(name), start(start), end(end), writable(writable)
+	name {name}, start {start}, end {end}, writable {writable}
 {
 	this->guess_flags();
 }
@@ -123,4 +128,23 @@ MemoryRegion const *MemoryRegion::region_for(uint32_t address)
 	return nullptr;
 }
 
+MemoryRegion::MemoryRegion(std::string name)
+{
+	MemoryRegion const *regs[8] = {	/* the standard regions that can be looked up by name */
+		&R::ROM, &R::RAM, &R::ROM_P2, &R::RAM_P2,
+		&R::RS, &R::ILRAM, &R::XRAM, &R::YRAM
+	};
+
+	for(int i = 0; i < 8; i++)
+	{
+		if(regs[i]->name == name)
+		{
+			*this = *regs[i];	/* become a copy of the matching region */
+			return;
+		}
+	}
+
+	throw std::runtime_error("No standard region named '" + name + "'");
+}
+
 } /* namespace FxOS */
diff --git a/lib/os.cpp b/lib/os.cpp
index d4110ca..8a8e1da 100644
--- a/lib/os.cpp
+++ b/lib/os.cpp
@@ -6,15 +6,14 @@ namespace FxOS {
 
-OS::OS(Buffer &buffer): target()
+OS::OS(Target &t): m_target {t}
 {
-	/* OS files are all at least 1 MB large */
-	if(buffer.size() < 1000000)
-		throw std::runtime_error("OS files cannot be < 1MB");
+	if(!t.covers(0x80000000))
+
throw std::runtime_error("OS requires a target with ROM");
 
-	/* Bind the given file to the internal analysis target */
-	this->target.bind_region(MemoryRegion::ROM, buffer);
-	this->target.bind_region(MemoryRegion::ROM_P2, buffer);
+	/* OS files are all at least 1 MB large, so require that much ROM */
+	if(!t.covers(0x80000000, 1000000))
+		throw std::runtime_error("OS requires target with >1MB ROM");
 
 	parse_header();
 	parse_syscall_table();
@@ -23,7 +22,7 @@ OS::OS(Buffer &buffer): target()
 
 void OS::parse_header()
 {
-	Target &t = this->target;
+	Target &t = m_target;
 
 	/* Bootcode timestamp at 0xffb0 (the very end of the bootcode) */
 	this->bootcode_timestamp = t.read_str(0x8000ffb0, 14);
@@ -62,7 +61,7 @@ int OS::find_syscall(uint32_t entry) const noexcept
 
 void OS::parse_syscall_table()
 {
-	Target &t = this->target;
+	Target &t = m_target;
 
 	/* Traverse the syscall table */
 	uint32_t syscall_table = t.read_u32(0x8001007c);
@@ -88,7 +87,7 @@ void OS::parse_syscall_table()
 
 void OS::parse_footer()
 {
-	Target &t = this->target;
+	Target &t = m_target;
 
 	/* Find the footer address (occurrence of "CASIOABSLangdata") */
 	uint32_t start = MemoryRegion::ROM.start;
diff --git a/lib/passes/cfg.cpp b/lib/passes/cfg.cpp
new file mode 100644
index 0000000..321691c
--- /dev/null
+++ b/lib/passes/cfg.cpp
@@ -0,0 +1,37 @@
+//---
+// fxos.passes.cfg: CFG construction, as used by other passes
+//---
+
+#include
+#include
+
+namespace FxOS {
+
+CfgPass::CfgPass(Disassembly &disasm):
+	DisassemblyPass(disasm)
+{
+}
+
+void CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci)
+{
+	std::vector jump_mnemonics {	/* instructions that receive a jmptarget */
+		"bra", "bf", "bf.s", "bf/s", "bt", "bt.s", "bt/s",
+	};
+
+	/* Set the jmptarget fields whenever needed.
This is easy because jump
+	   instructions have trivially computable destinations */
+	for(auto mnemonic: jump_mnemonics)
+	{
+		if(ci.inst.mnemonic != mnemonic) continue;
+		auto &args = ci.inst.args;
+
+		assert((args.size() >= 1 && args[0].kind == Argument::PcJump)
+			&& "invalid use of a jump instruction\n");	/* guards the args[0] read below */
+
+		ci.jmptarget = pc + args[0].disp;
+	}
+
+	enqueue_unseen_successors(pc, ci);
+}
+
+} /* namespace FxOS */
diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp
new file mode 100644
index 0000000..9fad8ce
--- /dev/null
+++ b/lib/passes/pcrel.cpp
@@ -0,0 +1,19 @@
+//---
+// fxos.passes.pcrel: PC-relative addressing resolution
+//---
+
+#include
+
+namespace FxOS {
+
+PcrelPass::PcrelPass(Disassembly &disasm):
+	DisassemblyPass(disasm)
+{
+}
+
+void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci)
+{
+	enqueue_unseen_successors(pc, ci);	/* no analysis yet: only walks the code */
+}
+
+} /* namespace FxOS */
diff --git a/lib/passes/print.cpp b/lib/passes/print.cpp
new file mode 100644
index 0000000..6e93413
--- /dev/null
+++ b/lib/passes/print.cpp
@@ -0,0 +1,20 @@
+//---
+// fxos.passes.print: Print disassembly
+//---
+
+#include
+
+namespace FxOS {
+
+PrintPass::PrintPass(Disassembly &disasm):
+	DisassemblyPass(disasm)
+{
+}
+
+void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci)
+{
+	std::cout << ci.inst.mnemonic << "\n";	/* one mnemonic per visited instruction */
+	enqueue_unseen_successors(pc, ci);
+}
+
+} /* namespace FxOS */
diff --git a/lib/target.cpp b/lib/target.cpp
index 65e32ad..6ba0868 100644
--- a/lib/target.cpp
+++ b/lib/target.cpp
@@ -1,4 +1,5 @@
 #include
+#include
 #include
 
 namespace FxOS {
@@ -58,10 +59,10 @@ Addressable AbstractMemory::read_str(uint32_t addr, size_t len)
 // Bindings of data buffers into memory regions
 //---
 
-Binding::Binding(MemoryRegion const &source_region, Buffer const &buffer):
-	region(source_region), data(buffer.data()), size(region.size())
+Binding::Binding(MemoryRegion source_region, Buffer const &buffer):
+	region(source_region), data(buffer.data), size(region.size())
 {
-
if(buffer.size() < region.size()) + if(buffer.size < region.size()) { throw std::runtime_error("Buffer too small to create binding"); } @@ -103,13 +104,23 @@ uint32_t Binding::search(uint32_t start, uint32_t end, void const *pattern, //--- Target::Target(): - m_os(nullptr), m_bindings {} + m_bindings {}, m_buffers {} { } -void Target::bind_os(OS &os) +Target::Target(TargetDescription const &descr, + std::vector folders): + Target() { - m_os = &os; + for(auto binding: descr.bindings) + { + MemoryRegion region = binding.first; + ssize_t size = (region.size() > 0 ? region.size() : -1); + + auto b = m_buffers.emplace(m_buffers.end(), + binding.second, folders, size); + bind_region(region, *b); + } } void Target::bind_region(MemoryRegion const ®ion, Buffer const &buffer) diff --git a/lib/util.cpp b/lib/util.cpp index 6ba7e27..5928e35 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -1,25 +1,58 @@ #include #include #include +#include #include #include #include -/* Empty buffer initialized with given byte */ -Buffer::Buffer(size_t size, int fill) +namespace fs = std::filesystem; + +Buffer::Buffer(): + size {0}, data {nullptr}, path {"(none)"} { - m_data = malloc(size); - if(!m_data) throw std::bad_alloc(); +} - m_size = size; - memset(m_data, fill, size); +/* Empty buffer initialized with given byte */ +Buffer::Buffer(size_t bufsize, int fill) +{ + size = bufsize; + data = static_cast(malloc(size)); + if(!data) throw std::bad_alloc(); - m_path = "(anonymous)"; + memset(data, fill, size); + path = "(anonymous)"; +} + +/* Move constructor */ +Buffer::Buffer(Buffer &&other): + size {0}, data {nullptr}, path {} +{ + *this = std::move(other); +} + +/* Move assignment operator */ +Buffer &Buffer::operator=(Buffer &&other) +{ + if(this != &other) + { + free(data); + + data = other.data; + size = other.size; + path = other.path; + + other.data = nullptr; + other.size = 0; + other.path = "(moved)"; + } + + return *this; } /* Buffer initialized from file */ 
-Buffer::Buffer(std::string filepath, ssize_t size, int fill)
+Buffer::Buffer(std::string filepath, ssize_t bufsize, int fill)
 {
 	char const *path = filepath.c_str();
 
@@ -35,15 +68,15 @@ Buffer::Buffer(std::string filepath, ssize_t size, int fill)
 		throw std::runtime_error(format("cannot stat '%s'", path));
 	}
 
-	m_size = (size < 0) ? statbuf.st_size : size;
-	size_t size_to_read = std::min(m_size, (size_t)statbuf.st_size);
+	size = (bufsize < 0) ? statbuf.st_size : bufsize;	/* negative bufsize: use the file size */
+	size_t size_to_read = std::min(size, (size_t)statbuf.st_size);
 
-	m_data = malloc(m_size);
-	if(!m_data) throw std::bad_alloc();
+	data = static_cast<char *>(malloc(size));
+	if(!data) { close(fd); throw std::bad_alloc(); }	/* do not leak the descriptor */
 
 	/* Read buffer and fill whatever is left */
-	memset(m_data, fill, m_size);
-	x = read(fd, m_data, size_to_read);
+	memset(data, fill, size);
+	x = read(fd, data, size_to_read);
 	close(fd);
 
 	if(x != (ssize_t)size_to_read)
@@ -52,46 +85,37 @@ Buffer::Buffer(std::string filepath, ssize_t size, int fill)
 			"error while reading '%s'", path));
 	}
 
-	m_path = filepath;
+	this->path = filepath;	/* this-> is needed: a local named path shadows the member */
 }
 
-/* Create a buffer by copying another buffer */
-Buffer::Buffer(Buffer const &other):
-	Buffer(other.size(), 0x00)
+Buffer::Buffer(std::string filepath, std::vector &folders,
+	ssize_t size, int fill):
+	Buffer()
 {
+	for(auto &f: folders)
+	{
+		fs::path p = fs::path(f) / fs::path(filepath);
+		if(fs::exists(p))
+		{
+			/* Hopefully this will use the move assignment */
+			*this = Buffer(p, size, fill);
+			return;	/* the first folder containing the file wins */
+		}
+	}
+
+	char const *path = filepath.c_str();
+	throw std::runtime_error(format("cannot find '%s' in library", path));
 }
 
 /* Create a buffer by copying and resizing another buffer */
 Buffer::Buffer(Buffer const &other, size_t new_size, int fill):
 	Buffer(new_size, fill)
 {
-	memcpy(m_data, other.data(), std::min(new_size, other.size()));
+	memcpy(data, other.data, std::min(new_size, other.size));
 }
 
 /* Free allocated data, obviously */
 Buffer::~Buffer()
 {
-	free(m_data);
-}
-
-/* Buffer size */
-size_t Buffer::size() const noexcept
-{
-	return m_size;
-}
-
-/* Buffer data */
-char *Buffer::data() noexcept
-{
-	return static_cast(m_data);
-}
-char const *Buffer::data() const noexcept
-{
-	return static_cast(m_data);
-}
-
-/* File path */
-std::string Buffer::path() const noexcept
-{
-	return m_path;
+	free(data);
 }