From c5a7071dcc0bcb0a0caf8d3c46f211f870247a2b Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Fri, 28 Feb 2020 16:19:50 +0100 Subject: [PATCH] better manage exceptions and instruction-level passes --- fxos/disassembly.cpp | 86 ++++++++++++---------------- fxos/info.cpp | 2 +- fxos/main.cpp | 42 +++++++++----- include/fxos/disasm-passes/pcrel.h | 2 +- include/fxos/disasm-passes/print.h | 7 +-- include/fxos/disasm-passes/syscall.h | 2 +- include/fxos/disassembly.h | 16 +++++- include/fxos/errors.h | 39 +++++++++++-- include/fxos/target.h | 5 +- lib/disassembly.cpp | 17 ++++++ lib/passes/cfg.cpp | 4 +- lib/passes/pcrel.cpp | 21 ++----- lib/passes/print.cpp | 10 +--- lib/passes/syscall.cpp | 6 +- lib/semantics.cpp | 2 +- lib/target.cpp | 32 ++++------- lib/util.cpp | 12 ++-- 17 files changed, 165 insertions(+), 140 deletions(-) diff --git a/fxos/disassembly.cpp b/fxos/disassembly.cpp index 5629e91..65f4781 100644 --- a/fxos/disassembly.cpp +++ b/fxos/disassembly.cpp @@ -28,8 +28,33 @@ int disassembly(Library &library, Target &target, char const *ref, int syscall_id; int len = 0; + enum { RefNone=0, RefSyscall, RefAddress } reftype = RefNone; + /* Parse different flavors of references. 
%: syscall */ if(sscanf(ref, "%%%x", &syscall_id) == 1) + reftype = RefSyscall; + /* Pure hexa: address */ + else if(sscanf(ref, "%x%n", &address, &len) == 1 && !ref[len]) + reftype = RefAddress; + /* Anything else: look up symbols */ + else + { + std::string name = ref; + for(auto const &symtable: library.sym_tables()) + { + std::optional sym = symtable.lookup(name); + if(!sym) continue; + + if(sym->type == Symbol::Syscall) + reftype = RefSyscall, syscall_id = sym->value; + if(sym->type == Symbol::Address) + reftype = RefAddress, address = sym->value; + break; + } + } + + /* Now try to load the address for this reference */ + if(reftype == RefSyscall) { if(!os) { @@ -46,57 +71,20 @@ int disassembly(Library &library, Target &target, char const *ref, address = os->syscall(syscall_id); } - /* Pure hexa: address */ - else if(sscanf(ref, "%x%n", &address, &len) == 1 && !ref[len]) + else if(reftype == RefAddress) { + if(address & 1) + { + log(WRN "address %08x is odd, will start at %08x", + address, address+1); + address++; + } } - /* Anything else: look up symbols */ else { - bool found = false; - std::string name = ref; - Symbol sym; - - for(auto const &symtable: library.sym_tables()) - { - std::optional s = symtable.lookup(name); - if(!s) continue; - - found = true; - sym = *s; - break; - } - - if(!found) - { - log(ERR "cannot interpret '%s' (not syscall id, not " - "address, and no such symbol in library)",ref); - return 1; - } - - switch(sym.type) - { - case Symbol::Syscall: - if(!os) - { - log(ERR "cannot disassemble syscall %s: target" - " does not have an OS mapped", ref); - return 1; - } - if(syscall_id >= os->syscall_count()) - { - log(ERR "this OS only has %#x syscalls", - os->syscall_count()); - return 1; - } - - address = os->syscall(sym.value); - break; - - case Symbol::Address: - address = sym.value; - break; - } + log(ERR "cannot interpret '%s' (not a syscall id, not an " + "address, and no such symbol in library)", ref); + return 1; } for(auto pass: 
passes) @@ -112,12 +100,12 @@ int disassembly(Library &library, Target &target, char const *ref, else if(pass == "pcrel") { PcrelPass p(disasm); - p.run(address); + p.run(); } else if(pass == "syscall") { SyscallPass p(disasm, os.get()); - p.run(address); + p.run(); } else if(pass == "print") { diff --git a/fxos/info.cpp b/fxos/info.cpp index 61c6a1b..002d7dc 100644 --- a/fxos/info.cpp +++ b/fxos/info.cpp @@ -28,7 +28,7 @@ static char const *syscall_str = " Syscall entries outside ROM:\n"; static char const *syscall_nonrom_str = -" %%%03x -> 0x%08x (%s memory)\n"; +" %%%03x -> %08x (%s memory)\n"; void os_info(Target &t) { diff --git a/fxos/main.cpp b/fxos/main.cpp index 464225f..9bbc3d6 100644 --- a/fxos/main.cpp +++ b/fxos/main.cpp @@ -366,7 +366,7 @@ int main_disassembly(int argc, char **argv) log(ERR "%08x: %s", e.addr(), e.what()); return 1; } - catch(AddrError &e) + catch(AddressError &e) { log(ERR "%08x[%d]: %s", e.addr(), e.size(), e.what()); return 1; @@ -387,19 +387,35 @@ int main(int argc, char **argv) std::string cmd = argv[1]; argv[1] = (char *)""; - if(cmd == "library") - return main_library(argc, argv); - else if(cmd == "info") - return main_info(argc, argv); - else if(cmd == "disasm") - return main_disassembly(argc, argv); -// else if(cmd == "analyze") -// return main_analyze(argc, argv); + try + { + if(cmd == "library") + return main_library(argc, argv); + else if(cmd == "info") + return main_info(argc, argv); + else if(cmd == "disasm") + return main_disassembly(argc, argv); +// else if(cmd == "analyze") +// return main_analyze(argc, argv); - else if(cmd == "-?" || cmd == "-h" || cmd == "--help") - usage(0); + else if(cmd == "-?" 
|| cmd == "-h" || cmd == "--help") + usage(0); + + std::cerr << "invalid operation '" << cmd << "'\n"; + std::cerr << "Try '" << argv[0] << " --help'.\n"; + } + catch(std::logic_error &e) + { + log(ERR "%s o(x_x)o", e.what()); + } + catch(std::runtime_error &e) + { + log(ERR "%s", e.what()); + } + catch(FxOS::LimitError &e) + { + log(ERR "%s _(x_x)_", e.what()); + } - std::cerr << "invalid operation '" << cmd << "'\n"; - std::cerr << "Try '" << argv[0] << " --help'.\n"; return 1; } diff --git a/include/fxos/disasm-passes/pcrel.h b/include/fxos/disasm-passes/pcrel.h index 1352150..58a9895 100644 --- a/include/fxos/disasm-passes/pcrel.h +++ b/include/fxos/disasm-passes/pcrel.h @@ -13,7 +13,7 @@ namespace FxOS { -class PcrelPass: public DisassemblyPass +class PcrelPass: public InstructionDisassemblyPass { public: PcrelPass(Disassembly &disasm); diff --git a/include/fxos/disasm-passes/print.h b/include/fxos/disasm-passes/print.h index c300a6f..cc93912 100644 --- a/include/fxos/disasm-passes/print.h +++ b/include/fxos/disasm-passes/print.h @@ -14,18 +14,13 @@ namespace FxOS { -class PrintPass: public DisassemblyPass +class PrintPass: public InstructionDisassemblyPass { public: PrintPass(Disassembly &disasm, std::vector const &symtables); void analyze(uint32_t pc, ConcreteInstruction &inst) override; - /* This pass uses another entry method that starts at the instruction - with the smallest address loaded in the disassembly, then goes down. - The standard run() is not available. 
*/ - void run(void); - //--- // Print pass parameters //--- diff --git a/include/fxos/disasm-passes/syscall.h b/include/fxos/disasm-passes/syscall.h index 351d6a6..e3dd28b 100644 --- a/include/fxos/disasm-passes/syscall.h +++ b/include/fxos/disasm-passes/syscall.h @@ -15,7 +15,7 @@ namespace FxOS { -class SyscallPass: public DisassemblyPass +class SyscallPass: public InstructionDisassemblyPass { public: SyscallPass(Disassembly &disasm, OS *os); diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index 1ecf248..0cb681d 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -130,12 +130,11 @@ public: DisassemblyPass(Disassembly &disasm); /* Analyze a single instruction, probably updating the annotations and - the state of the pass itself. Should return true if the state of the - instruction changed. */ + the state of the pass itself. */ virtual void analyze(uint32_t pc, ConcreteInstruction &inst) = 0; /* Run the pass from the given entry point */ - void run(uint32_t entry_pc); + virtual void run(uint32_t entry_pc); protected: /* Add an instruction to the queue to analyze next */ @@ -159,6 +158,17 @@ private: std::set m_seen; }; +/* A disassembly pass that observes each instruction independently */ +class InstructionDisassemblyPass: public DisassemblyPass +{ +public: + InstructionDisassemblyPass(Disassembly &disasm); + + /* Runs the pass from the first instruction currently loaded, all the + way down to the bottom, as if always using enqueue_next(). */ + virtual void run(); +}; + } /* namespace FxOS */ #endif /* LIBFXOS_DISASSEMBLY_H */ diff --git a/include/fxos/errors.h b/include/fxos/errors.h index 8f976f0..fba263e 100644 --- a/include/fxos/errors.h +++ b/include/fxos/errors.h @@ -1,5 +1,17 @@ //--- // fxos.errors: Exception specification +// +// fxos uses the following exception classes when reporting errors. 
+// +// Fatal errors (caught by main): +// std::logic_error Internal consistency, fxos is broken +// FxOS::LimitError fxos doesn't know how to handle the input +// FxOS::LangError Program is invalid or database too poor +// +// Recoverable errors: +// std::runtime_error External errors (fi. file access) +// FxOS::SyntaxError Invalid fxos data file syntax (file is skipped) +// FxOS::AddressError Invalid virtual address (not bound) //--- #ifndef LIBFXOS_ERRORS_H @@ -11,7 +23,8 @@ namespace FxOS { -/* Syntax errors for fxos data files */ +/* Syntax errors for fxos data files. This is always an external error, and + reported to the user as an error message. */ class SyntaxError: public std::exception { public: @@ -41,7 +54,8 @@ private: char const *m_what; }; -/* Language errors for the disassembler */ +/* Language errors for the disassembler. These are either bugs in fxos or + caused by corrupted code files. */ class LangError: public std::exception { public: @@ -61,10 +75,13 @@ private: }; /* Address errors */ -class AddrError: public std::exception +class AddressError: public std::exception { public: - AddrError(uint32_t address, int size, char const *what): + static constexpr char const *default_message = "unmapped address"; + + AddressError(uint32_t address, int size, + char const *what = default_message): m_addr(address), m_size(size), m_what(what) {} uint32_t addr() const noexcept { @@ -83,6 +100,20 @@ private: char const *m_what; }; +/* Limitations of fxos */ +class LimitError: public std::exception +{ +public: + LimitError(char const *what): m_what(what) {} + + char const *what() const noexcept override { + return m_what; + } + +private: + char const *m_what; +}; + } /* namespace FxOS */ #endif /* LIBFXOS_ERRORS_H */ diff --git a/include/fxos/target.h b/include/fxos/target.h index 1184510..d506e29 100644 --- a/include/fxos/target.h +++ b/include/fxos/target.h @@ -24,7 +24,7 @@ public: virtual bool covers(MemoryRegion const &region) const noexcept; /* Returns the 
data located at the provided virtual address. Throws - std::out_of_range if the interval is not entirely simulated */ + std::out_of_range if the interval is not entirely simulated */ virtual char const *translate(uint32_t addr, int size=1) const = 0; /* Read data from the memory. The following methods read data of @@ -55,7 +55,8 @@ public: Addressable read_str(uint32_t addr, size_t len) const; /* Search a binary pattern in the specified area. Returns the virtual - address of the first occurrence if any is found, [end] otherwise. */ + address of the first occurrence if any is found, [end] otherwise + (including if the range is empty or exceeds simulated memory). */ virtual uint32_t search(uint32_t start, uint32_t end, void const *pattern, int size) const = 0; }; diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index fec81b5..380cf53 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -173,4 +173,21 @@ void DisassemblyPass::run(uint32_t entry_pc) } } +//--- +// Base instruction-level pass +//--- + +InstructionDisassemblyPass::InstructionDisassemblyPass(Disassembly &disasm): + DisassemblyPass(disasm) +{ +} + +void InstructionDisassemblyPass::run() +{ + for(auto &pair: m_disasm.instructions()) + { + analyze(pair.first, pair.second); + } +} + } /* namespace FxOS */ diff --git a/lib/passes/cfg.cpp b/lib/passes/cfg.cpp index c7fa211..2cfbafb 100644 --- a/lib/passes/cfg.cpp +++ b/lib/passes/cfg.cpp @@ -39,7 +39,7 @@ void CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci) /* Check that it's not in a delay slot */ if(target.delayslot) - throw LangError(pc, "jump into a delay slot!"); + throw LimitError("jump into a delay slot!"); } /* If this instruction is in a delay slot, check its type. 
A valid @@ -63,7 +63,7 @@ void CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci) { ConcreteInstruction &slot = m_disasm.readins(pc+2); if(slot.leader) - throw LangError(pc+2, "leader in a delay slot!"); + throw LimitError("leader in a delay slot!"); if(!slot.inst.isvaliddelayslot()) throw LangError(pc+2, "invalid delay slot"); diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index a5bae6c..3dad01f 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -7,7 +7,7 @@ namespace FxOS { PcrelPass::PcrelPass(Disassembly &disasm): - DisassemblyPass(disasm) + InstructionDisassemblyPass(disasm) { } @@ -20,7 +20,7 @@ void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) Argument const &a = i.args[n]; ConcreteInstructionArg &ca = ci.args[n]; - if(a.kind == Argument::PcRel) + if(a.kind == Argument::PcRel && (i.opsize==2 || i.opsize==4)) { uint32_t addr = (pc & ~(a.opsize - 1)) + 4 + a.disp; ca.location = RelConstDomain().constant(addr); @@ -29,19 +29,10 @@ void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) 16-bit with mov.w. There is no mov.b for this instruction. 
*/ Target &t = m_disasm.target(); - uint32_t v; + uint32_t v = -1; - switch(i.opsize) - { - case 2: - v = t.read_i16(addr); - break; - case 4: - v = t.read_i32(addr); - break; - default: - throw std::runtime_error("Wrong pcrel opsize"); - } + if(i.opsize == 2) v = t.read_i16(addr); + if(i.opsize == 4) v = t.read_i32(addr); ca.value = DataValue(IntegerType::u32); ca.value.write(0, 4, v); @@ -55,8 +46,6 @@ void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) ca.value.write(0, 4, addr); } } - - enqueue_unseen_successors(pc, ci); } } /* namespace FxOS */ diff --git a/lib/passes/print.cpp b/lib/passes/print.cpp index 8e28788..f484524 100644 --- a/lib/passes/print.cpp +++ b/lib/passes/print.cpp @@ -11,7 +11,7 @@ namespace FxOS { PrintPass::PrintPass(Disassembly &disasm, std::vector const &symtables): - DisassemblyPass(disasm), m_symtables(symtables) + InstructionDisassemblyPass(disasm), m_symtables(symtables) { /* Default parameters: all 0 */ @@ -21,14 +21,6 @@ PrintPass::PrintPass(Disassembly &disasm, m_os = std::make_unique(t); } -void PrintPass::run(void) -{ - for(auto &pair: m_disasm.instructions()) - { - analyze(pair.first, pair.second); - } -} - void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci) { Instruction const &i = ci.inst; diff --git a/lib/passes/syscall.cpp b/lib/passes/syscall.cpp index 7d9da5c..d033c9c 100644 --- a/lib/passes/syscall.cpp +++ b/lib/passes/syscall.cpp @@ -7,11 +7,11 @@ namespace FxOS { SyscallPass::SyscallPass(Disassembly &disasm, OS *os): - DisassemblyPass(disasm), m_os(os) + InstructionDisassemblyPass(disasm), m_os(os) { } -void SyscallPass::analyze(uint32_t pc, ConcreteInstruction &ci) +void SyscallPass::analyze([[maybe_unused]] uint32_t pc,ConcreteInstruction &ci) { /* Nothing to do if no syscall table is provided! 
*/ if(!m_os) return; @@ -45,8 +45,6 @@ void SyscallPass::analyze(uint32_t pc, ConcreteInstruction &ci) if(sid >= 0) ca.syscall_id = sid; } } - - enqueue_unseen_successors(pc, ci); } } /* namespace FxOS */ diff --git a/lib/semantics.cpp b/lib/semantics.cpp index bb3328e..5a2da14 100644 --- a/lib/semantics.cpp +++ b/lib/semantics.cpp @@ -30,7 +30,7 @@ BitfieldType::Field BitfieldType::named_field(std::string name) const if(f.first == name) return f; } - throw std::domain_error("No such field name in bit field"); + throw std::logic_error("no such field name in bit field"); } DataType::DataKind DataType::kind() const noexcept diff --git a/lib/target.cpp b/lib/target.cpp index ecd8af9..c7c49e7 100644 --- a/lib/target.cpp +++ b/lib/target.cpp @@ -68,10 +68,12 @@ Addressable AbstractMemory::read_str(uint32_t addr, size_t len) Binding::Binding(MemoryRegion source_region, Buffer const &buffer): region(source_region), data(buffer.data), size(region.size()) { - if(buffer.size < region.size()) - { - throw std::runtime_error("Buffer too small to create binding"); - } + /* Extend the buffer with zeros if it's too small. 
*/ + if(buffer.size >= region.size()) return; + + Buffer larger_buffer(buffer, region.size()); + data = larger_buffer.data; + size = larger_buffer.size; } bool Binding::covers(uint32_t addr, int size) const noexcept @@ -86,21 +88,14 @@ bool Binding::covers(MemoryRegion const &region) const noexcept char const *Binding::translate(uint32_t addr, int size) const { - if(!covers(addr, size)) - { - throw std::out_of_range("Out of binding range"); - } - + if(!covers(addr, size)) return nullptr; return data.get() + (addr - region.start); } uint32_t Binding::search(uint32_t start, uint32_t end, void const *pattern, int size) const { - if(end < start || !covers(start, end - start)) - { - throw std::out_of_range("Out of binding range"); - } + if(end < start || !covers(start, end - start)) return end; if(start + size > end) return end; void const *data = translate(start); @@ -159,14 +154,11 @@ char const *Target::translate(uint32_t addr, int size) const { for(auto it = m_bindings.crbegin(); it != m_bindings.crend(); it++) { - try - { - return it->translate(addr, size); - } - catch(std::out_of_range &e) {} + char const *ptr = it->translate(addr, size); + if(ptr) return ptr; } - throw AddrError(addr, size, "out of target bindings"); + throw AddressError(addr, size); } uint32_t Target::search(uint32_t start, uint32_t end, void const *pattern, @@ -175,7 +167,7 @@ uint32_t Target::search(uint32_t start, uint32_t end, void const *pattern, uint32_t occurrence; if(end < start || !covers(start, end - start)) { - throw AddrError(start, end-start, "out of target bindings"); + throw AddressError(start, end-start); } for(auto it = m_bindings.crbegin(); it != m_bindings.crend(); it++) diff --git a/lib/util.cpp b/lib/util.cpp index ca311a6..2a6c241 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -19,8 +19,7 @@ Buffer::Buffer(): Buffer::Buffer(size_t bufsize, int fill) { size = bufsize; - char *buffer = static_cast(malloc(size)); - if(!buffer) throw std::bad_alloc(); + char *buffer = new 
char[size]; memset(buffer, fill, size); data = std::shared_ptr(buffer); @@ -47,13 +46,10 @@ Buffer::Buffer(std::string filepath, ssize_t bufsize, int fill) size = (bufsize < 0) ? statbuf.st_size : bufsize; size_t size_to_read = std::min(size, (size_t)statbuf.st_size); - char *buffer = static_cast(malloc(size)); - data = std::shared_ptr(buffer); - if(!data) throw std::bad_alloc(); - /* Read buffer and fill whatever is left */ - memset(buffer, fill, size); - x = read(fd, buffer, size_to_read); + data = std::shared_ptr(new char[size]); + memset(data.get(), fill, size); + x = read(fd, data.get(), size_to_read); close(fd); if(x != (ssize_t)size_to_read)