diff --git a/include/fxos/ai/RelConst.h b/include/fxos/ai/RelConst.h index aae1cb3..878ec50 100644 --- a/include/fxos/ai/RelConst.h +++ b/include/fxos/ai/RelConst.h @@ -84,8 +84,9 @@ struct RelConst neither Top nor Bottom. */ operator bool() const noexcept; - /* String representation */ - std::string str() const noexcept; + /* String representation. If detailed is set, allow clearer alternate + representations (eg. "100 (0x00000064)"). */ + std::string str(bool detailed = true) const noexcept; }; class RelConstDomain: public AbstractDomain diff --git a/include/fxos/analysis.h b/include/fxos/analysis.h index 01fbedc..5013872 100644 --- a/include/fxos/analysis.h +++ b/include/fxos/analysis.h @@ -49,8 +49,6 @@ struct ProgramState /* Lattice order. */ bool le(ProgramState const &other) const; - std::string str(int indent = 0) const; - private: /* Values for registers r0..r15 */ RelConst m_regs[16]; @@ -59,7 +57,7 @@ private: /* Change in program state over a single (contextually known) instruction. */ struct ProgramStateDiff { - enum class Target : int { None = -1, Unknown = -2 }; + enum class Target : int { None = -1, Unknown = -2, CallStandard = -3 }; /* Number of the register that changes, or Target::*. */ int target() const @@ -90,6 +88,12 @@ struct ProgramStateDiff { m_target = static_cast(Target::None); } + /* Set the diff to modifying register states as allowed by the standard + function calling convention. */ + void setCallStandard() + { + m_target = static_cast(Target::CallStandard); + } /* Set the diff to unknown effect on registers. 
*/ void setUnknown() { diff --git a/include/fxos/function.h b/include/fxos/function.h index e00512c..697bcae 100644 --- a/include/fxos/function.h +++ b/include/fxos/function.h @@ -388,7 +388,7 @@ struct Instruction Instruction(Function &function, u32 address, u32 opcode); - // TODO: Rename AsmInstruction -> Opcode + // TODO: Don't use the word "opcode", maybe "base" // TODO: Get opcode from Instruction AsmInstruction const &opcode() const { @@ -400,7 +400,7 @@ struct Instruction return insmap[m_opcode].has_value(); } /* Instruction's size in bytes. */ - uint size() const + uint encodingSize() const { return (m_opcode >> 16) ? 4 : 2; } diff --git a/include/fxos/lang.h b/include/fxos/lang.h index c8341c0..37f0f3f 100644 --- a/include/fxos/lang.h +++ b/include/fxos/lang.h @@ -88,6 +88,9 @@ public: return m_name != r.m_name; } + /* Get the register number for r0 ... r15, -1 for other registers. */ + int getR() const; + private: CpuRegisterName m_name; }; @@ -108,36 +111,34 @@ struct AsmOperand Imm, /* #imm */ }; - /* Constructors, mostly internal; default is r0, rest is not relevant */ - AsmOperand(Kind kind = Reg, CpuRegister base = CpuRegister::R0); - AsmOperand(int disp, i8 opsize, CpuRegister base); - AsmOperand(CpuRegister index, CpuRegister base); - AsmOperand(Kind kind, int disp_imm, i8 opsize = 0); + /* Default constructor gives register r0. 
*/ + AsmOperand(); - /* Actually useful factory functions */ + /* Factory functions */ static AsmOperand mkReg(CpuRegister base) { - return AsmOperand(Reg, base); + return AsmOperand(Reg, base, 0); } - static AsmOperand mkDeref(CpuRegister base) + static AsmOperand mkDeref(CpuRegister base, i8 opsize) { - return AsmOperand(Deref, base); + return AsmOperand(Deref, base, opsize); } - static AsmOperand mkPostInc(CpuRegister base) + static AsmOperand mkPostInc(CpuRegister base, i8 opsize) { - return AsmOperand(PostInc, base); + return AsmOperand(PostInc, base, opsize); } - static AsmOperand mkPreDec(CpuRegister base) + static AsmOperand mkPreDec(CpuRegister base, i8 opsize) { - return AsmOperand(PreDec, base); + return AsmOperand(PreDec, base, opsize); } static AsmOperand mkStructDeref(int disp, int opsize, CpuRegister base) { return AsmOperand(disp, opsize, base); } - static AsmOperand mkArrayDeref(CpuRegister index, CpuRegister base) + static AsmOperand mkArrayDeref( + CpuRegister index, CpuRegister base, i8 opsize) { - return AsmOperand(index, base); + return AsmOperand(index, base, opsize); } static AsmOperand mkPcRel(int disp, int opsize) { @@ -163,7 +164,6 @@ struct AsmOperand { return m_kind == Reg; } - /* Is this operand in memory? */ bool isMemory() const { @@ -181,6 +181,26 @@ struct AsmOperand case Imm: return false; } + return false; + } + /* Is this operand a constant? */ + bool isConstant() const + { + switch(m_kind) { + case PcRel: + case PcJump: + case PcAddr: + case Imm: + return true; + case Reg: + case Deref: + case PostInc: + case PreDec: + case StructDeref: + case ArrayDeref: + return false; + } + return false; } // TODO: RelConst modeling the memory address being being used (first @@ -229,10 +249,15 @@ struct AsmOperand } /* Get the PC-relative target, assuming the instruction is at the provided - address, for arguments with PC-relative offsets. 
*/ - u32 getPCRelativeTarget(u32 pc, int size) const; + address, for arguments with PC-relative addressing. */ + u32 getPCRelativeTarget(u32 pc) const; private: + AsmOperand(Kind kind, CpuRegister base, i8 opsize); + AsmOperand(int disp, i8 opsize, CpuRegister base); + AsmOperand(CpuRegister index, CpuRegister base, i8 opsize); + AsmOperand(Kind kind, int disp_imm, i8 opsize = 0); + Kind m_kind; /* Base register. Valid for all modes except Imm */ CpuRegister m_base; @@ -269,18 +294,10 @@ struct AsmInstruction AsmInstruction(u32 encoding, char const *mnemonic, int tags, int opCount, AsmOperand op1 = {}, AsmOperand op2 = {}); - /* Original encoding */ - u32 encoding() const + /* Operation code, one of AsmInstruction::SH_* (eg. SH_mov) */ + uint operation() const { - return m_encoding; - } - - /* Mnemonic **without the size indicator** */ - char const *mnemonic() const; - /* Size indication (purely syntactic), 0 if not present */ - int opsize() const - { - return m_opsize; + return m_operation; } /* Access to operands */ @@ -290,7 +307,7 @@ struct AsmInstruction } AsmOperand const &operand(int i) const { - assert(i >= 0 && i < m_opCount && "operand out-of-bounds"); + assert((uint)i < m_opCount && "operand out-of-bounds"); return m_ops[i]; } auto operands() const // -> [AsmOperand const &] @@ -298,6 +315,25 @@ struct AsmInstruction return std::views::take(m_ops, m_opCount); } + /* Size indication (purely syntactic), 0 if not present */ + int opsize() const + { + return m_opsize; + } + + /* Original encoding */ + u32 encoding() const + { + return m_encoding; + } + + /* Mnemonic (with the size indicator, eg. "mov.l") */ + std::string mnemonic() const; + /* Operation name (without the size indicator, eg. "mov") */ + char const *operationString() const; + /* Size indicator to go after the op name (".b", ".w", ".l" or empty) */ + char const *operationSizeString() const; + //=== Instruction classes ===// /* Whether the instruction terminates the function it's in. 
*/ @@ -357,7 +393,7 @@ private: /* Original encoding */ u32 m_encoding; /* Assembler instruction name (mov, add, etc), without size modifier. */ - u8 m_opcode; + u8 m_operation; /* Operation size (0, 1, 2 or 4) */ i8 m_opsize; /* Number of operands */ diff --git a/include/fxos/os.h b/include/fxos/os.h index d8cb603..c495123 100644 --- a/include/fxos/os.h +++ b/include/fxos/os.h @@ -41,7 +41,7 @@ public: /* Create an OS interface for this virtual space. If there is no data loaded in ROM or the OS can't be identified, the type os OS is set to UNKNOWN and no information is provided. */ - OS(VirtualSpace &space); + OS(VirtualSpace const &space); /* Type of OS, determined at construction */ enum Type { UNKNOWN, FX, CG }; @@ -86,7 +86,7 @@ public: private: /* Virtual space being analyzed */ - VirtualSpace &m_space; + VirtualSpace const &m_space; /* Parse the OS header. This should be the first analysis function to be called, because it determines the type of model (ie. fx9860g vs diff --git a/include/fxos/view/assembly.h b/include/fxos/view/assembly.h index 690e382..17bf6e8 100644 --- a/include/fxos/view/assembly.h +++ b/include/fxos/view/assembly.h @@ -88,6 +88,9 @@ struct ViewAssemblyOptions /* Whether to print function analysis results from the binary */ bool printFunctionAnalysis = false; + /* Whether to show details of instruction addresses and encodings */ + bool showInstructionDetails = false; + /* TODO: More view assembly options, including CFG layout */ /* TODO: View assembly options: syntax highlighting */ }; diff --git a/include/fxos/view/util.h b/include/fxos/view/util.h new file mode 100644 index 0000000..5763fbf --- /dev/null +++ b/include/fxos/view/util.h @@ -0,0 +1,98 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // 
+//---------------------------------------------------------------------------// +// fxos/view/util + +#include +#include +#include +#include +#include + +#ifndef FXOS_VIEW_UTIL_H +#define FXOS_VIEW_UTIL_H + +namespace FxOS { + +static inline auto fmt_rgb(std::string s, fmt::detail::color_type fg) +{ + return fmt::format("{}", fmt::styled(s, fmt::fg(fg))); +} +static inline auto fmt_color(std::string s, fmt::terminal_color fg) +{ + return fmt::format("{}", fmt::styled(s, fmt::fg(fg))); +} + +static size_t codePointLength(std::string s) +{ + size_t len = 0; + for(auto c: s) + len += (c & 0xc0) != 0x80; + return len; +} + +/* viewStrings: Display a set of strings with line wrapping */ + +template +concept range_of + = std::ranges::range && std::same_as, T>; + +struct ViewStringsOptions +{ + /* Maximum number of columns to print elements in (excluding start) */ + int maxColumns = 80; + /* Extra text at the beginning of each line (in addition to maxColumns) */ + std::string lineStart = ""; + /* Item separator */ + std::string separator = ", "; + /* Style for text after lineStart */ + fmt::text_style style {}; +}; + +template + requires(range_of) +void viewStrings(R range, ViewStringsOptions const &opts) +{ + bool newline = true; + int lineSize = 0; + + for(std::string const &str: range) { + int strSize = codePointLength(str); + int lengthNeeded = strSize + (newline ? 0 : opts.separator.size()); + + /* Allow overflow if that's required for progress */ + if(lineSize != 0 && lineSize + lengthNeeded > opts.maxColumns) { + fmt::print("\n"); + newline = true; + lineSize = 0; + } + + if(newline) { + std::cout << opts.lineStart; + fmt::print(opts.style, "{}", str); + } + else { + fmt::print(opts.style, "{}{}", opts.separator, str); + } + + lineSize += strSize + (newline ? 
0 : opts.separator.size()); + newline = false; + } + + if(!newline) + fmt::print("\n"); +} + +template +void viewStrings(R range, F fun, ViewStringsOptions const &opts) +{ + return viewStrings( + std::views::all(range) | std::views::transform(fun), opts); +} + +} /* namespace FxOS */ + +#endif /* FXOS_VIEW_UTIL_H */ diff --git a/include/fxos/vspace.h b/include/fxos/vspace.h index 91e7207..4c9bd7c 100644 --- a/include/fxos/vspace.h +++ b/include/fxos/vspace.h @@ -42,24 +42,25 @@ class AbstractMemory { public: /* Checks if an address or interval is simulated (in its entirety) */ - bool covers(uint32_t addr, int size = 1); + bool covers(uint32_t addr, int size = 1) const; /* Check if a full region is simulated */ - bool covers(MemoryRegion const ®ion); + bool covers(MemoryRegion const ®ion) const; /* Returns the data located at the provided virtual address, nullptr if it is not entirely covered. */ - char const *translate(uint32_t addr, int size = 1); + char const *translate(uint32_t addr, int size = 1) const; /* Returns the data located at the provided virtual address, and indicates how much is available in *size. The pointer is null if [addr] itself is not covered, in which case *size is also set to 0. */ - virtual char const *translate_dynamic(uint32_t addr, int *size) = 0; + virtual char const *translate_dynamic(uint32_t addr, int *size) const = 0; /* Search a binary pattern in the specified area. Returns the virtual address of the first occurrence if any is found, [end] otherwise (including if the range is empty or exceeds simulated memory). */ - uint32_t search(uint32_t start, uint32_t end, void const *pat, int size); + uint32_t search( + uint32_t start, uint32_t end, void const *pat, int size) const; /* Read a simple object from memory. The following methods all assume that the specified address is simulated, and return a default value if it's @@ -71,15 +72,15 @@ public: data type, see . */ /* Various sizes of integers with sign-extension or zero-extension. 
*/ - Addressable read_i8(uint32_t addr); - Addressable read_u8(uint32_t addr); - Addressable read_i16(uint32_t addr); - Addressable read_u16(uint32_t addr); - Addressable read_i32(uint32_t addr); - Addressable read_u32(uint32_t addr); + Addressable read_i8(uint32_t addr) const; + Addressable read_u8(uint32_t addr) const; + Addressable read_i16(uint32_t addr) const; + Addressable read_u16(uint32_t addr) const; + Addressable read_i32(uint32_t addr) const; + Addressable read_u32(uint32_t addr) const; /* Read a non-NUL-terminated string */ - Addressable read_str(uint32_t addr, size_t len); + Addressable read_str(uint32_t addr, size_t len) const; }; /* A binding of a data buffer into a memory region of the target. */ @@ -99,7 +100,7 @@ struct Binding: public AbstractMemory Buffer buffer; // - AbstractMemory interface - char const *translate_dynamic(uint32_t addr, int *size) override; + char const *translate_dynamic(uint32_t addr, int *size) const override; }; /* A composite space where regions can be bound dynamically */ @@ -123,7 +124,7 @@ public: void bind_region(MemoryRegion const ®ion, Buffer const &buffer); // - AbstractMemory interface - char const *translate_dynamic(uint32_t addr, int *size) override; + char const *translate_dynamic(uint32_t addr, int *size) const override; // TODO: Remove these Disassembly disasm; diff --git a/lib/ai/RelConst.cpp b/lib/ai/RelConst.cpp index 03c7aa1..76e2aca 100644 --- a/lib/ai/RelConst.cpp +++ b/lib/ai/RelConst.cpp @@ -313,7 +313,7 @@ RelConst::operator bool() const noexcept return !spe; } -std::string RelConst::str() const noexcept +std::string RelConst::str(bool detailed) const noexcept { using RegName = CpuRegister::CpuRegisterName; @@ -347,7 +347,7 @@ std::string RelConst::str() const noexcept v = -ival; } - return str + format("%d (0x%08x)", v, uval); + return str + format(detailed ? 
"%d (0x%08x)" : "%d", v, uval); } else { return str + format("0x%08x", uval); diff --git a/lib/analysis.cpp b/lib/analysis.cpp index bcaedb5..044034b 100644 --- a/lib/analysis.cpp +++ b/lib/analysis.cpp @@ -6,6 +6,7 @@ //---------------------------------------------------------------------------// #include +#include #include #include @@ -36,6 +37,10 @@ void ProgramState::applyDiff(ProgramStateDiff const &diff) for(int i = 0; i < 16; i++) m_regs[i] = RCD.top(); } + else if(t == static_cast(ProgramStateDiff::Target::CallStandard)) { + for(int i = 0; i < 8; i++) /* r0..r7 are caller-saved */ + m_regs[i] = RCD.top(); + } else { assert((unsigned)t < 16 && "invalid register target"); m_regs[t] = diff.value(); @@ -62,34 +67,16 @@ bool ProgramState::le(ProgramState const &other) const return true; } -std::string ProgramState::str(int indentLength) const -{ - std::string indent(indentLength, ' '); - std::string str; - - /* Registers */ - for(int i = 0; i < 16; i++) { - if(i % 4 == 0) { - str += (i > 0 ? "\n" : ""); - str += indent; - } - else - str += " "; - - str += fmt::format("r{}:{}", i, m_regs[i].str()); - } - - return str + "\n"; -} - std::string ProgramStateDiff::str() const { if(m_target == static_cast(Target::None)) return "()"; if(m_target == static_cast(Target::Unknown)) return "⊤"; + if(m_target == static_cast(Target::CallStandard)) + return "call(std)"; - return fmt::format("r{} ← {}", m_target, m_value.str()); + return fmt::format("r{} ← {}", m_target, m_value.str(false)); } /* Information stored for each block during the fixpoint iteration */ @@ -101,6 +88,44 @@ struct BlockStates ProgramState nextEntry; }; +static u32 computeConstantOperand(Instruction const &ins, AsmOperand const &op) +{ + Binary const &binary = ins.parentBinary(); + u32 target; + + assert(op.isConstant() && "analysis of constant operands is out of sync"); + + switch(op.kind()) { + case AsmOperand::PcRel: + target = op.getPCRelativeTarget(ins.address()); + + if(!binary.vspace().covers(target, 
op.opsize())) { + 
FxOS_log(ERR, "constant operand reads out of vspace bounds"); + return -1; + } + if(op.opsize() == 1) + return binary.vspace().read_i8(target); + if(op.opsize() == 2) + return binary.vspace().read_i16(target); + if(op.opsize() == 4) + return binary.vspace().read_i32(target); + + FxOS_log(ERR, "PcRel operand with no opsize"); + return -1; + + case AsmOperand::PcJump: + case AsmOperand::PcAddr: + return op.getPCRelativeTarget(ins.address()); + + case AsmOperand::Imm: + return op.imm(); + + default: + assert(false && "not a constant operand"); + __builtin_unreachable(); + } +} + static ProgramStateDiff interpretInstruction( Instruction const &ins, ProgramState const &PS) { @@ -108,12 +133,152 @@ static ProgramStateDiff interpretInstruction( ProgramStateDiff diff; diff.setUnknown(); - // TODO: Do this properly - u16 opc = ins.opcode().encoding(); - if((opc & 0xf000) == 0xe000) { - int reg = (opc >> 8) & 0xf; - int val = (int8_t)opc; - diff.setRegisterUpdate(reg, RCD.constant(val)); + AsmInstruction asmins = ins.opcode(); + + switch(asmins.operation()) { + + /* Moves */ + case AsmInstruction::SH_mov: + case AsmInstruction::SH_ldc: + case AsmInstruction::SH_lds: + case AsmInstruction::SH_stc: + case AsmInstruction::SH_sts: + case AsmInstruction::SH_mova: { + AsmOperand src = asmins.operand(0); + AsmOperand dst = asmins.operand(1); + + if(!dst.isReg()) + diff.setNoop(); + else if(src.isConstant()) { + RelConst c = RCD.constant(computeConstantOperand(ins, src)); + diff.setRegisterUpdate(dst.base().getR(), c); + } + else + diff.setRegisterTouched(dst.base().getR()); + break; + } + + /* Opaque instructions with one operand and one output */ + case AsmInstruction::SH_dt: + case AsmInstruction::SH_movt: + case AsmInstruction::SH_rotl: + case AsmInstruction::SH_rotr: + case AsmInstruction::SH_rotcl: + case AsmInstruction::SH_rotcr: + case AsmInstruction::SH_shal: + case AsmInstruction::SH_shar: + case AsmInstruction::SH_shll: + case AsmInstruction::SH_shlr: + case 
AsmInstruction::SH_shll2: + case AsmInstruction::SH_shlr2: + case AsmInstruction::SH_shll8: + case AsmInstruction::SH_shlr8: + case AsmInstruction::SH_shll16: + case AsmInstruction::SH_shlr16: { + AsmOperand op = asmins.operand(0); + assert(op.isReg()); + + if(op.base().getR() >= 0) + diff.setRegisterTouched(op.base().getR()); + else + diff.setNoop(); + break; + } + + /* Opaque instructions with two operands and one output */ + case AsmInstruction::SH_add: + case AsmInstruction::SH_addc: + case AsmInstruction::SH_addv: + case AsmInstruction::SH_and: + case AsmInstruction::SH_div1: + case AsmInstruction::SH_exts: + case AsmInstruction::SH_extu: + case AsmInstruction::SH_neg: + case AsmInstruction::SH_negc: + case AsmInstruction::SH_not: + case AsmInstruction::SH_or: + case AsmInstruction::SH_shad: + case AsmInstruction::SH_shld: + case AsmInstruction::SH_sub: + case AsmInstruction::SH_subc: + case AsmInstruction::SH_subv: + case AsmInstruction::SH_swap: + case AsmInstruction::SH_xor: + case AsmInstruction::SH_xtrct: { + AsmOperand op = asmins.operand(1); + + if(op.isReg() && op.base().getR() >= 0) + diff.setRegisterTouched(op.base().getR()); + else + diff.setNoop(); + break; + } + + /* No-op instructions that affect state not modeled by the analysis */ + case AsmInstruction::SH_clrs: + case AsmInstruction::SH_clrt: + case AsmInstruction::SH_clrmac: + case AsmInstruction::SH_div0u: + case AsmInstruction::SH_ldtlb: + case AsmInstruction::SH_nop: + case AsmInstruction::SH_rte: + case AsmInstruction::SH_rts: + case AsmInstruction::SH_sets: + case AsmInstruction::SH_sett: + case AsmInstruction::SH_sleep: + case AsmInstruction::SH_cmp_pl: + case AsmInstruction::SH_cmp_pz: + case AsmInstruction::SH_cmp_eq: + case AsmInstruction::SH_cmp_hs: + case AsmInstruction::SH_cmp_ge: + case AsmInstruction::SH_cmp_hi: + case AsmInstruction::SH_cmp_gt: + case AsmInstruction::SH_cmp_str: + case AsmInstruction::SH_div0s: + case AsmInstruction::SH_dmuls: + case AsmInstruction::SH_dmulu: + 
case AsmInstruction::SH_mul: + case AsmInstruction::SH_muls: + case AsmInstruction::SH_mulu: + case AsmInstruction::SH_tst: + case AsmInstruction::SH_jmp: + case AsmInstruction::SH_pref: + case AsmInstruction::SH_tas: + case AsmInstruction::SH_mac: + case AsmInstruction::SH_braf: + case AsmInstruction::SH_bf: + case AsmInstruction::SH_bf_s: + case AsmInstruction::SH_bt: + case AsmInstruction::SH_bt_s: + case AsmInstruction::SH_bra: + case AsmInstruction::SH_trapa: + case AsmInstruction::SH_icbi: + case AsmInstruction::SH_ocbi: + case AsmInstruction::SH_ocbp: + case AsmInstruction::SH_ocbwb: + case AsmInstruction::SH_prefi: + case AsmInstruction::SH_synco: + diff.setNoop(); + break; + + case AsmInstruction::SH_bsr: + case AsmInstruction::SH_bsrf: + case AsmInstruction::SH_jsr: + diff.setCallStandard(); + break; + + case AsmInstruction::SH_movco: + case AsmInstruction::SH_movli: + case AsmInstruction::SH_movua: + case AsmInstruction::SH_movca: + diff.setUnknown(); + break; + } + + for(auto op: ins.opcode().operands()) { + /* TODO: Properly handle pre-dec/post-inc */ + if(op.kind() == AsmOperand::PreDec || op.kind() == AsmOperand::PostInc) + diff.setUnknown(); } return diff; @@ -156,7 +321,7 @@ std::unique_ptr analyzeFunction(Function const &f) /* Compute the next entry state for each block */ for(uint i = 0; i < f.blockCount(); i++) { BasicBlock const &bb = f.basicBlockByIndex(i); - VBS[i].nextEntry.setBottom(); + VBS[i].nextEntry = VBS[i].entry; for(int succIndex: bb.successorsByIndex()) VBS[i].nextEntry.joinWith(VBS[succIndex].exit); diff --git a/lib/binary.cpp b/lib/binary.cpp index 9ae63b2..9afde8a 100644 --- a/lib/binary.cpp +++ b/lib/binary.cpp @@ -47,11 +47,7 @@ void Binary::deserialize(BSON const &b) OS *Binary::OSAnalysis(bool force) const { if(!m_os || force) { - /* We break constness a little bit here. 
We allow access to the OS - analysis for const Binary, even though it uses the VirtualSpace and - technically AbstractMemory allows implementations to modify the - memory in response to reads. */ - m_os = std::make_unique(const_cast(m_vspace)); + m_os = std::make_unique(m_vspace); /* We don't keep an OS analysis result that failed */ if(m_os->type == OS::UNKNOWN) m_os = nullptr; diff --git a/lib/function.cpp b/lib/function.cpp index 7b90de3..082da28 100644 --- a/lib/function.cpp +++ b/lib/function.cpp @@ -88,7 +88,8 @@ void Function::updateFunctionSize() if(bb.instructionCount() == 0) continue; Instruction &insn = bb.instructionAtIndex(bb.instructionCount() - 1); - max_address = std::max(max_address, insn.address() + insn.size()); + max_address + = std::max(max_address, insn.address() + insn.encodingSize()); } this->setSize(max_address - this->address()); @@ -368,7 +369,7 @@ void BasicBlock::finalizeBlock() u32 pc = this->address(); for(Instruction &insn: *this) { assert(insn.address() == pc && "non-sequential instructions in bb"); - pc += insn.size(); + pc += insn.encodingSize(); } /* The block must have no more than one terminator. 
*/ diff --git a/lib/lang.cpp b/lib/lang.cpp index 0dee85a..e2385fc 100644 --- a/lib/lang.cpp +++ b/lib/lang.cpp @@ -28,7 +28,6 @@ char const *regnames[] = { }; // clang-format on -/* Construction from string */ CpuRegister::CpuRegister(std::string name) { int regcount = (sizeof regnames / sizeof regnames[0]); @@ -44,7 +43,6 @@ CpuRegister::CpuRegister(std::string name) m_name = CpuRegister::UNDEFINED; } -/* Conversion to string */ std::string CpuRegister::str() const noexcept { int regcount = (sizeof regnames / sizeof regnames[0]); @@ -55,12 +53,26 @@ std::string CpuRegister::str() const noexcept return regnames[i]; } +int CpuRegister::getR() const +{ + if(m_name >= R0 && m_name <= R7) + return m_name - R0; + if(m_name >= R8 && m_name <= R15) + return m_name - R8 + 8; + + return -1; +} + //--- // Instruction operands //--- -AsmOperand::AsmOperand(Kind kind, CpuRegister base): - m_kind {kind}, m_base {base} +AsmOperand::AsmOperand(): m_kind {Reg}, m_base {CpuRegister::R0}, m_opsize {0} +{ +} + +AsmOperand::AsmOperand(Kind kind, CpuRegister base, i8 opsize): + m_kind {kind}, m_base {base}, m_opsize {opsize} { } @@ -69,8 +81,8 @@ AsmOperand::AsmOperand(int disp, i8 opsize, CpuRegister base): { } -AsmOperand::AsmOperand(CpuRegister index, CpuRegister base): - m_kind {ArrayDeref}, m_base {base}, m_index {index} +AsmOperand::AsmOperand(CpuRegister index, CpuRegister base, i8 opsize): + m_kind {ArrayDeref}, m_base {base}, m_index {index}, m_opsize {opsize} { } @@ -107,9 +119,9 @@ std::string AsmOperand::str() const } } -u32 AsmOperand::getPCRelativeTarget(u32 pc, int size) const +u32 AsmOperand::getPCRelativeTarget(u32 pc) const { - size = size + (size == 0); + int size = m_opsize + (m_opsize == 0); if(m_kind == AsmOperand::PcRel) return (pc & -size) + 4 + m_disp_imm; @@ -155,24 +167,41 @@ AsmInstruction::AsmInstruction(u32 encoding, char const *mnemonic, int tags, int i; for(i = 0; i < SH_MAX; i++) { if(mn == instructionMnemonics[i]) { - m_opcode = i; + m_operation = i; 
break; } } - assert(i < SH_MAX && "AsmInstruction with unknown opcode string"); + assert(i < SH_MAX && "AsmInstruction with unknown operation string"); } -char const *AsmInstruction::mnemonic() const +std::string AsmInstruction::mnemonic() const { - assert(m_opcode < SH_MAX); - return instructionMnemonics[m_opcode]; + return std::string {operationString()} + operationSizeString(); +} + +char const *AsmInstruction::operationString() const +{ + assert(m_operation < SH_MAX); + return instructionMnemonics[m_operation]; +} + +char const *AsmInstruction::operationSizeString() const +{ + if(m_opsize == 1) + return ".b"; + if(m_opsize == 2) + return ".w"; + if(m_opsize == 4) + return ".l"; + return ""; } u32 AsmInstruction::getPCRelativeTarget(u32 pc) const { + /* There can only be at most one PC-relative operand in an instruction */ for(AsmOperand const &op: operands()) { if(op.usesPCRelativeAddressing()) - return op.getPCRelativeTarget(pc, m_opsize); + return op.getPCRelativeTarget(pc); } return -1; } diff --git a/lib/load-asm.l b/lib/load-asm.l index 2005c81..383bc94 100644 --- a/lib/load-asm.l +++ b/lib/load-asm.l @@ -221,17 +221,17 @@ static AsmOperand make_arg(int token, int opsize, int m, int n, int d, int i) case JUMP12: return AsmOperand::mkPcJump(d12 * 2); case DPC: return AsmOperand::mkPcAddr(d * 4); case IMM: return AsmOperand::mkImm(i8); - case AT_RN: return AsmOperand::mkDeref(Rn); - case AT_RM: return AsmOperand::mkDeref(Rm); - case AT_RMP: return AsmOperand::mkPostInc(Rm); - case AT_RNP: return AsmOperand::mkPostInc(Rn); - case AT_MRN: return AsmOperand::mkPreDec(Rn); + case AT_RN: return AsmOperand::mkDeref(Rn, opsize); + case AT_RM: return AsmOperand::mkDeref(Rm, opsize); + case AT_RMP: return AsmOperand::mkPostInc(Rm, opsize); + case AT_RNP: return AsmOperand::mkPostInc(Rn, opsize); + case AT_MRN: return AsmOperand::mkPreDec(Rn, opsize); case AT_DRN: return AsmOperand::mkStructDeref(d*opsize, opsize, Rn); case AT_DRM: return 
AsmOperand::mkStructDeref(d*opsize, opsize, Rm); case AT_DGBR: return AsmOperand::mkStructDeref(d*opsize, opsize, Reg::GBR); - case AT_R0RN: return AsmOperand::mkArrayDeref(Reg::R0, Rn); - case AT_R0RM: return AsmOperand::mkArrayDeref(Reg::R0, Rm); - case AT_R0GBR: return AsmOperand::mkArrayDeref(Reg::R0, Reg::GBR); + case AT_R0RN: return AsmOperand::mkArrayDeref(Reg::R0, Rn, opsize); + case AT_R0RM: return AsmOperand::mkArrayDeref(Reg::R0, Rm, opsize); + case AT_R0GBR: return AsmOperand::mkArrayDeref(Reg::R0, Reg::GBR, opsize); case AT_DPC: if(!opsize) diff --git a/lib/os.cpp b/lib/os.cpp index 280bc0f..3a5e649 100644 --- a/lib/os.cpp +++ b/lib/os.cpp @@ -23,7 +23,7 @@ namespace FxOS { static std::map> syscallDefsCache; static std::unique_ptr buildSyscallDefs(char const *str, int len); -OS::OS(VirtualSpace &space): type {UNKNOWN}, m_space {space} +OS::OS(VirtualSpace const &space): type {UNKNOWN}, m_space {space} { if(!space.covers(0x80000000, (256 << 10))) { FxOS_log(ERR, @@ -49,7 +49,7 @@ OS::OS(VirtualSpace &space): type {UNKNOWN}, m_space {space} void OS::parse_header() { - VirtualSpace &s = m_space; + VirtualSpace const &s = m_space; if(this->type == FX) { /* Bootcode timestamp at the very end of the bootcode */ @@ -202,7 +202,7 @@ SyscallDefs const *OS::syscall_defs() const noexcept void OS::parse_footer() { - VirtualSpace &s = m_space; + VirtualSpace const &s = m_space; /* Find the footer address (occurrence of "CASIOABSLangdata") */ uint32_t start = MemoryRegion::ROM.start; @@ -238,7 +238,7 @@ void OS::parse_footer() //--- static uint32_t accumulate_range( - VirtualSpace &m_space, uint32_t start, uint32_t end) + VirtualSpace const &m_space, uint32_t start, uint32_t end) { uint32_t sum = 0; diff --git a/lib/view/assembly.cpp b/lib/view/assembly.cpp index 021d97b..2763321 100644 --- a/lib/view/assembly.cpp +++ b/lib/view/assembly.cpp @@ -6,6 +6,7 @@ //---------------------------------------------------------------------------// #include +#include #include 
#include #include @@ -15,7 +16,6 @@ #include #include #include -#include namespace FxOS { @@ -66,7 +66,7 @@ static void renderOperand(AsmOperand const &op, u32 pc, int opsize, if(!op.usesPCRelativeAddressing()) return; - u32 location = op.getPCRelativeTarget(pc, opsize); + u32 location = op.getPCRelativeTarget(pc); if(output(out, p, {}, format("<%08x>", location))) return; type = (type == PCRelative) ? Location : Constant; @@ -141,23 +141,24 @@ static void doOldInst(u32 pc, OldInstruction &i, printf(">\n"); } - /* Raw data if instruction cannot be decoded */ - printf(" %08x: %04x", pc, (i.inst ? i.inst->encoding() : i.opcode)); + /* Only show the raw data if instruction cannot be decoded */ + if(opts.showInstructionDetails) + printf(" %08x: %04x", pc, (i.inst ? i.inst->encoding() : i.opcode)); + if(!i.inst) { + if(!opts.showInstructionDetails) + printf(" %04x", i.inst->encoding()); printf("\n"); m_lastAddress = pc; return; } /* Mnemonic */ - static char const *suffixes[5] = {"", ".b", ".w", "", ".l"}; - char const *suffix - = suffixes[(i.inst->opsize() <= 4) ? i.inst->opsize() : 0]; - - int spacing = i.inst->operandCount() - ? 8 - strlen(i.inst->mnemonic()) - strlen(suffix) - : 0; - printf(" %s%s%*s", i.inst->mnemonic(), suffix, spacing, ""); + std::string mnemonic = i.inst->mnemonic(); + std::string str = " "; + str += mnemonic; + int spacing = i.inst->operandCount() ? 8 - mnemonic.size() : 0; + printf(" %s%*s", mnemonic.c_str(), spacing, ""); /* Arguments */ for(int n = 0; n < i.inst->operandCount(); n++) { @@ -242,19 +243,16 @@ void viewAssemblyInstruction(Instruction const &ins, ViewAssemblyOptions *opts) OperandOutput opout; u32 pc = ins.address(); - printf(" %08x: %04x", pc, opcode.encoding()); + if(opts->showInstructionDetails) + printf(" %08x: %04x ", pc, opcode.encoding()); + else + printf(" "); /* Mnemonic */ - static char const *suffixes[5] = {"", ".b", ".w", "", ".l"}; - char const *suffix = suffixes[(opcode.opsize() <= 4) ? 
opcode.opsize() : 0]; - - int spacing = opcode.operandCount() - ? 8 - strlen(opcode.mnemonic()) - strlen(suffix) - : 0; - std::string str = " "; - str += opcode.mnemonic(); - str += suffix; - str += std::string(spacing, ' '); + std::string mnemonic = opcode.mnemonic(); + std::string str = ""; + str += mnemonic; + str += std::string(opcode.operandCount() ? 8 - mnemonic.size() : 0, ' '); /* Arguments */ for(int n = 0; n < opcode.operandCount(); n++) { @@ -272,21 +270,40 @@ void viewAssemblyInstruction(Instruction const &ins, ViewAssemblyOptions *opts) opout.clear(); } + std::vector> comments; + if(opts->printFunctionAnalysis) { auto *an = ins.parentFunction().getAnalysis(); if(an) { auto &block = an->blocks[ins.parentBlock().blockIndex()]; ProgramStateDiff const &diff = block.diffs[ins.indexInBlock()]; - - if(str.size() < 32) - str += std::string(32 - str.size(), ' '); - str += "# "; - str += diff.str(); + if(diff.target() + != static_cast(ProgramStateDiff::Target::None)) + comments.emplace_back( + diff.str(), fmt::fg(fmt::terminal_color::cyan)); } } - fmt::print("{}\n", str); + std::cout << str; + + if(!comments.empty()) { + if(str.size() < 28) + std::cout << std::string(28 - str.size(), ' '); + + bool first = true; + for(auto &[c, style]: comments) { + if(first) + fmt::print(fmt::fg(fmt::color::gray), "# "); + else + fmt::print(", "); + + fmt::print(style, "{}", c); + first = false; + } + } + + std::cout << '\n'; } static std::string objectsAt( @@ -320,6 +337,23 @@ static std::string objectsAt( }); } +static void viewProgramState(ProgramState const &PS, std::string lineStart) +{ + ViewStringsOptions opts = { + .maxColumns = 70, + .lineStart = lineStart, + .separator = ", ", + .style = fmt::fg(fmt::terminal_color::cyan), + }; + RelConstDomain RCD; + std::cout << "\e[36m"; + viewStrings(std::views::iota(0, 16) | std::views::transform([&](int i) { + return fmt::format("r{}:{}", i, PS.getRegister(i).str(false)); + }), + opts); + std::cout << "\e[0m"; +} + void 
viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts) { opts = opts ? opts : &defaultOptions; @@ -365,8 +399,7 @@ void viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts) auto *an = bb.parentFunction().getAnalysis(); if(an) { auto &block = an->blocks[bb.blockIndex()]; - printf(" Entry state:\n"); - fmt::print("{}", block.entry.str(6)); + viewProgramState(block.entry, fmt_rgb(" | ", fmt::color::gray)); } } diff --git a/lib/vspace.cpp b/lib/vspace.cpp index 0eaabc1..075019b 100644 --- a/lib/vspace.cpp +++ b/lib/vspace.cpp @@ -13,24 +13,24 @@ namespace FxOS { //=== AbstractMemory ===// -bool AbstractMemory::covers(uint32_t addr, int size) +bool AbstractMemory::covers(uint32_t addr, int size) const { return (this->translate(addr, size) != nullptr); } -bool AbstractMemory::covers(MemoryRegion const ®ion) +bool AbstractMemory::covers(MemoryRegion const ®ion) const { return this->covers(region.start, region.size()); } -char const *AbstractMemory::translate(uint32_t addr, int size) +char const *AbstractMemory::translate(uint32_t addr, int size) const { int actual_size; char const *ptr = this->translate_dynamic(addr, &actual_size); return (ptr && actual_size >= size) ? 
ptr : nullptr; } -Addressable AbstractMemory::read_i8(uint32_t addr) +Addressable AbstractMemory::read_i8(uint32_t addr) const { int8_t *i8 = (int8_t *)this->translate(addr, 1); if(!i8) @@ -38,7 +38,7 @@ Addressable AbstractMemory::read_i8(uint32_t addr) return Addressable(addr, *i8); } -Addressable AbstractMemory::read_u8(uint32_t addr) +Addressable AbstractMemory::read_u8(uint32_t addr) const { uint8_t *u8 = (uint8_t *)this->translate(addr, 1); if(!u8) @@ -46,7 +46,7 @@ Addressable AbstractMemory::read_u8(uint32_t addr) return Addressable(addr, *u8); } -Addressable AbstractMemory::read_i16(uint32_t addr) +Addressable AbstractMemory::read_i16(uint32_t addr) const { uint8_t *i16 = (uint8_t *)this->translate(addr, 2); if(!i16) @@ -55,7 +55,7 @@ Addressable AbstractMemory::read_i16(uint32_t addr) return Addressable(addr, v); } -Addressable AbstractMemory::read_u16(uint32_t addr) +Addressable AbstractMemory::read_u16(uint32_t addr) const { uint8_t *u16 = (uint8_t *)this->translate(addr, 2); if(!u16) @@ -64,7 +64,7 @@ Addressable AbstractMemory::read_u16(uint32_t addr) return Addressable(addr, v); } -Addressable AbstractMemory::read_i32(uint32_t addr) +Addressable AbstractMemory::read_i32(uint32_t addr) const { uint8_t *i32 = (uint8_t *)this->translate(addr, 4); if(!i32) @@ -73,7 +73,7 @@ Addressable AbstractMemory::read_i32(uint32_t addr) return Addressable(addr, v); } -Addressable AbstractMemory::read_u32(uint32_t addr) +Addressable AbstractMemory::read_u32(uint32_t addr) const { uint8_t *u32 = (uint8_t *)this->translate(addr, 4); if(!u32) @@ -82,7 +82,8 @@ Addressable AbstractMemory::read_u32(uint32_t addr) return Addressable(addr, v); } -Addressable AbstractMemory::read_str(uint32_t addr, size_t len) +Addressable AbstractMemory::read_str( + uint32_t addr, size_t len) const { char const *str = this->translate(addr, len); if(!str) @@ -91,7 +92,7 @@ Addressable AbstractMemory::read_str(uint32_t addr, size_t len) } uint32_t AbstractMemory::search( - uint32_t start, 
uint32_t end, void const *pattern, int size) + uint32_t start, uint32_t end, void const *pattern, int size) const { void const *data = translate(start, end - start); if(!data) @@ -132,7 +133,7 @@ Binding::Binding(BSON const &b) buffer.deserialize(b["buffer"]); } -char const *Binding::translate_dynamic(uint32_t addr, int *size) +char const *Binding::translate_dynamic(uint32_t addr, int *size) const { if(addr >= region.start && addr < region.end) { *size = region.end - addr; @@ -167,7 +168,7 @@ void VirtualSpace::bind_region(MemoryRegion const ®ion, Buffer const &buf) this->bindings.emplace(this->bindings.begin(), region, buf); } -char const *VirtualSpace::translate_dynamic(uint32_t addr, int *size) +char const *VirtualSpace::translate_dynamic(uint32_t addr, int *size) const { for(auto &b: this->bindings) { char const *ptr = b.translate_dynamic(addr, size); diff --git a/shell/a.cpp b/shell/a.cpp index 1d70b26..32247c4 100644 --- a/shell/a.cpp +++ b/shell/a.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ struct _af_args bool force = false; std::string name = ""; std::vector addresses; + bool consistency = false; }; static _af_args parse_af(Session &session, Parser &parser) @@ -33,8 +35,12 @@ static _af_args parse_af(Session &session, Parser &parser) parser.option("-u", [&args](Parser &) { args.update = true; }); parser.option("--force", [&args](Parser &) { args.force = true; }); parser.option("-n", [&args](Parser &p) { args.name = p.symbol(""); }); + parser.option("-c", [&args](Parser &) { args.consistency = true; }); parser.accept_options(); + if(args.consistency && parser.at_end()) + return args; + do { args.addresses.push_back(parser.expr(session.currentBinary())); } @@ -44,38 +50,6 @@ static _af_args parse_af(Session &session, Parser &parser) return args; } -template -concept range_of - = std::ranges::range && std::same_as, T>; - -// TODO: Move this visualization function (also put spacing as a feature) 
-template - requires(range_of) -void viewStrings(R range, int maxColumns = 80) -{ - int columns = 0; - - for(std::string const &str: range) { - int length = str.size(); - if(columns != 0 && columns + length > maxColumns) { - fmt::print("\n"); - columns = 0; - } - - fmt::print("{}", str); - columns += length; - } - - if(columns > 0) - fmt::print("\n"); -} -template -void viewStrings(R range, F fun, int maxColumns = 80) -{ - return viewStrings( - std::views::all(range) | std::views::transform(fun), maxColumns); -} - static void _af_consistency(Binary const &binary) { /* List of functions with blocks before the function's entry point */ @@ -93,25 +67,37 @@ static void _af_consistency(Binary const &binary) noReturn.push_back(&f); } + ViewStringsOptions opts = { + .maxColumns = 78, + .lineStart = " ", + .separator = " ", + }; + if(earlyBlocks.size() > 0) { fmt::print("{} functions have blocks before their entry point:\n", earlyBlocks.size()); - viewStrings(earlyBlocks, [](Function const *f) { - if(f->name().size() > 0) - return fmt::format(" {}", f->name()); - else - return fmt::format(" fun.{:08x}", f->address()); - }); + viewStrings( + earlyBlocks, + [](Function const *f) { + if(f->name().size() > 0) + return fmt::format("{}", f->name()); + else + return fmt::format("fun.{:08x}", f->address()); + }, + opts); } if(noReturn.size() > 0) { fmt::print("{} functions do not return:\n", noReturn.size()); - viewStrings(noReturn, [](Function const *f) { - if(f->name().size() > 0) - return fmt::format(" {}", f->name()); - else - return fmt::format(" fun.{:08x}", f->address()); - }); + viewStrings( + noReturn, + [](Function const *f) { + if(f->name().size() > 0) + return fmt::format("{}", f->name()); + else + return fmt::format("fun.{:08x}", f->address()); + }, + opts); } } @@ -185,6 +171,11 @@ void _afs(Session &session, _af_args &args) if(!b) return FxOS_log(ERR, "No current binary!\n"); + if(args.consistency) { + _af_consistency(*b); + return; + } + OS *os = b->OSAnalysis(); 
if(!os) return FxOS_log(ERR, "afs: No OS analysis, cannot enumerate syscalls"); diff --git a/shell/d.cpp b/shell/d.cpp index ac34296..f5bdb9d 100644 --- a/shell/d.cpp +++ b/shell/d.cpp @@ -21,6 +21,7 @@ struct _d_args { std::variant location; + bool analyze = false; }; static _d_args parse_d(Session &session, Parser &parser) @@ -30,12 +31,15 @@ static _d_args parse_d(Session &session, Parser &parser) if(!session.currentBinary()) return {}; + parser.option("-a", [&args](Parser &) { args.analyze = true; }); + parser.accept_options(); + args.location = parser.expr_or_range(session.currentBinary()); parser.end(); return args; } -void _d(Session &session, std::variant location) +void _d(Session &session, std::variant location, bool analyze) { Binary *b = session.currentBinary(); if(!b) @@ -86,11 +90,13 @@ void _d(Session &session, std::variant location) Function f(*b, address); if(f.exploreFunctionAt(address)) { - f.runAnalysis(); + if(analyze) + f.runAnalysis(); ViewAssemblyOptions opts; opts.binary = b; - opts.printFunctionAnalysis = true; + opts.printFunctionAnalysis = analyze; + opts.showInstructionDetails = !analyze; viewAssemblyFunction(f, &opts); } } @@ -104,17 +110,17 @@ static ShellCommand _d_cmd( "d", [](Session &s, Parser &p) { auto args = parse_d(s, p); - _d(s, args.location); + _d(s, args.location, args.analyze); }, [](Session &s, Parser &p) { parse_d(s, p); }, "Disassemble", R"( -d [] +d [-a]
+d -Disassembles code starting at the specified address, exploring branches until -function terminators, invalid instructions, or dynamically-computed jumps. The -default address is $ (the cursor of the current virtual space). +Disassembles code. In the first form, explores a function at the provided +address (without modifying the binary), exploring branches until function +terminators, invalid instructions or dynamically-computed jumps. If -a is +specified, also perform static analysis. -The following disassembler passes are run: - cfg Explores the code reachable from the start address - pcrel Computes PC-relative addresses (eg mov.l, mova, bf, bra...) - syscall Annotates uses of syscall table entries with the syscall number +In the second form, disassembles instructions in the specified range. (This is +an older feature with less support for advanced features.) )");