fxos: first useful analysis + quite a few cleanups

This commit is contained in:
Lephenixnoir 2023-11-29 16:31:52 +01:00
parent 255e31ab8f
commit ed902db2d8
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
19 changed files with 582 additions and 217 deletions

View File

@ -84,8 +84,9 @@ struct RelConst
neither Top nor Bottom. */
operator bool() const noexcept;
/* String representation */
std::string str() const noexcept;
/* String representation. If detailed is set, allow clearer alternate
representations (eg. "100 (0x00000064)"). */
std::string str(bool detailed = true) const noexcept;
};
class RelConstDomain: public AbstractDomain<RelConst>

View File

@ -49,8 +49,6 @@ struct ProgramState
/* Lattice order. */
bool le(ProgramState const &other) const;
std::string str(int indent = 0) const;
private:
/* Values for registers r0..r15 */
RelConst m_regs[16];
@ -59,7 +57,7 @@ private:
/* Change in program state over a single (contextually known) instruction. */
struct ProgramStateDiff
{
enum class Target : int { None = -1, Unknown = -2 };
enum class Target : int { None = -1, Unknown = -2, CallStandard = -3 };
/* Number of the register that changes, or Target::*. */
int target() const
@ -90,6 +88,12 @@ struct ProgramStateDiff
{
m_target = static_cast<int>(Target::None);
}
/* Set the diff to modifying register states as allowed by the standard
function calling convention. */
void setCallStandard()
{
m_target = static_cast<int>(Target::CallStandard);
}
/* Set the diff to unknown effect on registers. */
void setUnknown()
{

View File

@ -388,7 +388,7 @@ struct Instruction
Instruction(Function &function, u32 address, u32 opcode);
// TODO: Rename AsmInstruction -> Opcode
// TODO: Don't use the word "opcode", maybe "base"
// TODO: Get opcode from Instruction
AsmInstruction const &opcode() const
{
@ -400,7 +400,7 @@ struct Instruction
return insmap[m_opcode].has_value();
}
/* Instruction's size in bytes. */
uint size() const
uint encodingSize() const
{
return (m_opcode >> 16) ? 4 : 2;
}

View File

@ -88,6 +88,9 @@ public:
return m_name != r.m_name;
}
/* Get the register number for r0 ... r15, -1 for other registers. */
int getR() const;
private:
CpuRegisterName m_name;
};
@ -108,36 +111,34 @@ struct AsmOperand
Imm, /* #imm */
};
/* Constructors, mostly internal; default is r0, rest is not relevant */
AsmOperand(Kind kind = Reg, CpuRegister base = CpuRegister::R0);
AsmOperand(int disp, i8 opsize, CpuRegister base);
AsmOperand(CpuRegister index, CpuRegister base);
AsmOperand(Kind kind, int disp_imm, i8 opsize = 0);
/* Default constructor gives register r0. */
AsmOperand();
/* Actually useful factory functions */
/* Factory functions */
static AsmOperand mkReg(CpuRegister base)
{
return AsmOperand(Reg, base);
return AsmOperand(Reg, base, 0);
}
static AsmOperand mkDeref(CpuRegister base)
static AsmOperand mkDeref(CpuRegister base, i8 opsize)
{
return AsmOperand(Deref, base);
return AsmOperand(Deref, base, opsize);
}
static AsmOperand mkPostInc(CpuRegister base)
static AsmOperand mkPostInc(CpuRegister base, i8 opsize)
{
return AsmOperand(PostInc, base);
return AsmOperand(PostInc, base, opsize);
}
static AsmOperand mkPreDec(CpuRegister base)
static AsmOperand mkPreDec(CpuRegister base, i8 opsize)
{
return AsmOperand(PreDec, base);
return AsmOperand(PreDec, base, opsize);
}
static AsmOperand mkStructDeref(int disp, int opsize, CpuRegister base)
{
return AsmOperand(disp, opsize, base);
}
static AsmOperand mkArrayDeref(CpuRegister index, CpuRegister base)
static AsmOperand mkArrayDeref(
CpuRegister index, CpuRegister base, i8 opsize)
{
return AsmOperand(index, base);
return AsmOperand(index, base, opsize);
}
static AsmOperand mkPcRel(int disp, int opsize)
{
@ -163,7 +164,6 @@ struct AsmOperand
{
return m_kind == Reg;
}
/* Is this operand in memory? */
bool isMemory() const
{
@ -181,6 +181,26 @@ struct AsmOperand
case Imm:
return false;
}
return false;
}
/* Is this operand a constant? True for the PcRel, PcJump, PcAddr and Imm
   kinds; false for registers and every memory-dereferencing kind. */
bool isConstant() const
{
    bool const pcBased
        = (m_kind == PcRel || m_kind == PcJump || m_kind == PcAddr);
    return pcBased || m_kind == Imm;
}
// TODO: RelConst modeling the memory address being used (first
@ -229,10 +249,15 @@ struct AsmOperand
}
/* Get the PC-relative target, assuming the instruction is at the provided
address, for arguments with PC-relative offsets. */
u32 getPCRelativeTarget(u32 pc, int size) const;
address, for arguments with PC-relative addressing. */
u32 getPCRelativeTarget(u32 pc) const;
private:
AsmOperand(Kind kind, CpuRegister base, i8 opsize);
AsmOperand(int disp, i8 opsize, CpuRegister base);
AsmOperand(CpuRegister index, CpuRegister base, i8 opsize);
AsmOperand(Kind kind, int disp_imm, i8 opsize = 0);
Kind m_kind;
/* Base register. Valid for all modes except Imm */
CpuRegister m_base;
@ -269,18 +294,10 @@ struct AsmInstruction
AsmInstruction(u32 encoding, char const *mnemonic, int tags, int opCount,
AsmOperand op1 = {}, AsmOperand op2 = {});
/* Original encoding */
u32 encoding() const
/* Operation code, one of AsmInstruction::SH_* (eg. SH_mov) */
uint operation() const
{
return m_encoding;
}
/* Mnemonic **without the size indicator** */
char const *mnemonic() const;
/* Size indication (purely syntactic), 0 if not present */
int opsize() const
{
return m_opsize;
return m_operation;
}
/* Access to operands */
@ -290,7 +307,7 @@ struct AsmInstruction
}
AsmOperand const &operand(int i) const
{
assert(i >= 0 && i < m_opCount && "operand out-of-bounds");
assert((uint)i < m_opCount && "operand out-of-bounds");
return m_ops[i];
}
auto operands() const // -> [AsmOperand const &]
@ -298,6 +315,25 @@ struct AsmInstruction
return std::views::take(m_ops, m_opCount);
}
/* Size indication (purely syntactic), 0 if not present */
int opsize() const
{
return m_opsize;
}
/* Original encoding */
u32 encoding() const
{
return m_encoding;
}
/* Mnemonic (with the size indicator, eg. "mov.l") */
std::string mnemonic() const;
/* Operation name (without the size indicator, eg. "mov") */
char const *operationString() const;
/* Size indicator to go after the op name (".b", ".w", ".l" or empty) */
char const *operationSizeString() const;
//=== Instruction classes ===//
/* Whether the instruction terminates the function it's in. */
@ -357,7 +393,7 @@ private:
/* Original encoding */
u32 m_encoding;
/* Assembler instruction name (mov, add, etc), without size modifier. */
u8 m_opcode;
u8 m_operation;
/* Operation size (0, 1, 2 or 4) */
i8 m_opsize;
/* Number of operands */

View File

@ -41,7 +41,7 @@ public:
/* Create an OS interface for this virtual space. If there is no data
loaded in ROM or the OS can't be identified, the type of OS is set to
UNKNOWN and no information is provided. */
OS(VirtualSpace &space);
OS(VirtualSpace const &space);
/* Type of OS, determined at construction */
enum Type { UNKNOWN, FX, CG };
@ -86,7 +86,7 @@ public:
private:
/* Virtual space being analyzed */
VirtualSpace &m_space;
VirtualSpace const &m_space;
/* Parse the OS header. This should be the first analysis function to
be called, because it determines the type of model (ie. fx9860g vs

View File

@ -88,6 +88,9 @@ struct ViewAssemblyOptions
/* Whether to print function analysis results from the binary */
bool printFunctionAnalysis = false;
/* Whether to show details of instruction addresses and encodings */
bool showInstructionDetails = false;
/* TODO: More view assembly options, including CFG layout */
/* TODO: View assembly options: syntax highlighting */
};

98
include/fxos/view/util.h Normal file
View File

@ -0,0 +1,98 @@
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/view/util
#include <string>
#include <ranges>
#include <iostream>
#include <fmt/core.h>
#include <fmt/color.h>
#ifndef FXOS_VIEW_UTIL_H
#define FXOS_VIEW_UTIL_H
namespace FxOS {
/* Format s with the foreground color fg applied through fmt::styled, and
   return the result as a string. */
static inline auto fmt_rgb(std::string s, fmt::detail::color_type fg)
{
    auto const style = fmt::fg(fg);
    return fmt::format("{}", fmt::styled(s, style));
}
/* Format s with the terminal foreground color fg applied through fmt::styled,
   and return the result as a string. */
static inline auto fmt_color(std::string s, fmt::terminal_color fg)
{
    auto const style = fmt::fg(fg);
    return fmt::format("{}", fmt::styled(s, style));
}
/* Count the code points of a UTF-8 encoded string: every byte except the
   continuation bytes (those of the form 10xxxxxx) starts a new code point.
   The string is not validated. Declared inline like the other helpers of this
   header so that translation units which don't use it get no warning, and
   takes its argument by reference to avoid a copy on every call. */
static inline size_t codePointLength(std::string const &s)
{
    size_t len = 0;
    for(unsigned char c: s)
        len += (c & 0xc0) != 0x80;
    return len;
}
/* viewStrings: Display a set of strings with line wrapping */
/* range_of<R, T>: satisfied when R is a range (std::ranges::range) whose value
   type (std::ranges::range_value_t) is exactly T. */
template<typename R, typename T>
concept range_of
= std::ranges::range<R> && std::same_as<std::ranges::range_value_t<R>, T>;
/* Display settings for viewStrings(). */
struct ViewStringsOptions
{
    /* Column budget for the elements of one line; lineStart doesn't count */
    int maxColumns {80};
    /* Text printed at the start of every line, on top of maxColumns */
    std::string lineStart {};
    /* Text printed between two consecutive items of a line */
    std::string separator {", "};
    /* Style applied to everything printed after lineStart */
    fmt::text_style style {};
};
/* Print the strings of `range` on standard output, separated by
   opts.separator and wrapped so that the contents of each line (not counting
   opts.lineStart) fit in opts.maxColumns columns whenever possible. Every
   line starts with opts.lineStart; elements and separators are printed with
   opts.style. Widths are counted in UTF-8 code points. Nothing is printed for
   an empty range. */
template<typename R>
    requires(range_of<R, std::string>)
void viewStrings(R range, ViewStringsOptions const &opts)
{
    /* Measure the separator like the elements, in code points rather than
       bytes, so that a non-ASCII separator doesn't cause premature wrapping */
    int const sepSize = codePointLength(opts.separator);

    bool newline = true;
    int lineSize = 0;

    for(std::string const &str: range) {
        int const strSize = codePointLength(str);

        /* Allow overflow if that's required for progress: an element that is
           first on its line is always printed there, however long it is */
        if(lineSize != 0 && lineSize + sepSize + strSize > opts.maxColumns) {
            fmt::print("\n");
            newline = true;
            lineSize = 0;
        }

        if(newline) {
            std::cout << opts.lineStart;
            fmt::print(opts.style, "{}", str);
            lineSize += strSize;
        }
        else {
            fmt::print(opts.style, "{}{}", opts.separator, str);
            lineSize += sepSize + strSize;
        }
        newline = false;
    }

    /* Terminate the last line, unless nothing was printed at all */
    if(!newline)
        fmt::print("\n");
}
/* Variant of viewStrings() that maps every element of `range` through `fun`
   and displays the results. */
template<typename R, typename F>
void viewStrings(R range, F fun, ViewStringsOptions const &opts)
{
    auto asStrings = std::views::transform(fun);
    viewStrings(std::views::all(range) | asStrings, opts);
}
} /* namespace FxOS */
#endif /* FXOS_VIEW_UTIL_H */

View File

@ -42,24 +42,25 @@ class AbstractMemory
{
public:
/* Checks if an address or interval is simulated (in its entirety) */
bool covers(uint32_t addr, int size = 1);
bool covers(uint32_t addr, int size = 1) const;
/* Check if a full region is simulated */
bool covers(MemoryRegion const &region);
bool covers(MemoryRegion const &region) const;
/* Returns the data located at the provided virtual address, nullptr if it
is not entirely covered. */
char const *translate(uint32_t addr, int size = 1);
char const *translate(uint32_t addr, int size = 1) const;
/* Returns the data located at the provided virtual address, and indicates
how much is available in *size. The pointer is null if [addr] itself is
not covered, in which case *size is also set to 0. */
virtual char const *translate_dynamic(uint32_t addr, int *size) = 0;
virtual char const *translate_dynamic(uint32_t addr, int *size) const = 0;
/* Search a binary pattern in the specified area. Returns the virtual
address of the first occurrence if any is found, [end] otherwise
(including if the range is empty or exceeds simulated memory). */
uint32_t search(uint32_t start, uint32_t end, void const *pat, int size);
uint32_t search(
uint32_t start, uint32_t end, void const *pat, int size) const;
/* Read a simple object from memory. The following methods all assume that
the specified address is simulated, and return a default value if it's
@ -71,15 +72,15 @@ public:
data type, see <fxos/util/Addressable.h>. */
/* Various sizes of integers with sign-extension or zero-extension. */
Addressable<int8_t> read_i8(uint32_t addr);
Addressable<uint8_t> read_u8(uint32_t addr);
Addressable<int16_t> read_i16(uint32_t addr);
Addressable<uint16_t> read_u16(uint32_t addr);
Addressable<int32_t> read_i32(uint32_t addr);
Addressable<uint32_t> read_u32(uint32_t addr);
Addressable<int8_t> read_i8(uint32_t addr) const;
Addressable<uint8_t> read_u8(uint32_t addr) const;
Addressable<int16_t> read_i16(uint32_t addr) const;
Addressable<uint16_t> read_u16(uint32_t addr) const;
Addressable<int32_t> read_i32(uint32_t addr) const;
Addressable<uint32_t> read_u32(uint32_t addr) const;
/* Read a non-NUL-terminated string */
Addressable<std::string> read_str(uint32_t addr, size_t len);
Addressable<std::string> read_str(uint32_t addr, size_t len) const;
};
/* A binding of a data buffer into a memory region of the target. */
@ -99,7 +100,7 @@ struct Binding: public AbstractMemory
Buffer buffer;
// - AbstractMemory interface
char const *translate_dynamic(uint32_t addr, int *size) override;
char const *translate_dynamic(uint32_t addr, int *size) const override;
};
/* A composite space where regions can be bound dynamically */
@ -123,7 +124,7 @@ public:
void bind_region(MemoryRegion const &region, Buffer const &buffer);
// - AbstractMemory interface
char const *translate_dynamic(uint32_t addr, int *size) override;
char const *translate_dynamic(uint32_t addr, int *size) const override;
// TODO: Remove these
Disassembly disasm;

View File

@ -313,7 +313,7 @@ RelConst::operator bool() const noexcept
return !spe;
}
std::string RelConst::str() const noexcept
std::string RelConst::str(bool detailed) const noexcept
{
using RegName = CpuRegister::CpuRegisterName;
@ -347,7 +347,7 @@ std::string RelConst::str() const noexcept
v = -ival;
}
return str + format("%d (0x%08x)", v, uval);
return str + format(detailed ? "%d (0x%08x)" : "%d", v, uval);
}
else {
return str + format("0x%08x", uval);

View File

@ -6,6 +6,7 @@
//---------------------------------------------------------------------------//
#include <fxos/analysis.h>
#include <fxos/util/log.h>
#include <fmt/core.h>
#include <cassert>
@ -36,6 +37,10 @@ void ProgramState::applyDiff(ProgramStateDiff const &diff)
for(int i = 0; i < 16; i++)
m_regs[i] = RCD.top();
}
else if(t == static_cast<int>(ProgramStateDiff::Target::CallStandard)) {
for(int i = 0; i < 7; i++)
m_regs[i] = RCD.top();
}
else {
assert((unsigned)t < 16 && "invalid register target");
m_regs[t] = diff.value();
@ -62,34 +67,16 @@ bool ProgramState::le(ProgramState const &other) const
return true;
}
std::string ProgramState::str(int indentLength) const
{
std::string indent(indentLength, ' ');
std::string str;
/* Registers */
for(int i = 0; i < 16; i++) {
if(i % 4 == 0) {
str += (i > 0 ? "\n" : "");
str += indent;
}
else
str += " ";
str += fmt::format("r{}:{}", i, m_regs[i].str());
}
return str + "\n";
}
std::string ProgramStateDiff::str() const
{
if(m_target == static_cast<int>(Target::None))
return "()";
if(m_target == static_cast<int>(Target::Unknown))
return "";
if(m_target == static_cast<int>(Target::CallStandard))
return "call(std)";
return fmt::format("r{} ← {}", m_target, m_value.str());
return fmt::format("r{} ← {}", m_target, m_value.str(false));
}
/* Information stored for each block during the fixpoint iteration */
@ -101,6 +88,44 @@ struct BlockStates
ProgramState nextEntry;
};
/* Compute the value of a constant operand (one for which op.isConstant()
   holds) of the instruction ins. Imm operands give their immediate; PcJump and
   PcAddr operands give their PC-relative target; PcRel operands give the value
   read at their PC-relative target in the binary's virtual space, using the
   operand's size (1, 2 or 4). An error is logged and -1 returned when that
   read is not covered by the virtual space or the operand has no such size. */
static u32 computeConstantOperand(Instruction const &ins, AsmOperand const &op)
{
    assert(op.isConstant() && "analysis of constant operands is out of sync");

    auto const kind = op.kind();

    if(kind == AsmOperand::Imm)
        return op.imm();
    if(kind == AsmOperand::PcJump || kind == AsmOperand::PcAddr)
        return op.getPCRelativeTarget(ins.address());
    if(kind != AsmOperand::PcRel) {
        assert(false && "not a constant operand");
        __builtin_unreachable();
    }

    /* Only PcRel remains: the value has to be read from memory */
    Binary const &binary = ins.parentBinary();
    u32 const target = op.getPCRelativeTarget(ins.address());

    if(!binary.vspace().covers(target, op.opsize())) {
        FxOS_log(ERR, "constant operand reads out of vspace bounds");
        return -1;
    }

    switch(op.opsize()) {
    case 1:
        return binary.vspace().read_i8(target);
    case 2:
        return binary.vspace().read_i16(target);
    case 4:
        return binary.vspace().read_i32(target);
    default:
        FxOS_log(ERR, "PcRel operand with no opsize");
        return -1;
    }
}
static ProgramStateDiff interpretInstruction(
Instruction const &ins, ProgramState const &PS)
{
@ -108,12 +133,152 @@ static ProgramStateDiff interpretInstruction(
ProgramStateDiff diff;
diff.setUnknown();
// TODO: Do this properly
u16 opc = ins.opcode().encoding();
if((opc & 0xf000) == 0xe000) {
int reg = (opc >> 8) & 0xf;
int val = (int8_t)opc;
diff.setRegisterUpdate(reg, RCD.constant(val));
AsmInstruction asmins = ins.opcode();
switch(asmins.operation()) {
/* Moves */
case AsmInstruction::SH_mov:
case AsmInstruction::SH_ldc:
case AsmInstruction::SH_lds:
case AsmInstruction::SH_stc:
case AsmInstruction::SH_sts:
case AsmInstruction::SH_mova: {
    AsmOperand src = asmins.operand(0);
    AsmOperand dst = asmins.operand(1);

    /* Only r0..r15 can be register targets. getR() is -1 for any other
       register (presumably the special-register destinations of ldc/lds),
       and such a destination is treated like a non-register one, as the
       other instruction groups of this switch do. */
    int r = dst.isReg() ? dst.base().getR() : -1;

    if(r < 0)
        diff.setNoop();
    else if(src.isConstant()) {
        RelConst c = RCD.constant(computeConstantOperand(ins, src));
        diff.setRegisterUpdate(r, c);
    }
    else
        diff.setRegisterTouched(r);
    break;
}
/* Opaque instructions with one operand and one output */
case AsmInstruction::SH_dt:
case AsmInstruction::SH_movt:
case AsmInstruction::SH_rotl:
case AsmInstruction::SH_rotr:
case AsmInstruction::SH_rotcl:
case AsmInstruction::SH_rotcr:
case AsmInstruction::SH_shal:
case AsmInstruction::SH_shar:
case AsmInstruction::SH_shll:
case AsmInstruction::SH_shlr:
case AsmInstruction::SH_shll2:
case AsmInstruction::SH_shlr2:
case AsmInstruction::SH_shll8:
case AsmInstruction::SH_shlr8:
case AsmInstruction::SH_shll16:
case AsmInstruction::SH_shlr16: {
AsmOperand op = asmins.operand(0);
assert(op.isReg());
if(op.base().getR() >= 0)
diff.setRegisterTouched(op.base().getR());
else
diff.setNoop();
break;
}
/* Opaque instructions with two operands and one output */
case AsmInstruction::SH_add:
case AsmInstruction::SH_addc:
case AsmInstruction::SH_addv:
case AsmInstruction::SH_and:
case AsmInstruction::SH_div1:
case AsmInstruction::SH_exts:
case AsmInstruction::SH_extu:
case AsmInstruction::SH_neg:
case AsmInstruction::SH_negc:
case AsmInstruction::SH_not:
case AsmInstruction::SH_or:
case AsmInstruction::SH_shad:
case AsmInstruction::SH_shld:
case AsmInstruction::SH_sub:
case AsmInstruction::SH_subc:
case AsmInstruction::SH_subv:
case AsmInstruction::SH_swap:
case AsmInstruction::SH_xor:
case AsmInstruction::SH_xtrct: {
AsmOperand op = asmins.operand(1);
if(op.isReg() && op.base().getR() >= 0)
diff.setRegisterTouched(op.base().getR());
else
diff.setNoop();
break;
}
/* No-op instructions that affect state not modeled by the analysis */
case AsmInstruction::SH_clrs:
case AsmInstruction::SH_clrt:
case AsmInstruction::SH_clrmac:
case AsmInstruction::SH_div0u:
case AsmInstruction::SH_ldtlb:
case AsmInstruction::SH_nop:
case AsmInstruction::SH_rte:
case AsmInstruction::SH_rts:
case AsmInstruction::SH_sets:
case AsmInstruction::SH_sett:
case AsmInstruction::SH_sleep:
case AsmInstruction::SH_cmp_pl:
case AsmInstruction::SH_cmp_pz:
case AsmInstruction::SH_cmp_eq:
case AsmInstruction::SH_cmp_hs:
case AsmInstruction::SH_cmp_ge:
case AsmInstruction::SH_cmp_hi:
case AsmInstruction::SH_cmp_gt:
case AsmInstruction::SH_cmp_str:
case AsmInstruction::SH_div0s:
case AsmInstruction::SH_dmuls:
case AsmInstruction::SH_dmulu:
case AsmInstruction::SH_mul:
case AsmInstruction::SH_muls:
case AsmInstruction::SH_mulu:
case AsmInstruction::SH_tst:
case AsmInstruction::SH_jmp:
case AsmInstruction::SH_pref:
case AsmInstruction::SH_tas:
case AsmInstruction::SH_mac:
case AsmInstruction::SH_braf:
case AsmInstruction::SH_bf:
case AsmInstruction::SH_bf_s:
case AsmInstruction::SH_bt:
case AsmInstruction::SH_bt_s:
case AsmInstruction::SH_bra:
case AsmInstruction::SH_trapa:
case AsmInstruction::SH_icbi:
case AsmInstruction::SH_ocbi:
case AsmInstruction::SH_ocbp:
case AsmInstruction::SH_ocbwb:
case AsmInstruction::SH_prefi:
case AsmInstruction::SH_synco:
diff.setNoop();
break;
case AsmInstruction::SH_bsr:
case AsmInstruction::SH_bsrf:
case AsmInstruction::SH_jsr:
diff.setCallStandard();
break;
case AsmInstruction::SH_movco:
case AsmInstruction::SH_movli:
case AsmInstruction::SH_movua:
case AsmInstruction::SH_movca:
diff.setUnknown();
break;
}
for(auto op: ins.opcode().operands()) {
/* TODO: Properly handle pre-decrement/post-increment */
if(op.kind() == AsmOperand::PreDec || op.kind() == AsmOperand::PostInc)
diff.setUnknown();
}
return diff;
@ -156,7 +321,7 @@ std::unique_ptr<StaticFunctionAnalysis> analyzeFunction(Function const &f)
/* Compute the next entry state for each block */
for(uint i = 0; i < f.blockCount(); i++) {
BasicBlock const &bb = f.basicBlockByIndex(i);
VBS[i].nextEntry.setBottom();
VBS[i].nextEntry = VBS[i].entry;
for(int succIndex: bb.successorsByIndex())
VBS[i].nextEntry.joinWith(VBS[succIndex].exit);

View File

@ -47,11 +47,7 @@ void Binary::deserialize(BSON const &b)
OS *Binary::OSAnalysis(bool force) const
{
if(!m_os || force) {
/* We break constness a little bit here. We allow access to the OS
analysis for const Binary, even though it uses the VirtualSpace and
technically AbstractMemory allows implementations to modify the
memory in response to reads. */
m_os = std::make_unique<OS>(const_cast<VirtualSpace &>(m_vspace));
m_os = std::make_unique<OS>(m_vspace);
/* We don't keep an OS analysis result that failed */
if(m_os->type == OS::UNKNOWN)
m_os = nullptr;

View File

@ -88,7 +88,8 @@ void Function::updateFunctionSize()
if(bb.instructionCount() == 0)
continue;
Instruction &insn = bb.instructionAtIndex(bb.instructionCount() - 1);
max_address = std::max(max_address, insn.address() + insn.size());
max_address
= std::max(max_address, insn.address() + insn.encodingSize());
}
this->setSize(max_address - this->address());
@ -368,7 +369,7 @@ void BasicBlock::finalizeBlock()
u32 pc = this->address();
for(Instruction &insn: *this) {
assert(insn.address() == pc && "non-sequential instructions in bb");
pc += insn.size();
pc += insn.encodingSize();
}
/* The block must have no more than one terminator. */

View File

@ -28,7 +28,6 @@ char const *regnames[] = {
};
// clang-format on
/* Construction from string */
CpuRegister::CpuRegister(std::string name)
{
int regcount = (sizeof regnames / sizeof regnames[0]);
@ -44,7 +43,6 @@ CpuRegister::CpuRegister(std::string name)
m_name = CpuRegister::UNDEFINED;
}
/* Conversion to string */
std::string CpuRegister::str() const noexcept
{
int regcount = (sizeof regnames / sizeof regnames[0]);
@ -55,12 +53,26 @@ std::string CpuRegister::str() const noexcept
return regnames[i];
}
/* Number of this register within r0 ... r15, or -1 if it isn't one of them.
   The names R0..R7 and R8..R15 are handled as two separate runs. */
int CpuRegister::getR() const
{
    bool const low = (m_name >= R0 && m_name <= R7);
    bool const high = (m_name >= R8 && m_name <= R15);

    if(!low && !high)
        return -1;
    return low ? (m_name - R0) : (m_name - R8 + 8);
}
//---
// Instruction operands
//---
AsmOperand::AsmOperand(Kind kind, CpuRegister base):
m_kind {kind}, m_base {base}
AsmOperand::AsmOperand(): m_kind {Reg}, m_base {CpuRegister::R0}, m_opsize {0}
{
}
AsmOperand::AsmOperand(Kind kind, CpuRegister base, i8 opsize):
m_kind {kind}, m_base {base}, m_opsize {opsize}
{
}
@ -69,8 +81,8 @@ AsmOperand::AsmOperand(int disp, i8 opsize, CpuRegister base):
{
}
AsmOperand::AsmOperand(CpuRegister index, CpuRegister base):
m_kind {ArrayDeref}, m_base {base}, m_index {index}
AsmOperand::AsmOperand(CpuRegister index, CpuRegister base, i8 opsize):
m_kind {ArrayDeref}, m_base {base}, m_index {index}, m_opsize {opsize}
{
}
@ -107,9 +119,9 @@ std::string AsmOperand::str() const
}
}
u32 AsmOperand::getPCRelativeTarget(u32 pc, int size) const
u32 AsmOperand::getPCRelativeTarget(u32 pc) const
{
size = size + (size == 0);
int size = m_opsize + (m_opsize == 0);
if(m_kind == AsmOperand::PcRel)
return (pc & -size) + 4 + m_disp_imm;
@ -155,24 +167,41 @@ AsmInstruction::AsmInstruction(u32 encoding, char const *mnemonic, int tags,
int i;
for(i = 0; i < SH_MAX; i++) {
if(mn == instructionMnemonics[i]) {
m_opcode = i;
m_operation = i;
break;
}
}
assert(i < SH_MAX && "AsmInstruction with unknown opcode string");
assert(i < SH_MAX && "AsmInstruction with unknown operation string");
}
char const *AsmInstruction::mnemonic() const
std::string AsmInstruction::mnemonic() const
{
assert(m_opcode < SH_MAX);
return instructionMnemonics[m_opcode];
return std::string {operationString()} + operationSizeString();
}
/* Name of the operation, looked up in instructionMnemonics by operation code.
   The size indicator is not part of it; see operationSizeString(). */
char const *AsmInstruction::operationString() const
{
assert(m_operation < SH_MAX);
return instructionMnemonics[m_operation];
}
/* Size indicator for the operation: ".b", ".w" or ".l" for operation sizes
   1, 2 and 4 respectively, and an empty string for any other size. */
char const *AsmInstruction::operationSizeString() const
{
    switch(m_opsize) {
    case 1:
        return ".b";
    case 2:
        return ".w";
    case 4:
        return ".l";
    default:
        return "";
    }
}
u32 AsmInstruction::getPCRelativeTarget(u32 pc) const
{
/* There can only be at most one PC-relative operand in an instruction */
for(AsmOperand const &op: operands()) {
if(op.usesPCRelativeAddressing())
return op.getPCRelativeTarget(pc, m_opsize);
return op.getPCRelativeTarget(pc);
}
return -1;
}

View File

@ -221,17 +221,17 @@ static AsmOperand make_arg(int token, int opsize, int m, int n, int d, int i)
case JUMP12: return AsmOperand::mkPcJump(d12 * 2);
case DPC: return AsmOperand::mkPcAddr(d * 4);
case IMM: return AsmOperand::mkImm(i8);
case AT_RN: return AsmOperand::mkDeref(Rn);
case AT_RM: return AsmOperand::mkDeref(Rm);
case AT_RMP: return AsmOperand::mkPostInc(Rm);
case AT_RNP: return AsmOperand::mkPostInc(Rn);
case AT_MRN: return AsmOperand::mkPreDec(Rn);
case AT_RN: return AsmOperand::mkDeref(Rn, opsize);
case AT_RM: return AsmOperand::mkDeref(Rm, opsize);
case AT_RMP: return AsmOperand::mkPostInc(Rm, opsize);
case AT_RNP: return AsmOperand::mkPostInc(Rn, opsize);
case AT_MRN: return AsmOperand::mkPreDec(Rn, opsize);
case AT_DRN: return AsmOperand::mkStructDeref(d*opsize, opsize, Rn);
case AT_DRM: return AsmOperand::mkStructDeref(d*opsize, opsize, Rm);
case AT_DGBR: return AsmOperand::mkStructDeref(d*opsize, opsize, Reg::GBR);
case AT_R0RN: return AsmOperand::mkArrayDeref(Reg::R0, Rn);
case AT_R0RM: return AsmOperand::mkArrayDeref(Reg::R0, Rm);
case AT_R0GBR: return AsmOperand::mkArrayDeref(Reg::R0, Reg::GBR);
case AT_R0RN: return AsmOperand::mkArrayDeref(Reg::R0, Rn, opsize);
case AT_R0RM: return AsmOperand::mkArrayDeref(Reg::R0, Rm, opsize);
case AT_R0GBR: return AsmOperand::mkArrayDeref(Reg::R0, Reg::GBR, opsize);
case AT_DPC:
if(!opsize)

View File

@ -23,7 +23,7 @@ namespace FxOS {
static std::map<OS::Type, std::unique_ptr<SyscallDefs>> syscallDefsCache;
static std::unique_ptr<SyscallDefs> buildSyscallDefs(char const *str, int len);
OS::OS(VirtualSpace &space): type {UNKNOWN}, m_space {space}
OS::OS(VirtualSpace const &space): type {UNKNOWN}, m_space {space}
{
if(!space.covers(0x80000000, (256 << 10))) {
FxOS_log(ERR,
@ -49,7 +49,7 @@ OS::OS(VirtualSpace &space): type {UNKNOWN}, m_space {space}
void OS::parse_header()
{
VirtualSpace &s = m_space;
VirtualSpace const &s = m_space;
if(this->type == FX) {
/* Bootcode timestamp at the very end of the bootcode */
@ -202,7 +202,7 @@ SyscallDefs const *OS::syscall_defs() const noexcept
void OS::parse_footer()
{
VirtualSpace &s = m_space;
VirtualSpace const &s = m_space;
/* Find the footer address (occurrence of "CASIOABSLangdata") */
uint32_t start = MemoryRegion::ROM.start;
@ -238,7 +238,7 @@ void OS::parse_footer()
//---
static uint32_t accumulate_range(
VirtualSpace &m_space, uint32_t start, uint32_t end)
VirtualSpace const &m_space, uint32_t start, uint32_t end)
{
uint32_t sum = 0;

View File

@ -6,6 +6,7 @@
//---------------------------------------------------------------------------//
#include <fxos/view/assembly.h>
#include <fxos/view/util.h>
#include <fxos/analysis.h>
#include <fxos/binary.h>
#include <fxos/function.h>
@ -15,7 +16,6 @@
#include <cstdio>
#include <cstring>
#include <numeric>
#include <fmt/color.h>
namespace FxOS {
@ -66,7 +66,7 @@ static void renderOperand(AsmOperand const &op, u32 pc, int opsize,
if(!op.usesPCRelativeAddressing())
return;
u32 location = op.getPCRelativeTarget(pc, opsize);
u32 location = op.getPCRelativeTarget(pc);
if(output(out, p, {}, format("<%08x>", location)))
return;
type = (type == PCRelative) ? Location : Constant;
@ -141,23 +141,24 @@ static void doOldInst(u32 pc, OldInstruction &i,
printf(">\n");
}
/* Raw data if instruction cannot be decoded */
printf(" %08x: %04x", pc, (i.inst ? i.inst->encoding() : i.opcode));
/* Only show the raw data if instruction cannot be decoded */
if(opts.showInstructionDetails)
printf(" %08x: %04x", pc, (i.inst ? i.inst->encoding() : i.opcode));
if(!i.inst) {
    /* The instruction could not be decoded, so i.inst is null in this
       branch and must not be dereferenced: the raw value comes from
       i.opcode. When instruction details are shown it has already been
       printed together with the address. */
    if(!opts.showInstructionDetails)
        printf(" %04x", i.opcode);
    printf("\n");
    m_lastAddress = pc;
    return;
}
/* Mnemonic */
static char const *suffixes[5] = {"", ".b", ".w", "", ".l"};
char const *suffix
= suffixes[(i.inst->opsize() <= 4) ? i.inst->opsize() : 0];
int spacing = i.inst->operandCount()
? 8 - strlen(i.inst->mnemonic()) - strlen(suffix)
: 0;
printf(" %s%s%*s", i.inst->mnemonic(), suffix, spacing, "");
std::string mnemonic = i.inst->mnemonic();
std::string str = " ";
str += mnemonic;
int spacing = i.inst->operandCount() ? 8 - mnemonic.size() : 0;
printf(" %s%*s", mnemonic.c_str(), spacing, "");
/* Arguments */
for(int n = 0; n < i.inst->operandCount(); n++) {
@ -242,19 +243,16 @@ void viewAssemblyInstruction(Instruction const &ins, ViewAssemblyOptions *opts)
OperandOutput opout;
u32 pc = ins.address();
printf(" %08x: %04x", pc, opcode.encoding());
if(opts->showInstructionDetails)
printf(" %08x: %04x ", pc, opcode.encoding());
else
printf(" ");
/* Mnemonic */
static char const *suffixes[5] = {"", ".b", ".w", "", ".l"};
char const *suffix = suffixes[(opcode.opsize() <= 4) ? opcode.opsize() : 0];
int spacing = opcode.operandCount()
? 8 - strlen(opcode.mnemonic()) - strlen(suffix)
: 0;
std::string str = " ";
str += opcode.mnemonic();
str += suffix;
str += std::string(spacing, ' ');
std::string mnemonic = opcode.mnemonic();
std::string str = "";
str += mnemonic;
str += std::string(opcode.operandCount() ? 8 - mnemonic.size() : 0, ' ');
/* Arguments */
for(int n = 0; n < opcode.operandCount(); n++) {
@ -272,21 +270,40 @@ void viewAssemblyInstruction(Instruction const &ins, ViewAssemblyOptions *opts)
opout.clear();
}
std::vector<std::pair<std::string, fmt::text_style>> comments;
if(opts->printFunctionAnalysis) {
auto *an = ins.parentFunction().getAnalysis();
if(an) {
auto &block = an->blocks[ins.parentBlock().blockIndex()];
ProgramStateDiff const &diff = block.diffs[ins.indexInBlock()];
if(str.size() < 32)
str += std::string(32 - str.size(), ' ');
str += "# ";
str += diff.str();
if(diff.target()
!= static_cast<int>(ProgramStateDiff::Target::None))
comments.emplace_back(
diff.str(), fmt::fg(fmt::terminal_color::cyan));
}
}
fmt::print("{}\n", str);
std::cout << str;
if(!comments.empty()) {
if(str.size() < 28)
std::cout << std::string(28 - str.size(), ' ');
bool first = true;
for(auto &[c, style]: comments) {
if(first)
fmt::print(fmt::fg(fmt::color::gray), "# ");
else
fmt::print(", ");
fmt::print(style, "{}", c);
first = false;
}
}
std::cout << '\n';
}
static std::string objectsAt(
@ -320,6 +337,23 @@ static std::string objectsAt(
});
}
/* Print registers 0 to 15 of the program state PS as a wrapped,
   comma-separated list of "rN:value" items, in cyan, with every line prefixed
   by lineStart. Values use the short form of RelConst::str(). */
static void viewProgramState(ProgramState const &PS, std::string lineStart)
{
    ViewStringsOptions opts = {
        .maxColumns = 70,
        .lineStart = lineStart,
        .separator = ", ",
        .style = fmt::fg(fmt::terminal_color::cyan),
    };

    /* The color is also set around the whole list with raw escape sequences.
       "\x1b" is the standard spelling of the ESC character; "\e" is a
       compiler extension. */
    std::cout << "\x1b[36m";
    viewStrings(std::views::iota(0, 16) | std::views::transform([&](int i) {
        return fmt::format("r{}:{}", i, PS.getRegister(i).str(false));
    }),
        opts);
    std::cout << "\x1b[0m";
}
void viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts)
{
opts = opts ? opts : &defaultOptions;
@ -365,8 +399,7 @@ void viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts)
auto *an = bb.parentFunction().getAnalysis();
if(an) {
auto &block = an->blocks[bb.blockIndex()];
printf(" Entry state:\n");
fmt::print("{}", block.entry.str(6));
viewProgramState(block.entry, fmt_rgb(" | ", fmt::color::gray));
}
}

View File

@ -13,24 +13,24 @@ namespace FxOS {
//=== AbstractMemory ===//
bool AbstractMemory::covers(uint32_t addr, int size)
bool AbstractMemory::covers(uint32_t addr, int size) const
{
return (this->translate(addr, size) != nullptr);
}
bool AbstractMemory::covers(MemoryRegion const &region)
bool AbstractMemory::covers(MemoryRegion const &region) const
{
return this->covers(region.start, region.size());
}
char const *AbstractMemory::translate(uint32_t addr, int size)
char const *AbstractMemory::translate(uint32_t addr, int size) const
{
int actual_size;
char const *ptr = this->translate_dynamic(addr, &actual_size);
return (ptr && actual_size >= size) ? ptr : nullptr;
}
Addressable<int8_t> AbstractMemory::read_i8(uint32_t addr)
Addressable<int8_t> AbstractMemory::read_i8(uint32_t addr) const
{
int8_t *i8 = (int8_t *)this->translate(addr, 1);
if(!i8)
@ -38,7 +38,7 @@ Addressable<int8_t> AbstractMemory::read_i8(uint32_t addr)
return Addressable(addr, *i8);
}
Addressable<uint8_t> AbstractMemory::read_u8(uint32_t addr)
Addressable<uint8_t> AbstractMemory::read_u8(uint32_t addr) const
{
uint8_t *u8 = (uint8_t *)this->translate(addr, 1);
if(!u8)
@ -46,7 +46,7 @@ Addressable<uint8_t> AbstractMemory::read_u8(uint32_t addr)
return Addressable(addr, *u8);
}
Addressable<int16_t> AbstractMemory::read_i16(uint32_t addr)
Addressable<int16_t> AbstractMemory::read_i16(uint32_t addr) const
{
uint8_t *i16 = (uint8_t *)this->translate(addr, 2);
if(!i16)
@ -55,7 +55,7 @@ Addressable<int16_t> AbstractMemory::read_i16(uint32_t addr)
return Addressable(addr, v);
}
Addressable<uint16_t> AbstractMemory::read_u16(uint32_t addr)
Addressable<uint16_t> AbstractMemory::read_u16(uint32_t addr) const
{
uint8_t *u16 = (uint8_t *)this->translate(addr, 2);
if(!u16)
@ -64,7 +64,7 @@ Addressable<uint16_t> AbstractMemory::read_u16(uint32_t addr)
return Addressable(addr, v);
}
Addressable<int32_t> AbstractMemory::read_i32(uint32_t addr)
Addressable<int32_t> AbstractMemory::read_i32(uint32_t addr) const
{
uint8_t *i32 = (uint8_t *)this->translate(addr, 4);
if(!i32)
@ -73,7 +73,7 @@ Addressable<int32_t> AbstractMemory::read_i32(uint32_t addr)
return Addressable(addr, v);
}
Addressable<uint32_t> AbstractMemory::read_u32(uint32_t addr)
Addressable<uint32_t> AbstractMemory::read_u32(uint32_t addr) const
{
uint8_t *u32 = (uint8_t *)this->translate(addr, 4);
if(!u32)
@ -82,7 +82,8 @@ Addressable<uint32_t> AbstractMemory::read_u32(uint32_t addr)
return Addressable(addr, v);
}
Addressable<std::string> AbstractMemory::read_str(uint32_t addr, size_t len)
Addressable<std::string> AbstractMemory::read_str(
uint32_t addr, size_t len) const
{
char const *str = this->translate(addr, len);
if(!str)
@ -91,7 +92,7 @@ Addressable<std::string> AbstractMemory::read_str(uint32_t addr, size_t len)
}
uint32_t AbstractMemory::search(
uint32_t start, uint32_t end, void const *pattern, int size)
uint32_t start, uint32_t end, void const *pattern, int size) const
{
void const *data = translate(start, end - start);
if(!data)
@ -132,7 +133,7 @@ Binding::Binding(BSON const &b)
buffer.deserialize(b["buffer"]);
}
char const *Binding::translate_dynamic(uint32_t addr, int *size)
char const *Binding::translate_dynamic(uint32_t addr, int *size) const
{
if(addr >= region.start && addr < region.end) {
*size = region.end - addr;
@ -167,7 +168,7 @@ void VirtualSpace::bind_region(MemoryRegion const &region, Buffer const &buf)
this->bindings.emplace(this->bindings.begin(), region, buf);
}
char const *VirtualSpace::translate_dynamic(uint32_t addr, int *size)
char const *VirtualSpace::translate_dynamic(uint32_t addr, int *size) const
{
for(auto &b: this->bindings) {
char const *ptr = b.translate_dynamic(addr, size);

View File

@ -7,6 +7,7 @@
#include <fxos/function.h>
#include <fxos/analysis.h>
#include <fxos/vspace.h>
#include <fxos/view/util.h>
#include <fxos/util/Timer.h>
#include <fxos/util/log.h>
#include <fmt/core.h>
@ -25,6 +26,7 @@ struct _af_args
bool force = false;
std::string name = "";
std::vector<u32> addresses;
bool consistency = false;
};
static _af_args parse_af(Session &session, Parser &parser)
@ -33,8 +35,12 @@ static _af_args parse_af(Session &session, Parser &parser)
parser.option("-u", [&args](Parser &) { args.update = true; });
parser.option("--force", [&args](Parser &) { args.force = true; });
parser.option("-n", [&args](Parser &p) { args.name = p.symbol(""); });
parser.option("-c", [&args](Parser &) { args.consistency = true; });
parser.accept_options();
if(args.consistency && parser.at_end())
return args;
do {
args.addresses.push_back(parser.expr(session.currentBinary()));
}
@ -44,38 +50,6 @@ static _af_args parse_af(Session &session, Parser &parser)
return args;
}
/* Satisfied when R models std::ranges::range and its value type is exactly T
   (checked with std::same_as, so types merely convertible to T do not
   qualify). */
template<typename R, typename T>
concept range_of
= std::ranges::range<R> && std::same_as<std::ranges::range_value_t<R>, T>;
// TODO: Move this visualization function (also put spacing as a feature)
/* Print every string of the range back-to-back on standard output, starting
   a new line whenever appending the next string would push the current line
   past maxColumns characters. A string that is too long on its own is still
   printed, alone on its line. */
template<typename R>
    requires(range_of<R, std::string>)
void viewStrings(R range, int maxColumns = 80)
{
    /* Number of characters already printed on the current line */
    int used = 0;

    for(std::string const &item: range) {
        int const width = item.size();
        bool const lineStarted = (used != 0);

        /* Only wrap when something is already on the line; otherwise an
           overlong string would produce an empty line before it */
        if(lineStarted && used + width > maxColumns) {
            fmt::print("\n");
            used = 0;
        }

        fmt::print("{}", item);
        used += width;
    }

    /* Finish the last line, unless it has nothing on it */
    if(used > 0)
        fmt::print("\n");
}
template<typename R, typename F>
void viewStrings(R range, F fun, int maxColumns = 80)
{
return viewStrings(
std::views::all(range) | std::views::transform(fun), maxColumns);
}
static void _af_consistency(Binary const &binary)
{
/* List of functions with blocks before the function's entry point */
@ -93,25 +67,37 @@ static void _af_consistency(Binary const &binary)
noReturn.push_back(&f);
}
ViewStringsOptions opts = {
.maxColumns = 78,
.lineStart = " ",
.separator = " ",
};
if(earlyBlocks.size() > 0) {
fmt::print("{} functions have blocks before their entry point:\n",
earlyBlocks.size());
viewStrings(earlyBlocks, [](Function const *f) {
if(f->name().size() > 0)
return fmt::format(" {}", f->name());
else
return fmt::format(" fun.{:08x}", f->address());
});
viewStrings(
earlyBlocks,
[](Function const *f) {
if(f->name().size() > 0)
return fmt::format("{}", f->name());
else
return fmt::format("fun.{:08x}", f->address());
},
opts);
}
if(noReturn.size() > 0) {
fmt::print("{} functions do not return:\n", noReturn.size());
viewStrings(noReturn, [](Function const *f) {
if(f->name().size() > 0)
return fmt::format(" {}", f->name());
else
return fmt::format(" fun.{:08x}", f->address());
});
viewStrings(
noReturn,
[](Function const *f) {
if(f->name().size() > 0)
return fmt::format("{}", f->name());
else
return fmt::format("fun.{:08x}", f->address());
},
opts);
}
}
@ -185,6 +171,11 @@ void _afs(Session &session, _af_args &args)
if(!b)
return FxOS_log(ERR, "No current binary!\n");
if(args.consistency) {
_af_consistency(*b);
return;
}
OS *os = b->OSAnalysis();
if(!os)
return FxOS_log(ERR, "afs: No OS analysis, cannot enumerate syscalls");

View File

@ -21,6 +21,7 @@
struct _d_args
{
std::variant<long, Range> location;
bool analyze = false;
};
static _d_args parse_d(Session &session, Parser &parser)
@ -30,12 +31,15 @@ static _d_args parse_d(Session &session, Parser &parser)
if(!session.currentBinary())
return {};
parser.option("-a", [&args](Parser &) { args.analyze = true; });
parser.accept_options();
args.location = parser.expr_or_range(session.currentBinary());
parser.end();
return args;
}
void _d(Session &session, std::variant<long, Range> location)
void _d(Session &session, std::variant<long, Range> location, bool analyze)
{
Binary *b = session.currentBinary();
if(!b)
@ -86,11 +90,13 @@ void _d(Session &session, std::variant<long, Range> location)
Function f(*b, address);
if(f.exploreFunctionAt(address)) {
f.runAnalysis();
if(analyze)
f.runAnalysis();
ViewAssemblyOptions opts;
opts.binary = b;
opts.printFunctionAnalysis = true;
opts.printFunctionAnalysis = analyze;
opts.showInstructionDetails = !analyze;
viewAssemblyFunction(f, &opts);
}
}
@ -104,17 +110,17 @@ static ShellCommand _d_cmd(
"d",
[](Session &s, Parser &p) {
auto args = parse_d(s, p);
_d(s, args.location);
_d(s, args.location, args.analyze);
},
[](Session &s, Parser &p) { parse_d(s, p); }, "Disassemble", R"(
d [<address|range>]
d [-a] <address>
d <range>
Disassembles code starting at the specified address, exploring branches until
function terminators, invalid instructions, or dynamically-computed jumps. The
default address is $ (the cursor of the current virtual space).
Disassembles code. In the first form, explores a function at the provided
address (without modifying the binary), exploring branches until function
terminators, invalid instructions or dynamically-computed jumps. If -a is
specified, also perform static analysis.
The following disassembler passes are run:
cfg Explores the code reachable from the start address
pcrel Computes PC-relative addresses (eg mov.l, mova, bf, bra...)
syscall Annotates uses of syscall table entries with the syscall number
In the second form, disassembles instructions in the specified range. (This is
an older feature with less support for advanced features.)
)");