start with the analysis passes
-> The cfg pass loads the function into memory, annotates leaders and jumps, and resolves delay slots. -> The pcrel pass currently computes locations for pc-relative moves and jumps, but does not yet compute the pc-relative moved data. -> The print pass displays the results of analysis with various layout and formatting options.
This commit is contained in:
parent
c499ca1f90
commit
4d9edecad9
2
Makefile
2
Makefile
|
@ -78,7 +78,7 @@ bin/:
|
|||
# Dependency generation
|
||||
#
|
||||
|
||||
include $(wildcard build/*/*.d)
|
||||
include $(wildcard build/*/*.d build/*/*/*.d)
|
||||
|
||||
.PHONY: all all-lib all-fxos clean clean-lib clean-fxos distclean
|
||||
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
#include <fxos/os.h>
|
||||
|
||||
#include <fxos/disasm-passes/cfg.h>
|
||||
#include <fxos/disasm-passes/pcrel.h>
|
||||
#include <fxos/disasm-passes/print.h>
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
|
@ -221,7 +223,7 @@ int main_disassembly(int argc, char **argv)
|
|||
{
|
||||
int error=0, option=0, mpu='4';
|
||||
std::vector<std::string> passes {
|
||||
"cfg", "pcrel", "constprop", "syscall", "regs"
|
||||
"cfg", "pcrel", "constprop", "syscall", "regs", "print"
|
||||
};
|
||||
std::string file;
|
||||
|
||||
|
@ -242,18 +244,20 @@ int main_disassembly(int argc, char **argv)
|
|||
mpu = option;
|
||||
break;
|
||||
case 'p':
|
||||
{
|
||||
passes.clear();
|
||||
|
||||
{
|
||||
std::istringstream in(optarg);
|
||||
std::string pass;
|
||||
std::istringstream in(optarg);
|
||||
std::string pass;
|
||||
|
||||
while(std::getline(in, pass, ',')) {
|
||||
passes.push_back(pass);
|
||||
}
|
||||
while(std::getline(in, pass, ',')) {
|
||||
passes.push_back(pass);
|
||||
}
|
||||
|
||||
if(!passes.size()) error = 1, log(ERR "no pass specified");
|
||||
if(passes.back() != "print") passes.push_back("print");
|
||||
break;
|
||||
}
|
||||
case 'f':
|
||||
file = optarg;
|
||||
break;
|
||||
|
@ -305,16 +309,43 @@ int main_disassembly(int argc, char **argv)
|
|||
|
||||
log(LOG "Disassembling target %s at %s", tname, refstr);
|
||||
|
||||
for(auto pass: passes)
|
||||
try
|
||||
{
|
||||
log(LOG "Running pass %s", pass);
|
||||
|
||||
if(pass == "cfg")
|
||||
for(auto pass: passes)
|
||||
{
|
||||
CfgPass p(disasm);
|
||||
p.run(ref);
|
||||
auto start = timer_start();
|
||||
log(LOG "Running pass %s...\\", pass);
|
||||
|
||||
if(pass == "cfg")
|
||||
{
|
||||
CfgPass p(disasm);
|
||||
p.run(ref);
|
||||
}
|
||||
else if(pass == "pcrel")
|
||||
{
|
||||
PcrelPass p(disasm);
|
||||
p.run(ref);
|
||||
}
|
||||
else if(pass == "print")
|
||||
{
|
||||
PrintPass p(disasm);
|
||||
p.hide_resolved_pcjump = true;
|
||||
p.hide_resolved_pcrel = true;
|
||||
p.hide_movpc_address =
|
||||
PrintPass::Hide_MovPC_Region;
|
||||
p.run();
|
||||
}
|
||||
log(LOG "%s", timer_format(timer_end(start)));
|
||||
}
|
||||
}
|
||||
catch(LangError &e)
|
||||
{
|
||||
log(ERR "%08x: %s", e.addr(), e.what());
|
||||
}
|
||||
catch(AddrError &e)
|
||||
{
|
||||
log(ERR "%08x[%d]: %s", e.addr(), e.size(), e.what());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -8,6 +8,28 @@
|
|||
// This is the main exploration pass. Other passes do not typically load new
|
||||
// instructions from the underlying disassembly. Straightforward passes such as
|
||||
// [print] iterate on instructions loaded by this pass.
|
||||
//
|
||||
// The main gimmick of this pass is to "resolve delay slots" by forcing down
|
||||
// the properties of delayed instructions into their respective delay slots.
|
||||
// For instance, in
|
||||
// jmp @r0
|
||||
// mov #1, r4
|
||||
// the jump is delayed until after the move. To handle this, fxos makes the jmp
|
||||
// a no-op and applies dual move-jump semantics to the mov below it.
|
||||
//
|
||||
// This could be tricky for the abstract interpreter because the jump target
|
||||
// has to be computed with the environment before the jmp, which is not
|
||||
// available when considering the mov. Luckily all delayed jumps are state
|
||||
// no-ops so the state before the mov can be used instead.
|
||||
//
|
||||
// Note that jumping into a delay slot will activate the jump in fxos, which is
|
||||
// not the actual behavior of the processor. fxos usually complains about the
|
||||
// crazy compiler when this occurs. Note that if it happens but we don't know
|
||||
// that it's a delay slot (ie. the instruction from above is never executed in
|
||||
// the current function), then everything's fine.
|
||||
//
|
||||
// Take-home message: delay slots are a pain to analyze, so we get rid of them
|
||||
// as soon as possible and proceed with normal semantics.
|
||||
//---
|
||||
|
||||
#ifndef LIBFXOS_DISASM_PASSES_CFG_H
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
//---
|
||||
// fxos.disasm-passes.pcrel: Resolution of PC-relative addresses
|
||||
//
|
||||
// This pass computes all PC-relative addresses used in fixed-target jumps and
|
||||
// in PC-relative mov instructions. It does so by setting the location of each
|
||||
// PC-relative argument to the associated constant value.
|
||||
//---
|
||||
|
||||
#ifndef FXOS_DISASM_PASSES_PCREL_H
|
||||
#define FXOS_DISASM_PASSES_PCREL_H
|
||||
|
||||
#include <fxos/disassembly.h>
|
||||
|
||||
namespace FxOS {
|
||||
|
||||
class PcrelPass: public DisassemblyPass
|
||||
{
|
||||
public:
|
||||
PcrelPass(Disassembly &disasm);
|
||||
void analyze(uint32_t pc, ConcreteInstruction &inst) override;
|
||||
};
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
||||
#endif /* FXOS_DISASM_PASSES_PCREL_H */
|
|
@ -0,0 +1,55 @@
|
|||
//---
|
||||
// fxos.disasm-passes.print: Concrete program printer
|
||||
//
|
||||
// This pass prints the program and some or all of its annotations, depending
|
||||
// on the specified parameters.
|
||||
//---
|
||||
|
||||
#ifndef LIBFXOS_DISASM_PASSES_PRINT_H
|
||||
#define LIBFXOS_DISASM_PASSES_PRINT_H
|
||||
|
||||
#include <fxos/disassembly.h>
|
||||
|
||||
namespace FxOS {
|
||||
|
||||
class PrintPass: public DisassemblyPass
|
||||
{
|
||||
public:
|
||||
PrintPass(Disassembly &disasm);
|
||||
void analyze(uint32_t pc, ConcreteInstruction &inst) override;
|
||||
|
||||
/* This pass uses another entry method that starts at the instruction
|
||||
with the smallest address loaded in the disassembly, then goes down.
|
||||
The standard run() is not available. */
|
||||
void run(void);
|
||||
|
||||
//---
|
||||
// Print pass parameters
|
||||
//---
|
||||
|
||||
/* In jump instructions, hide the raw value "pc+<disp>" if the target
|
||||
address has been computed */
|
||||
bool hide_resolved_pcjump;
|
||||
/* In PC-relative move instructions, hide the raw value "@(<disp>,pc)"
|
||||
of the argument if the target address has been computed */
|
||||
bool hide_resolved_pcrel;
|
||||
|
||||
/* In PC-relative move instructions, hide the target address of the
|
||||
argument... under conditions */
|
||||
enum {
|
||||
/* Always show the accessed address */
|
||||
Hide_MovPC_Never,
|
||||
/* Hide the address if it's in the same memory region as the
|
||||
instruction doing the move */
|
||||
Hide_MovPC_Region,
|
||||
/* Always hide the address */
|
||||
Hide_MovPC_Always,
|
||||
|
||||
} hide_movpc_address;
|
||||
|
||||
/* TODO: More print pass parameters */
|
||||
};
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
||||
#endif /* LIBFXOS_DISASM_PASSES_PRINT_H */
|
|
@ -30,17 +30,18 @@ struct ConcreteInstructionArg
|
|||
ConcreteInstructionArg();
|
||||
|
||||
//---
|
||||
// Data set by the abstract interpretation passes
|
||||
// Data set by the <pcrel> pass and abstract interpreter
|
||||
//---
|
||||
|
||||
/* Location in CPU or memory, if that can be determined */
|
||||
std::optional<Location> loc;
|
||||
/* Alternatively, data type, which can sometimes be determined uniquely
|
||||
even if the location is not constant */
|
||||
std::optional<DataType> type;
|
||||
Location location;
|
||||
/* The pointed value, if it can be determined. Valid only if the
|
||||
location is specified; under some conditions the location can't be
|
||||
determined but the type can. */
|
||||
DataValue value;
|
||||
|
||||
//---
|
||||
// Data set by the syscall and regs passes
|
||||
// Data set by the <syscall> and <regs> passes
|
||||
//---
|
||||
|
||||
/* If the value is a syscall address, the syscall's id */
|
||||
|
@ -52,50 +53,41 @@ struct ConcreteInstructionArg
|
|||
/* A loaded and annotated instruction. */
|
||||
struct ConcreteInstruction
|
||||
{
|
||||
ConcreteInstruction(Instruction &inst);
|
||||
ConcreteInstruction(Instruction const &inst);
|
||||
|
||||
/* What instruction it is */
|
||||
Instruction &inst;
|
||||
/* What instruction this is. Note that this does not determine all the
|
||||
properties below. Placement and delay slots greatly alter them. */
|
||||
Instruction const &inst;
|
||||
|
||||
/* Argument information (contains data set by several passes) */
|
||||
ConcreteInstructionArg args[2];
|
||||
|
||||
//---
|
||||
// Data set by the pcrel pass
|
||||
//---
|
||||
|
||||
/* Jump targets, used for jump instructions only. The target might
|
||||
either be that of an inconditional jump, or the non-trivial target
|
||||
of a conditional jump. In many situations the jump is forced on a
|
||||
general instruction by a preceding delated branch. */
|
||||
uint32_t jmptarget;
|
||||
/* Whether the instruction is terminal. Be careful, as this attribute
|
||||
is often forced onto delayed slot instructions. It is thus NOT the
|
||||
same as isterminal(), which tells whether the mnemonic implies a
|
||||
function exit. Said exit is generally delayed. */
|
||||
bool terminal;
|
||||
|
||||
//---
|
||||
// Data set by the cfg pass
|
||||
//---
|
||||
|
||||
/* Whether this instruction is a basic block leader */
|
||||
/* Whether this instruction is a leader. This is always set by another
|
||||
instruction jumping into this one. */
|
||||
bool leader;
|
||||
/* Whether this instruction is in a delay slot. This is always set by
|
||||
the preceding delayed instruction. */
|
||||
bool delayslot;
|
||||
|
||||
//---
|
||||
// Methods and utilities
|
||||
//---
|
||||
/* Whether this instruction is:
|
||||
-> Terminal, ie. has no successors and is the end of the function.
|
||||
-> An unconditional jump of target [jmptarget].
|
||||
-> A conditional jump that can hit [jmptarget] and pc+2.
|
||||
If delayslot==false, these attributes are set when analyzing this
|
||||
instruction. If delayslot==true, they are set when the preceding
|
||||
delayed instruction is analyzed. */
|
||||
bool terminal;
|
||||
bool jump;
|
||||
bool condjump;
|
||||
|
||||
bool isterminal() const noexcept { return inst.isterminal(); }
|
||||
bool isjump() const noexcept { return inst.isjump(); }
|
||||
bool iscondjump() const noexcept { return inst.iscondjump(); }
|
||||
bool isdelayed() const noexcept { return inst.isdelayed(); }
|
||||
/* The jump target, used if jump==true or condjump==true. */
|
||||
uint32_t jmptarget;
|
||||
};
|
||||
|
||||
/* Short aliases */
|
||||
using CI = ConcreteInstruction;
|
||||
using CIArg = ConcreteInstructionArg;
|
||||
|
||||
/* Disassembly interface that automatically loads code from a target */
|
||||
class Disassembly
|
||||
{
|
||||
|
@ -112,6 +104,16 @@ public:
|
|||
loaded and initialized if it had not been read before. */
|
||||
ConcreteInstruction &readins(uint32_t pc);
|
||||
|
||||
/* For other access patterns (careful with write accesses!) */
|
||||
std::map<uint32_t, ConcreteInstruction> &instructions() noexcept {
|
||||
return m_instructions;
|
||||
}
|
||||
|
||||
/* Access to memory */
|
||||
Target &target() noexcept {
|
||||
return m_target;
|
||||
}
|
||||
|
||||
private:
|
||||
/* Underlying target */
|
||||
Target &m_target;
|
||||
|
@ -158,19 +160,6 @@ private:
|
|||
std::set<uint32_t> m_seen;
|
||||
};
|
||||
|
||||
|
||||
class PcrelPass: public DisassemblyPass
|
||||
{
|
||||
PcrelPass(Disassembly &disasm);
|
||||
void analyze(uint32_t pc, ConcreteInstruction &inst) override;
|
||||
};
|
||||
|
||||
class PrintPass: public DisassemblyPass
|
||||
{
|
||||
PrintPass(Disassembly &disasm);
|
||||
void analyze(uint32_t pc, ConcreteInstruction &inst) override;
|
||||
};
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
||||
#endif /* LIBFXOS_DISASSEMBLY_H */
|
||||
|
|
|
@ -6,16 +6,18 @@
|
|||
#define FXOS_DOMAINS_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace FxOS {
|
||||
|
||||
/* An abstract domain over any user-defined lattice. */
|
||||
/* An abstract domain over a user-defined lattice. */
|
||||
template<typename T>
|
||||
class AbstractDomain
|
||||
{
|
||||
public:
|
||||
/* Bottom and Top constants */
|
||||
virtual T bottom() const noexcept = 0;
|
||||
virtual T top() const noexcept = 0;
|
||||
virtual T top() const noexcept = 0;
|
||||
|
||||
/* Construct abstract value from integer constant */
|
||||
virtual T constant(uint32_t value) const noexcept = 0;
|
||||
|
@ -96,14 +98,32 @@ struct RelConst
|
|||
int32_t ival;
|
||||
uint32_t uval;
|
||||
};
|
||||
|
||||
//---
|
||||
// RelConst methods
|
||||
//---
|
||||
|
||||
/* Default constructor gives zero */
|
||||
RelConst() = default;
|
||||
|
||||
/* Evaluates to true if the location is non-trivial, ie. if it is
|
||||
neither Top nor Bottom. */
|
||||
operator bool () const noexcept;
|
||||
|
||||
/* String representation */
|
||||
std::string str() const noexcept;
|
||||
};
|
||||
|
||||
class RelConstDomain: public AbstractDomain<RelConst>
|
||||
{
|
||||
public:
|
||||
/* Trivial instances */
|
||||
RelConstDomain() = default;
|
||||
|
||||
/* Implementation of the AbstractDomain specification */
|
||||
|
||||
RelConst bottom() const noexcept override;
|
||||
RelConst top() const noexcept override;
|
||||
RelConst top() const noexcept override;
|
||||
|
||||
RelConst constant(uint32_t value) const noexcept override;
|
||||
|
||||
|
|
|
@ -60,6 +60,29 @@ private:
|
|||
char const *m_what;
|
||||
};
|
||||
|
||||
/* Address errors: exception carrying the faulty address, the size of the
   access and a message. The message pointer is stored as given (not copied),
   so it must outlive the exception object. */
class AddrError: public std::exception
{
public:
	AddrError(uint32_t address, int size, char const *what):
		m_addr {address}, m_size {size}, m_what {what}
	{
	}

	/* Address involved in the error */
	uint32_t addr() const noexcept { return m_addr; }
	/* Size recorded for the access */
	int size() const noexcept { return m_size; }
	/* Message supplied at construction */
	char const *what() const noexcept override { return m_what; }

private:
	uint32_t m_addr;
	int m_size;
	char const *m_what;
};
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
||||
#endif /* LIBFXOS_ERRORS_H */
|
||||
|
|
|
@ -69,7 +69,7 @@ struct Argument
|
|||
StructDeref, /* @(disp,rn) or @(disp,gbr) */
|
||||
ArrayDeref, /* @(r0,rn) or @(r0,gbr) */
|
||||
PcRel, /* @(disp,pc) with 4-alignment correction */
|
||||
PcJump, /* disp */
|
||||
PcJump, /* pc+disp */
|
||||
Imm, /* #imm */
|
||||
};
|
||||
|
||||
|
@ -137,6 +137,8 @@ struct Instruction
|
|||
bool iscondjump() const noexcept;
|
||||
/* Check whether instruction has a delay slot */
|
||||
bool isdelayed() const noexcept;
|
||||
/* Check whether instruction can be used in a delay slot */
|
||||
bool isvaliddelayslot() const noexcept;
|
||||
};
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#define LIBFXOS_SEMANTICS_H
|
||||
|
||||
#include <fxos/lang.h>
|
||||
#include <fxos/domains.h>
|
||||
#include <memory>
|
||||
|
||||
namespace FxOS {
|
||||
|
||||
|
@ -21,9 +23,8 @@ namespace FxOS {
|
|||
// Structures struct { <fields> } (mem)
|
||||
//---
|
||||
|
||||
class DataType
|
||||
struct DataType
|
||||
{
|
||||
public:
|
||||
enum DataKind {
|
||||
/* Base types */
|
||||
Integral,
|
||||
|
@ -59,6 +60,8 @@ public:
|
|||
/* The following members are not in the union because they have non-
|
||||
trivial destructors/copy and I don't want to care. */
|
||||
|
||||
/* For array */
|
||||
std::shared_ptr<DataType> arraytype;
|
||||
/* For bit field types */
|
||||
std::vector<Field> fields;
|
||||
/* For struct types */
|
||||
|
@ -66,39 +69,51 @@ public:
|
|||
};
|
||||
|
||||
//---
|
||||
// Location representation
|
||||
// Data values
|
||||
//
|
||||
// The abstract interpreter keeps track of data stored at the following
|
||||
// locations (attribute types in parentheses):
|
||||
// Registers .reg (CpuRegister)
|
||||
// Memory .addr (uint32_t)
|
||||
// MappedModule .addr (uint32_t)
|
||||
// These objects are instances of the types described by DataType.
|
||||
//---
|
||||
|
||||
struct Location
|
||||
struct DataValue
|
||||
{
|
||||
enum LocationType {
|
||||
/* CPU-held registers accessed with instructions */
|
||||
Register,
|
||||
/* Standard randomly-addressable memory */
|
||||
Memory,
|
||||
/* Memory-mapped module registers with specific access */
|
||||
MappedModule,
|
||||
};
|
||||
/* Default constructor, gives undetermined values */
|
||||
DataValue();
|
||||
|
||||
LocationType location;
|
||||
/* Data type affected to the value */
|
||||
DataType const *type;
|
||||
/* Whether the value can be determined. If this boolean is false, the
|
||||
rest of the data must be ignored. */
|
||||
bool determined;
|
||||
|
||||
union {
|
||||
/* For registers: register identifier */
|
||||
CpuRegister reg;
|
||||
/* For memory and mapped modules: addresses */
|
||||
uint32_t addr;
|
||||
/* Unsigned integer (all sizes) and bit fields */
|
||||
uint32_t uinteger;
|
||||
/* Signed integer (all sizes) */
|
||||
int32_t integer;
|
||||
};
|
||||
|
||||
/* String representation */
|
||||
std::string str() const noexcept;
|
||||
/* For arrays (homogeneous) and structures (heterogeneous) */
|
||||
std::vector<DataValue> entries;
|
||||
|
||||
/* Perform a read operation at the specified offset from the start of
|
||||
the object. */
|
||||
uint32_t read(int offset, int size);
|
||||
|
||||
/* Perform a write operation at the specified offset from the start of
|
||||
the object. */
|
||||
void write(int offset, int size, uint32_t contents);
|
||||
};
|
||||
|
||||
//---
|
||||
// Location representation
|
||||
//
|
||||
// The abstract interpreter keeps track of data stored at registers, memory
|
||||
// addresses and mapped modules as long as the exact location fits within the
|
||||
// expressive power of a RelConst.
|
||||
//---
|
||||
|
||||
using Location = RelConst;
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
||||
#endif /* LIBFXOS_SEMANTICS_H */
|
||||
|
|
|
@ -29,10 +29,10 @@ public:
|
|||
various types. (Not a template because of the restriction about
|
||||
template specialization in non-namespaces scopes still in g++.)
|
||||
|
||||
When reading data, provide a virtual address. The addres is saved in
|
||||
the returned object for later printing or inspection. The returned
|
||||
object Addressable<T> automatically converts to T when used, and
|
||||
supports operator & which returns the original address.
|
||||
When reading data, provide a virtual address. The address is saved
|
||||
in the returned object for later printing or inspection. The
|
||||
returned object Addressable<T> automatically converts to T when
|
||||
used, and supports operator & which returns the original address.
|
||||
|
||||
The size parameter is only meaningful for variable-sized types such
|
||||
as string, and ignored for fixed-size types such as integers. If the
|
||||
|
@ -40,7 +40,7 @@ public:
|
|||
throws std::out_of_range. */
|
||||
|
||||
/* Read integers with signed or unsigned extension. These functions do
|
||||
not check alignment, because exceptionnally the processor supports
|
||||
not check alignment, because exceptionally the processor supports
|
||||
unaligned operations (eg. movual.l). */
|
||||
Addressable<int8_t> read_i8 (uint32_t addr) const;
|
||||
Addressable<uint8_t> read_u8 (uint32_t addr) const;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include <fxos/disassembly.h>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <array>
|
||||
|
||||
namespace FxOS {
|
||||
|
@ -26,13 +27,17 @@ void register_instruction(Instruction ins)
|
|||
//---
|
||||
|
||||
ConcreteInstructionArg::ConcreteInstructionArg():
|
||||
loc {}, type {}, syscall_id {-1}
|
||||
value {}, syscall_id {-1}
|
||||
{
|
||||
location = RelConstDomain().bottom();
|
||||
reg_address = -1;
|
||||
}
|
||||
|
||||
ConcreteInstruction::ConcreteInstruction(Instruction &inst):
|
||||
inst {inst}, jmptarget {}, leader {false}
|
||||
ConcreteInstruction::ConcreteInstruction(Instruction const &inst):
|
||||
inst {inst}, args {},
|
||||
leader {false}, delayslot {false},
|
||||
terminal {false}, jump {false}, condjump {false},
|
||||
jmptarget {0xffffffff}
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -76,7 +81,7 @@ uint32_t Disassembly::maxpc()
|
|||
|
||||
ConcreteInstruction &Disassembly::readins(uint32_t pc)
|
||||
{
|
||||
if(pc & 1) throw std::runtime_error("Disassembly::ins_read at odd PC");
|
||||
if(pc & 1) throw std::runtime_error("Disassembly::readins at odd PC");
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -90,7 +95,7 @@ ConcreteInstruction &Disassembly::readins(uint32_t pc)
|
|||
throw std::runtime_error("No instruction for opcode");
|
||||
}
|
||||
|
||||
Instruction &inst = *insmap[opcode];
|
||||
Instruction const &inst = *insmap[opcode];
|
||||
|
||||
ConcreteInstruction ci(inst);
|
||||
m_instructions.emplace(pc, ci);
|
||||
|
@ -127,11 +132,12 @@ void DisassemblyPass::enqueue_next(uint32_t pc)
|
|||
void DisassemblyPass::enqueue_unseen_successors(uint32_t pc,
|
||||
ConcreteInstruction &inst)
|
||||
{
|
||||
if(!inst.isterminal() && !inst.isjump())
|
||||
if(!inst.terminal && !inst.jump)
|
||||
{
|
||||
if(pc == 0x80000078) printf("t%d j%d\n", inst.terminal, inst.jump);
|
||||
if(!m_seen.count(pc + 2)) enqueue(pc + 2);
|
||||
}
|
||||
if(inst.isjump() || inst.iscondjump())
|
||||
if(inst.jump || inst.condjump)
|
||||
{
|
||||
if(!m_seen.count(inst.jmptarget)) enqueue(inst.jmptarget);
|
||||
}
|
||||
|
@ -140,11 +146,11 @@ void DisassemblyPass::enqueue_unseen_successors(uint32_t pc,
|
|||
void DisassemblyPass::enqueue_all_successors(uint32_t pc,
|
||||
ConcreteInstruction &inst)
|
||||
{
|
||||
if(!inst.isterminal() && !inst.isjump())
|
||||
if(!inst.terminal && !inst.jump)
|
||||
{
|
||||
enqueue(pc + 2);
|
||||
}
|
||||
if(inst.isjump() || inst.iscondjump())
|
||||
if(inst.jump || inst.condjump)
|
||||
{
|
||||
enqueue(inst.jmptarget);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
#include <fxos/domains.h>
|
||||
#include <fxos/util.h>
|
||||
#include <fxos/lang.h>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace FxOS {
|
||||
|
@ -47,7 +49,7 @@ RelConst RelConstDomain::minus(RelConst r) const noexcept
|
|||
if(r.spe) return r;
|
||||
/* This domain does not support multiplicative coefficients for the
|
||||
base. If the base is non-zero, return Top. */
|
||||
if(r.arg || r.org || r.reg) return top();
|
||||
if(r.base) return top();
|
||||
|
||||
r.ival = -r.ival;
|
||||
return r;
|
||||
|
@ -217,7 +219,7 @@ bool RelConstDomain::cmp(RelConst r1, RelConst r2) const noexcept
|
|||
|
||||
int RelConstDomain::cmpu(RelConst r1, RelConst r2) const noexcept
|
||||
{
|
||||
/* We can't just substract because of overflows (information is lost
|
||||
/* We can't just subtract because of overflows (information is lost
|
||||
because we don't have the V bit) */
|
||||
return (r1.uval > r2.uval) - (r1.uval < r2.uval);
|
||||
}
|
||||
|
@ -227,4 +229,42 @@ int RelConstDomain::cmps(RelConst r1, RelConst r2) const noexcept
|
|||
return (r1.ival > r2.ival) - (r1.ival < r2.ival);
|
||||
}
|
||||
|
||||
//---
|
||||
// Other functions
|
||||
//---
|
||||
|
||||
/* True exactly when no special marker is set in [spe] */
RelConst::operator bool () const noexcept
{
	return spe == 0;
}
|
||||
|
||||
std::string RelConst::str() const noexcept
|
||||
{
|
||||
using RegName = CpuRegister::CpuRegisterName;
|
||||
|
||||
if(!base && !uval) return "0";
|
||||
if(spe == Bottom) return "Bottom";
|
||||
if(spe == Top) return "Top";
|
||||
|
||||
std::string str;
|
||||
if(arg) str = format("arg%d", arg);
|
||||
if(org) str = format("org_%s", CpuRegister((RegName)org).str());
|
||||
if(reg) str = CpuRegister((RegName)org).str();
|
||||
|
||||
if(!uval) return str;
|
||||
|
||||
if(ival >= -256 && ival < 256)
|
||||
{
|
||||
uint32_t v = 0;
|
||||
if(str.size() && ival > 0) str += "+", v = ival;
|
||||
if(str.size() && ival < 0) str += "-", v = -ival;
|
||||
|
||||
return str + format("%d", v);
|
||||
}
|
||||
else
|
||||
{
|
||||
return str + format("%08x", uval);
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
|
24
lib/lang.cpp
24
lib/lang.cpp
|
@ -53,13 +53,10 @@ static std::map<Reg,std::string> regnames = {
|
|||
/* Construction from string - pretty slow */
|
||||
CpuRegister::CpuRegister(std::string name)
|
||||
{
|
||||
for(auto it = regnames.begin(); it != regnames.end(); it++)
|
||||
for(auto &it: regnames) if(it.second == name)
|
||||
{
|
||||
if(it->second == name)
|
||||
{
|
||||
m_name = it->first;
|
||||
return;
|
||||
}
|
||||
m_name = it.first;
|
||||
return;
|
||||
}
|
||||
|
||||
throw std::invalid_argument("invalid CpuRegister name");
|
||||
|
@ -159,13 +156,13 @@ std::string Argument::str() const
|
|||
switch(kind)
|
||||
{
|
||||
case Argument::Reg:
|
||||
return format("r%d", base);
|
||||
return base.str();
|
||||
case Argument::Deref:
|
||||
return format("@r%d", base);
|
||||
return format("@%s", base.str());
|
||||
case Argument::PostInc:
|
||||
return format("@r%d+", base);
|
||||
return format("@%s+", base.str());
|
||||
case Argument::PreDec:
|
||||
return format("@-%dr", base);
|
||||
return format("@-%s", base.str());
|
||||
case Argument::StructDeref:
|
||||
return format("@(%d,%s)", disp, base.str().c_str());
|
||||
case Argument::ArrayDeref:
|
||||
|
@ -175,6 +172,8 @@ std::string Argument::str() const
|
|||
return format("@(%d,pc)", disp);
|
||||
case Argument::PcJump:
|
||||
return format("pc+%d", disp);
|
||||
case Argument::Imm:
|
||||
return format("#%d", imm);
|
||||
default:
|
||||
return "(invalid)";
|
||||
}
|
||||
|
@ -263,4 +262,9 @@ bool Instruction::isdelayed() const noexcept
|
|||
return false;
|
||||
}
|
||||
|
||||
bool Instruction::isvaliddelayslot() const noexcept
|
||||
{
|
||||
return !isdelayed() && !isterminal() && !isjump() && !iscondjump();
|
||||
}
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
|
|
@ -91,7 +91,7 @@ void Library::load(std::string path)
|
|||
|
||||
long long ns = timer_end(start);
|
||||
|
||||
log(LOG "done (%s)", timer_format(ns));
|
||||
log(LOG "%s", timer_format(ns));
|
||||
}
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
|
|
@ -214,16 +214,16 @@ static Argument make_arg(int token, int opsize, int m, int n, int d, int i)
|
|||
case AT_RMP: return Argument_PostInc(Rm);
|
||||
case AT_RNP: return Argument_PostInc(Rn);
|
||||
case AT_MRN: return Argument_PreDec(Rn);
|
||||
case AT_DRN: return Argument_StructDeref(d, opsize, Rn);
|
||||
case AT_DRM: return Argument_StructDeref(d, opsize, Rm);
|
||||
case AT_DGBR: return Argument_StructDeref(d, opsize, Reg::GBR);
|
||||
case AT_DRN: return Argument_StructDeref(d*opsize, opsize, Rn);
|
||||
case AT_DRM: return Argument_StructDeref(d*opsize, opsize, Rm);
|
||||
case AT_DGBR: return Argument_StructDeref(d*opsize, opsize, Reg::GBR);
|
||||
case AT_R0RN: return Argument_ArrayDeref(Reg::R0, Rn);
|
||||
case AT_R0RM: return Argument_ArrayDeref(Reg::R0, Rm);
|
||||
case AT_R0GBR: return Argument_ArrayDeref(Reg::R0, Reg::GBR);
|
||||
|
||||
case AT_DPC:
|
||||
if(!opsize) err("@(disp,pc) must have a size (.w, .l)");
|
||||
return Argument_PcRel(d, opsize);
|
||||
return Argument_PcRel(d*opsize, opsize);
|
||||
}
|
||||
|
||||
throw std::logic_error("lex asm builds args from bad tokens");
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
//---
|
||||
// fxos.passes.cfg: CFG construction, as used by other passes
|
||||
// fxos.passes.cfg: Control Flow Graph construction
|
||||
//---
|
||||
|
||||
#include <fxos/disasm-passes/cfg.h>
|
||||
#include <fxos/disassembly.h>
|
||||
#include <fxos/errors.h>
|
||||
#include <fxos/log.h>
|
||||
#include <cassert>
|
||||
|
||||
using namespace FxOS::Log;
|
||||
|
||||
namespace FxOS {
|
||||
|
||||
CfgPass::CfgPass(Disassembly &disasm):
|
||||
|
@ -16,21 +19,59 @@ CfgPass::CfgPass(Disassembly &disasm):
|
|||
|
||||
void CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci)
|
||||
{
|
||||
std::vector<std::string> jump_mnemonics {
|
||||
"bra", "bf", "bf.s", "bf/s", "bt", "bt.s", "bt/s",
|
||||
};
|
||||
/* Compute the jump target for jump instructions. This is easy because
|
||||
they are all trivially computable. (...If they are not we dub them
|
||||
"terminal" to avoid the computation!) */
|
||||
uint32_t jmptarget = 0xffffffff;
|
||||
|
||||
/* Set the jmptarget fields whenever needed. This is easy because jump
|
||||
instructions have trivially computable destinations */
|
||||
for(auto mnemonic: jump_mnemonics)
|
||||
if(ci.inst.isjump() || ci.inst.iscondjump())
|
||||
{
|
||||
if(ci.inst.mnemonic != mnemonic) continue;
|
||||
auto &args = ci.inst.args;
|
||||
|
||||
if(args.size() != 1 || args[0].kind != Argument::PcJump)
|
||||
throw LangError(pc, "invalid jump instruction");
|
||||
|
||||
ci.jmptarget = pc + args[0].disp;
|
||||
jmptarget = (pc+4) + args[0].disp;
|
||||
|
||||
/* Make the target of the jump a leader */
|
||||
ConcreteInstruction &target = m_disasm.readins(jmptarget);
|
||||
target.leader = true;
|
||||
|
||||
/* Check that it's not in a delay slot */
|
||||
if(target.delayslot)
|
||||
throw LangError(pc, "jump into a delay slot!");
|
||||
}
|
||||
|
||||
/* If this instruction is in a delay slot, check its type. A valid
|
||||
delay slot has no branching properties on its own, so nothing new to
|
||||
set in the properties. */
|
||||
if(ci.delayslot)
|
||||
{
|
||||
if(!ci.inst.isvaliddelayslot())
|
||||
throw LangError(pc, "invalid delay slot");
|
||||
}
|
||||
/* Handle normal instructions */
|
||||
else if(!ci.inst.isdelayed())
|
||||
{
|
||||
ci.terminal = ci.inst.isterminal();
|
||||
ci.jump = ci.inst.isjump();
|
||||
ci.condjump = ci.inst.iscondjump();
|
||||
ci.jmptarget = jmptarget;
|
||||
}
|
||||
/* Create a new delay slot */
|
||||
else
|
||||
{
|
||||
ConcreteInstruction &slot = m_disasm.readins(pc+2);
|
||||
if(slot.leader)
|
||||
throw LangError(pc+2, "leader in a delay slot!");
|
||||
if(!slot.inst.isvaliddelayslot())
|
||||
throw LangError(pc+2, "invalid delay slot");
|
||||
|
||||
slot.delayslot = true;
|
||||
slot.terminal = ci.inst.isterminal();
|
||||
slot.jump = ci.inst.isjump();
|
||||
slot.condjump = ci.inst.iscondjump();
|
||||
slot.jmptarget = jmptarget;
|
||||
}
|
||||
|
||||
enqueue_unseen_successors(pc, ci);
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
//---
|
||||
// fxos.passes.pcrel: PC-relative addressing resolution
|
||||
// fxos.passes.pcrel: Resolution of PC-relative addresses
|
||||
//---
|
||||
|
||||
#include <fxos/disassembly.h>
|
||||
#include <fxos/disasm-passes/pcrel.h>
|
||||
|
||||
namespace FxOS {
|
||||
|
||||
|
@ -13,6 +13,28 @@ PcrelPass::PcrelPass(Disassembly &disasm):
|
|||
|
||||
void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci)
|
||||
{
|
||||
Instruction const &i = ci.inst;
|
||||
|
||||
for(size_t n = 0; n < i.args.size(); n++)
|
||||
{
|
||||
Argument const &a = i.args[n];
|
||||
ConcreteInstructionArg &ca = ci.args[n];
|
||||
|
||||
if(a.kind == Argument::PcRel)
|
||||
{
|
||||
uint32_t target = (pc & ~(a.opsize - 1)) + 4 + a.disp;
|
||||
ca.location = RelConstDomain().constant(target);
|
||||
|
||||
/* Also compute the value */
|
||||
uint32_t value = m_disasm.target().
|
||||
}
|
||||
else if(a.kind == Argument::PcJump)
|
||||
{
|
||||
uint32_t target = pc + 4 + a.disp;
|
||||
ca.location = RelConstDomain().constant(target);
|
||||
}
|
||||
}
|
||||
|
||||
enqueue_unseen_successors(pc, ci);
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// fxos.passes.print: Print disassembly
|
||||
//---
|
||||
|
||||
#include <fxos/disasm-passes/print.h>
|
||||
#include <fxos/disassembly.h>
|
||||
|
||||
namespace FxOS {
|
||||
|
@ -9,12 +10,60 @@ namespace FxOS {
|
|||
/* Build a print pass over [disasm]. The display parameters start with both
   hide_resolved_* flags off and hide_movpc_address set to Hide_MovPC_Never. */
PrintPass::PrintPass(Disassembly &disasm):
    DisassemblyPass(disasm)
{
    /* Initial display settings; callers may change them before running */
    hide_movpc_address = Hide_MovPC_Never;
    hide_resolved_pcrel = false;
    hide_resolved_pcjump = false;
}
|
||||
|
||||
void PrintPass::run(void)
|
||||
{
|
||||
for(auto &pair: m_disasm.instructions())
|
||||
{
|
||||
analyze(pair.first, pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci)
|
||||
{
|
||||
std::cout << ci.inst.mnemonic << "\n";
|
||||
enqueue_next(pc);
|
||||
Instruction const &i = ci.inst;
|
||||
|
||||
/* Mnemonic */
|
||||
|
||||
static std::map<int, std::string> suffixes = {
|
||||
{ 1, ".b" }, { 2, ".w" }, { 4, ".l" } };
|
||||
|
||||
std::string mnemonic = i.mnemonic + suffixes[i.opsize];
|
||||
if(i.args.size())
|
||||
mnemonic += std::string(8 - mnemonic.size(), ' ');
|
||||
|
||||
printf(" %08x: %04x %s", pc, ci.inst.opcode, mnemonic.c_str());
|
||||
|
||||
/* Arguments */
|
||||
|
||||
for(size_t n = 0; n < i.args.size(); n++)
|
||||
{
|
||||
auto &a = i.args[n];
|
||||
Location &l = ci.args[n].location;
|
||||
|
||||
if(n) printf(", ");
|
||||
|
||||
if(a.kind == Argument::PcJump && l && hide_resolved_pcjump)
|
||||
{
|
||||
printf("<%s>", l.str().c_str());
|
||||
}
|
||||
else if(a.kind == Argument::PcRel && l && hide_resolved_pcrel)
|
||||
{
|
||||
printf("<%s>", l.str().c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("%s", a.str().c_str());
|
||||
if(l) printf(" <%s>", l.str().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
|
|
@ -2,4 +2,92 @@
|
|||
|
||||
namespace FxOS {
|
||||
|
||||
DataValue::DataValue():
|
||||
type(nullptr), determined(false)
|
||||
{
|
||||
}
|
||||
|
||||
uint32_t DataValue::read(int offset, int size)
|
||||
{
|
||||
if(offset < 0 || size < 0 || (type->size && offset+size >= type->size))
|
||||
throw std::logic_error("Invalid read into simulated data");
|
||||
if(!size || size & (size - 1))
|
||||
throw std::logic_error("Simulated read not a power of 2");
|
||||
|
||||
if(type->kind==DataType::Integral || type->kind==DataType::BitField)
|
||||
{
|
||||
int shift = (4 - size - offset) << 3;
|
||||
uint32_t u = uinteger >> shift;
|
||||
return (size == 4) ? u :
|
||||
(size == 2) ? (uint16_t)u :
|
||||
(uint8_t)u;
|
||||
}
|
||||
else if(type->kind == DataType::Array)
|
||||
{
|
||||
int elsize = type->arraytype->size;
|
||||
int index = offset / elsize;
|
||||
|
||||
if(index >= (int)entries.size())
|
||||
throw std::logic_error("Read out of array bounds");
|
||||
|
||||
/* Will throw if reading from two entries simultaneously */
|
||||
return entries[index].read(offset % elsize, size);
|
||||
}
|
||||
else if(type->kind == DataType::Array)
|
||||
{
|
||||
int index = 0;
|
||||
while(offset >= type->attributes[index].size)
|
||||
{
|
||||
index++;
|
||||
offset -= type->attributes[index].size;
|
||||
}
|
||||
|
||||
return entries[index].read(offset, size);
|
||||
}
|
||||
|
||||
throw std::logic_error("Read into unknown DataValue kind");
|
||||
}
|
||||
|
||||
void DataValue::write(int offset, int size, uint32_t contents)
|
||||
{
|
||||
if(offset < 0 || size < 0 || (type->size && offset+size >= type->size))
|
||||
throw std::logic_error("Invalid write into simulated data");
|
||||
if(!size || size & (size - 1))
|
||||
throw std::logic_error("Simulated write not a power of 2");
|
||||
|
||||
if(type->kind==DataType::Integral || type->kind==DataType::BitField)
|
||||
{
|
||||
uint32_t mask = (size == 1) ? 0xff :
|
||||
(size == 2) ? 0xffff :
|
||||
0xffffffff;
|
||||
|
||||
int shift = (4 - size - offset) << 3;
|
||||
|
||||
uinteger = (uinteger & ~(mask << shift)) | (contents << shift);
|
||||
}
|
||||
else if(type->kind == DataType::Array)
|
||||
{
|
||||
int elsize = type->arraytype->size;
|
||||
int index = offset / elsize;
|
||||
|
||||
if(index >= (int)entries.size())
|
||||
throw std::logic_error("Write out of array bounds");
|
||||
|
||||
entries[index].write(offset % elsize, size, contents);
|
||||
}
|
||||
else if(type->kind == DataType::Array)
|
||||
{
|
||||
int index = 0;
|
||||
while(offset >= type->attributes[index].size)
|
||||
{
|
||||
index++;
|
||||
offset -= type->attributes[index].size;
|
||||
}
|
||||
|
||||
entries[index].write(offset, size, contents);
|
||||
}
|
||||
|
||||
throw std::logic_error("Write into unknown DataValue kind");
|
||||
}
|
||||
|
||||
} /* namespace FxOS */
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <fxos/target.h>
|
||||
#include <fxos/errors.h>
|
||||
#include <fxos/os.h>
|
||||
#include <cstring>
|
||||
|
||||
|
@ -150,7 +151,7 @@ char const *Target::translate(uint32_t addr, int size) const
|
|||
catch(std::out_of_range &e) {}
|
||||
}
|
||||
|
||||
throw std::out_of_range("Out of target bindings");
|
||||
throw AddrError(addr, size, "out of target bindings");
|
||||
}
|
||||
|
||||
uint32_t Target::search(uint32_t start, uint32_t end, void const *pattern,
|
||||
|
@ -159,7 +160,7 @@ uint32_t Target::search(uint32_t start, uint32_t end, void const *pattern,
|
|||
uint32_t occurrence;
|
||||
if(end < start || !covers(start, end - start))
|
||||
{
|
||||
throw std::out_of_range("Out of target bindings");
|
||||
throw AddrError(start, end-start, "out of target bindings");
|
||||
}
|
||||
|
||||
for(auto it = m_bindings.crbegin(); it != m_bindings.crend(); it++)
|
||||
|
|
Loading…
Reference in New Issue