fxos: binary-based passes, print pass -> basic asm view

This commit is contained in:
Lephenixnoir 2023-10-07 12:35:16 +02:00
parent b7569df076
commit f2c1ce50fe
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
19 changed files with 442 additions and 409 deletions

View File

@ -42,11 +42,11 @@ set(fxos_core_SOURCES
lib/os.cpp
lib/passes/cfg.cpp
lib/passes/pcrel.cpp
lib/passes/print.cpp
lib/passes/syscall.cpp
lib/project.cpp
lib/semantics.cpp
lib/symbols.cpp
lib/view/assembly.cpp
lib/vspace.cpp
lib/ai/RelConst.cpp

View File

@ -64,8 +64,19 @@ struct Binary
multiple objects with the same name, returns an arbitrary one. */
std::optional<u32> objectAddress(std::string const &name) const;
/* Return the object defined at the specified address if there is one,
nullptr otherwise. If multiple objects are defined at the specified
address, an arbitrary one is returned. */
BinaryObject *objectAt(u32 address);
BinaryObject const *objectAt(u32 address) const;
/* Returns the list of all objects defined at the specified address. */
std::vector<BinaryObject *> objectsAt(u32 address);
std::vector<BinaryObject const *> objectsAt(u32 address) const;
/* Locate all objects that intersect an address. */
std::vector<BinaryObject *> objectsCovering(u32 address);
std::vector<BinaryObject const *> objectsCovering(u32 address) const;
private:
VirtualSpace m_vspace;

View File

@ -23,10 +23,12 @@
#include <set>
#include <map>
#include <queue>
#include <optional>
namespace FxOS {
class VirtualSpace;
class Binary;
/* Register an instruction. This is called by loader functions from the asm
table lexer. [inst] must have its opcode field set. */
@ -35,6 +37,9 @@ void register_instruction(AsmInstruction const &inst);
/* Lex and register an assembly instruction table. */
int load_instructions(Buffer const &file);
/* Map of all decodable instructions. */
extern std::array<std::optional<AsmInstruction>, 65536> insmap;
//---
// Dynamic information on instructions
//---
@ -231,17 +236,17 @@ struct Disassembly
class DisassemblyPass
{
public:
DisassemblyPass(Disassembly &disasm);
DisassemblyPass(Binary &binary);
/* Underlying disassembly */
Disassembly &m_disasm;
Binary &m_binary;
};
/* A disassembly pass that observes each function independently */
class FunctionPass: public DisassemblyPass
{
public:
FunctionPass(Disassembly &disasm);
FunctionPass(Binary &binary);
/* Analyze the whole disassembly */
bool analyzeAllFunctions();
@ -267,7 +272,7 @@ private:
class InstructionPass: public FunctionPass
{
public:
InstructionPass(Disassembly &disasm);
InstructionPass(Binary &binary);
/* If set, this pass loads instructions from the disassembly automatically.
This is useful for passes that explore new functions. By default,

View File

@ -53,7 +53,7 @@ namespace FxOS {
class CfgPass: public InstructionPass
{
public:
CfgPass(Disassembly &disasm);
CfgPass(Binary &binary);
bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override;
/* Explore a new function at the specified address. This method creates the

View File

@ -27,7 +27,7 @@ namespace FxOS {
class PcrelPass: public InstructionPass
{
public:
PcrelPass(Disassembly &disasm);
PcrelPass(Binary &binary);
bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override;
};

View File

@ -1,115 +0,0 @@
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/passes/print: Disassembly printer
//
// This pass prints the program and adds annotations depending on a number of
// customizable boolean parameters.
//
// Data for an instruction, and arguments in particular, might have a large
// number of equivalent representations, depending on how much information was
// added during disassembly.
//
// The main mechanic of this pass is to define *promotions* which allow high-
// level information to be added or to replace low-level data. For instance, an
// @(disp,pc) argument could promote to a statically-computed address, which
// could promote to its known pointed value in the case of a read, which could
// itself promote to a symbol or syscall number.
//
// Each promotion opportunity has 3 possible settings:
// - Never: the higher-level information is not shown.
// - Append: the higher-level information is shown after the low-level one.
// - Promote: the higher-level information replaces the low-level one.
//
// For example, by default @(disp,pc) is set to Promote to statically-computed
// addresses, their values, and syscall numbers, but syscall names are only set
// to Append. Therefore, a mov.l @(disp,pc) which loads the address of syscall
// %ace on an fx-series model (which is memcmp) might show as
//
// mov.l %ace memcmp, r3
//
// where the first element has been promoted twice and the second appended.
//---
#ifndef FXOS_PASSES_PRINT_H
#define FXOS_PASSES_PRINT_H
#include <fxos/disassembly.h>
#include <fxos/symbols.h>
namespace FxOS {
class OS;
/* Instruction pass that prints each visited instruction. The promote_*
fields select, per kind of argument, whether higher-level information is
hidden, appended, or substituted for the low-level text (see Promotion). */
class PrintPass: public InstructionPass
{
public:
PrintPass(Disassembly &disasm);
/* Print the instruction at pc; called once per analyzed instruction. */
bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override;
//---
// Print pass parameters
//---
/* Promotion parameters. Default is always to append. */
/* NOTE(review): the promote_* fields below have no in-class initializer;
the "default" only holds if the constructor or the caller sets them. */
enum Promotion {
/* Never promote */
Never = 1,
/* Promote but keep the lower-level information */
Append = 0,
/* Promote and hide the lower-level information */
Promote = 2,
};
/** In the following, promote_x always means promote *to x* **/
/* In jumps, promote "pc+<disp>" to the target address */
int promote_pcjump_loc;
/* In a PC-relative mov, promote "@(<disp>,pc)" to computed address */
int promote_pcrel_loc;
/* In a PC-relative mov, promote address to pointed value */
int promote_pcrel_value;
/* Promote an integer to a syscall number */
int promote_syscall;
/* Promote a syscall number to a syscall name */
int promote_syscallname;
/* Promote an integer to a symbol */
int promote_symbol;
/* In a mova, promote "pc+<disp>" to the computed address */
int promote_pcaddr_loc;
/* TODO: More print pass parameters */
private:
/* Symbol tables to look up names */
std::vector<std::reference_wrapper<SymbolTable const>> m_symtables;
/* Query symbol tables, most recent first */
std::optional<std::string> symquery(Symbol::Type type, uint32_t value);
/* OS for the target, to mark syscalls before instructions */
OS *m_os;
/* Last printed address (for ellipses) */
uint32_t m_last_address;
/** Internal promotion tree printers **/
/* Add a text segment for the current argument; the boolean requests that
it replace the most recently queued segment instead of following it. */
void queue(std::string, bool = false);
/* Print all queued segments and empty the queue. */
void queue_flush();
/* Segments queued for the argument currently being printed */
std::vector<std::string> m_messages;
/* One printer per promotion step; each queues its text, then chains to
the next applicable step. */
void pcjumploc(Argument const &);
void pcrelloc(Argument const &);
void pcrelval(Argument const &);
void syscall(Argument const &);
void syscallname(Argument const &);
void symbol(Argument const &);
void pcaddrloc(Argument const &);
};
} /* namespace FxOS */
#endif /* FXOS_PASSES_PRINT_H */

View File

@ -22,7 +22,7 @@ namespace FxOS {
class SyscallPass: public InstructionPass
{
public:
SyscallPass(Disassembly &disasm, OS *os);
SyscallPass(Binary &binary, OS *os);
bool analyzeInstruction(uint32_t pc, OldInstruction &inst) override;
private:

View File

@ -0,0 +1,105 @@
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
// fxos/view/assembly: Assembly code visualization
//
// This header provides a configurable assembly code pretty-printer. Options
// are provided to select between equivalent representations of arguments.
//
// Code is usually printed for a function. On-the-fly disassembly of non-
// functions is supported by either:
// - Building a function rooted at a specified address with a CFG analysis;
// - Building a function with a single block covering a specified interval.
//
// # Visualizing instructions
//
// The main mechanic is the _promotion_ of low-level information to a higher
// level. For instance, a @(disp,pc) argument can promote to its statically-
// determined target, which can promote to its pointed value for a ROM read,
// which might itself be an object address or a syscall number.
//
// Each promotion thus has 3 settings:
// - Never: the higher-level information is not shown.
// - Append: the higher-level information is shown after the low-level one.
// - Promote: the higher-level information replaces the low-level one.
//
// For example, by default @(disp,pc) promotes to the target address, accessed
// value, and syscall number, but syscall names are set to Append. Hence, a
// mov.l @(disp,pc) loading the address of syscall %ace on FX (which is memcmp)
// into r3 will show as "mov.l %ace memcmp, r3".
//
// # Visualizing blocks and functions
// TODO: CFG layout and rendering algorithms
//
// The rendering is split into three steps:
// 1. Generating text for each instruction or object
// 2. Grouping by basic block
// 3. Laying out basic blocks as a graph
//---
#ifndef FXOS_VIEW_ASSEMBLY_H
#define FXOS_VIEW_ASSEMBLY_H
#include <fxos/util/types.h>
#include <fxos/memory.h>
namespace FxOS {
class OS;
struct Binary;
struct Function;
struct BasicBlock;
struct Instruction;
/* Options controlling how assembly arguments are rendered. Every promotion
defaults to Promote; `binary` defaults to null, in which case no object
names can be looked up. */
struct ViewAssemblyOptions
{
enum Promotion : u8 {
Never, /* Never promote */
Append, /* Promote but keep the lower-level information */
Promote, /* Promote and hide the lower-level information */
};
struct
{
/* In jumps, promote "pc+<disp>" to the target address */
Promotion PCJump_to_Location = Promote;
/* In a PC-relative mov, promote "@(<disp>,pc)" to computed address */
Promotion PCRelative_to_Location = Promote;
/* In a mova, promote "pc+<disp>" to the computed address */
Promotion PCAddr_to_Location = Promote;
/* In a read with a known location, promote address to pointed value */
Promotion ReadLocation_to_Constant = Promote;
/* Promote an integer to a binary object's name */
Promotion Constant_to_ObjectName = Promote;
/* Promote an integer to a syscall number (if no name is available) */
Promotion Constant_to_SyscallNumber = Promote;
} promotions;
/* Binary to get symbols from */
Binary *binary = nullptr;
/* TODO: More view assembly options, including CFG layout */
/* TODO: View assembly options: syntax highlighting */
};
void viewAssemblyInstruction(
Instruction const &inst, ViewAssemblyOptions *opts = nullptr);
void viewAssemblyBasicBlock(
BasicBlock const &bb, ViewAssemblyOptions *opts = nullptr);
void viewAssemblyFunction(
Function const &fun, ViewAssemblyOptions *opts = nullptr);
/* Legacy functions. Automatically set `binary` as the binary in the options. */
void viewAssemblyLegacyRegion(
Binary &binary, MemoryRegion r, ViewAssemblyOptions *opts = nullptr);
void viewAssemblyLegacyAddress(
Binary &binary, u32 pc, ViewAssemblyOptions *opts = nullptr);
} /* namespace FxOS */
#endif /* FXOS_VIEW_ASSEMBLY_H */

View File

@ -127,11 +127,8 @@ public:
char const *translate_dynamic(uint32_t addr, int *size) override;
// TODO: Remove these
std::string mpu;
SymbolTable symbols;
uint32_t cursor;
Disassembly disasm;
OS *os_analysis(bool force = false);
private:
std::unique_ptr<OS> m_os;
};

View File

@ -64,6 +64,34 @@ std::optional<u32> Binary::objectAddress(std::string const &name) const
return {};
}
BinaryObject *Binary::objectAt(u32 address)
{
    /* Exact-address lookup: returns whichever entry the container finds for
       this key, or nullptr when no object starts at this address. */
    auto const match = m_objects.find(address);
    if(match == m_objects.end())
        return nullptr;

    return match->second.get();
}
BinaryObject const *Binary::objectAt(u32 address) const
{
    /* Read-only counterpart of the exact-address lookup: nullptr when no
       object starts at this address. */
    auto const match = m_objects.find(address);
    if(match == m_objects.end())
        return nullptr;

    return match->second.get();
}
std::vector<BinaryObject *> Binary::objectsAt(u32 address)
{
    /* Collect every object whose key is exactly this address, in the order
       the container stores them. */
    std::vector<BinaryObject *> found;

    auto const range = m_objects.equal_range(address);
    for(auto entry = range.first; entry != range.second; ++entry)
        found.push_back(entry->second.get());

    return found;
}
std::vector<BinaryObject const *> Binary::objectsAt(u32 address) const
{
    /* Read-only variant: collect every object whose key is exactly this
       address, in the order the container stores them. */
    std::vector<BinaryObject const *> found;

    auto const range = m_objects.equal_range(address);
    for(auto entry = range.first; entry != range.second; ++entry)
        found.push_back(entry->second.get());

    return found;
}
std::vector<BinaryObject *> Binary::objectsCovering(u32 address)
{
std::vector<BinaryObject *> objects;
@ -76,6 +104,18 @@ std::vector<BinaryObject *> Binary::objectsCovering(u32 address)
return objects;
}
std::vector<BinaryObject const *> Binary::objectsCovering(u32 address) const
{
    /* Return every object whose extent [start, start + size) contains the
       specified address. */
    std::vector<BinaryObject const *> objects;

    for(auto const &[obj_address, obj]: m_objects) {
        /* The object must start at or before the address and extend past
           it. Comparing the offset into the object against its size, rather
           than computing the end address, cannot wrap around for objects
           located at the very top of the address space. */
        if(obj_address <= address && address - obj_address < obj->size())
            objects.push_back(obj.get());
    }

    return objects;
}
//=== BinaryObject ===//
bool BinaryObject::intersects(BinaryObject const &other) const

View File

@ -7,6 +7,7 @@
#include <fxos/disassembly.h>
#include <fxos/vspace.h>
#include <fxos/binary.h>
#include <fxos/util/log.h>
#include <optional>
#include <array>
@ -14,7 +15,7 @@
namespace FxOS {
/* Instruction map */
static std::array<std::optional<AsmInstruction>, 65536> insmap;
std::array<std::optional<AsmInstruction>, 65536> insmap;
void register_instruction(AsmInstruction const &ins)
{
@ -245,7 +246,7 @@ std::vector<Claim const *> Disassembly::findClaimsOwnedBy(uint32_t address)
// DisassemblyPass
//---
DisassemblyPass::DisassemblyPass(Disassembly &disasm): m_disasm {disasm}
DisassemblyPass::DisassemblyPass(Binary &binary): m_binary {binary}
{
}
@ -253,7 +254,7 @@ DisassemblyPass::DisassemblyPass(Disassembly &disasm): m_disasm {disasm}
// FunctionPass
//---
FunctionPass::FunctionPass(Disassembly &disasm): DisassemblyPass(disasm)
FunctionPass::FunctionPass(Binary &binary): DisassemblyPass(binary)
{
}
@ -261,7 +262,8 @@ bool FunctionPass::analyzeAllFunctions()
{
bool ok = true;
for(auto &pair: m_disasm.functions)
// TODO: Use Binary's functions
for(auto &pair: m_binary.vspace().disasm.functions)
ok &= this->analyzeFunction(pair.second);
return ok;
@ -269,7 +271,8 @@ bool FunctionPass::analyzeAllFunctions()
bool FunctionPass::analyzeFunction(uint32_t pc)
{
OldFunction *func = m_disasm.getFunctionAt(pc);
// TODO: Use Binary's functions
OldFunction *func = m_binary.vspace().disasm.getFunctionAt(pc);
if(!func) {
FxOS_log(ERR, "no function at 0x%08x", pc);
return false;
@ -289,7 +292,8 @@ bool FunctionPass::analyzeFunctionRecursively(uint32_t pc)
while(!m_queue.empty()) {
uint32_t pc = m_queue.pop();
OldFunction *next = m_disasm.getFunctionAt(pc);
// TODO: Use Binary's functions
OldFunction *next = m_binary.vspace().disasm.getFunctionAt(pc);
if(this->analyzeFunction(*next))
this->enqueueSubfunctions(*next);
else
@ -315,8 +319,8 @@ void FunctionPass::updateSubfunctions(OldFunction &func)
// InstructionPass
//---
InstructionPass::InstructionPass(Disassembly &disasm):
FunctionPass(disasm), m_allowDiscovery {false}
InstructionPass::InstructionPass(Binary &binary):
FunctionPass(binary), m_allowDiscovery {false}
{
}
@ -329,7 +333,8 @@ bool InstructionPass::analyzeAllInstructions()
{
bool ok = true;
for(auto &pair: m_disasm.instructions)
// TODO: Use Binary's instructions
for(auto &pair: m_binary.vspace().disasm.instructions)
ok &= this->analyzeInstruction(pair.first, pair.second);
return ok;
@ -349,7 +354,9 @@ bool InstructionPass::analyzeAnonymousFunction(uint32_t pc)
while(!m_queue.empty()) {
uint32_t pc = m_queue.pop();
OldInstruction *i = m_disasm.getInstructionAt(pc, m_allowDiscovery);
// TODO: Use Binary's instructions
OldInstruction *i
= m_binary.vspace().disasm.getInstructionAt(pc, m_allowDiscovery);
if(i != nullptr && this->analyzeInstruction(pc, *i))
this->enqueueSuccessors(pc, *i);

View File

@ -8,12 +8,13 @@
#include <fxos/passes/cfg.h>
#include <fxos/disassembly.h>
#include <fxos/util/log.h>
#include <fxos/binary.h>
#include <cassert>
namespace FxOS {
CfgPass::CfgPass(Disassembly &disasm):
InstructionPass(disasm), m_claimedInstructions {}, m_pcrel {disasm}
CfgPass::CfgPass(Binary &binary):
InstructionPass(binary), m_claimedInstructions {}, m_pcrel {binary}
{
this->setAllowDiscovery(true);
}
@ -45,7 +46,9 @@ bool CfgPass::analyzeInstruction(uint32_t pc, OldInstruction &i)
jmptarget = (pc + 4) + args[0].disp;
/* Make the target of the jump a leader */
OldInstruction &target = *m_disasm.getInstructionAt(jmptarget, true);
// TODO: Use Binary instructions
OldInstruction &target
= *m_binary.vspace().disasm.getInstructionAt(jmptarget, true);
target.leader = true;
/* Check that it's not in a delay slot */
@ -68,7 +71,9 @@ bool CfgPass::analyzeInstruction(uint32_t pc, OldInstruction &i)
}
/* If it has a delay slot, create it at the next instruction */
else if(i.inst->hasDelaySlot()) {
OldInstruction &slot = *m_disasm.getInstructionAt(pc + 2, true);
// TODO: Use Binary instructions
OldInstruction &slot
= *m_binary.vspace().disasm.getInstructionAt(pc + 2, true);
if(slot.leader)
throw std::logic_error(format(
"0x%08x is a leader and also a delay"
@ -103,7 +108,8 @@ bool CfgPass::exploreFunction(uint32_t pc)
m_lastFunction = pc;
m_claimedInstructions.clear();
OldFunction *func = m_disasm.getOrCreateFunctionAt(pc);
// TODO: Use Binary functions
OldFunction *func = m_binary.vspace().disasm.getOrCreateFunctionAt(pc);
if(!this->analyzeFunction(pc))
return false;
@ -111,7 +117,9 @@ bool CfgPass::exploreFunction(uint32_t pc)
/* Look for call targets */
for(uint32_t pc: m_claimedInstructions) {
OldInstruction const *ci = m_disasm.getInstructionAt(pc);
// TODO: Use Binary instructions
OldInstruction const *ci
= m_binary.vspace().disasm.getInstructionAt(pc);
if(!ci)
continue;
AsmInstruction const &i = *ci->inst;

View File

@ -6,11 +6,11 @@
//---------------------------------------------------------------------------//
#include <fxos/passes/pcrel.h>
#include <fxos/vspace.h>
#include <fxos/binary.h>
namespace FxOS {
PcrelPass::PcrelPass(Disassembly &disasm): InstructionPass(disasm)
PcrelPass::PcrelPass(Binary &binary): InstructionPass(binary)
{
}
@ -31,7 +31,7 @@ bool PcrelPass::analyzeInstruction(uint32_t pc, OldInstruction &ci)
/* Also compute the value. This is sign-extended from 16-bit with
mov.w. There is no mov.b for this instruction. */
VirtualSpace &space = m_disasm.vspace;
VirtualSpace &space = m_binary.vspace();
uint32_t v = -1;
if(i->opsize == 2 && space.covers(addr, 2)) {

View File

@ -1,205 +0,0 @@
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
#include <fxos/passes/print.h>
#include <fxos/vspace.h>
#include <fxos/util/format.h>
#include <cstdarg>
#include <cstring>
namespace FxOS {
PrintPass::PrintPass(Disassembly &disasm):
    InstructionPass(disasm), m_symtables {}, m_last_address {0xffffffff}
{
    /* Default parameters: all 0, which is Append. These are plain ints, so
       they must be assigned explicitly; otherwise they hold indeterminate
       values until the caller sets every one of them. */
    promote_pcjump_loc = Append;
    promote_pcrel_loc = Append;
    promote_pcrel_value = Append;
    promote_syscall = Append;
    promote_syscallname = Append;
    promote_symbol = Append;
    promote_pcaddr_loc = Append;

    /* Use an OS observer to describe syscalls in header lines */
    m_os = disasm.vspace.os_analysis();

    /* Use the symbol tables from the virtual space */
    m_symtables.push_back(disasm.vspace.symbols);
}
bool PrintPass::analyzeInstruction(uint32_t pc, OldInstruction &i)
{
    /* Show an ellipsis when this instruction does not directly follow the
       previously printed one. m_last_address + 1 == 0 means that nothing
       has been printed yet. */
    bool const printedBefore = (m_last_address + 1 != 0);
    if(printedBefore && pc != m_last_address + 2)
        printf(" ...\n");

    /* Header line when the OS analysis reports a syscall at this address */
    if(m_os) {
        int const syscall_id = m_os->find_syscall(pc);
        if(syscall_id >= 0) {
            printf("\n<%%%04x", syscall_id);
            auto maybe_str = symquery(Symbol::Syscall, syscall_id);
            if(maybe_str)
                printf(" %s", (*maybe_str).c_str());
            printf(">\n");
        }
    }

    /* Address and opcode are printed even if decoding failed */
    printf(" %08x: %04x", pc, (i.inst ? i.inst->opcode : i.opcode));

    if(i.inst) {
        /* Mnemonic, with a size suffix for operation sizes 1, 2 and 4 */
        static char const *suffixes[5] = {"", ".b", ".w", "", ".l"};
        char const *suffix
            = suffixes[(i.inst->opsize <= 4) ? i.inst->opsize : 0];
        int spacing = i.inst->arg_count
                          ? 8 - strlen(i.inst->mnemonic) - strlen(suffix)
                          : 0;
        printf(" %s%s%*s", i.inst->mnemonic, suffix, spacing, "");

        /* Arguments, comma-separated; each one goes through the promotion
           chain matching its kind before being flushed */
        for(size_t n = 0; n < i.inst->arg_count; n++) {
            AsmArgument const &arg = i.inst->args[n];
            Argument const &a = i.args[n];

            if(n > 0)
                printf(", ");

            queue(arg.str());
            if(arg.kind == AsmArgument::PcJump)
                pcjumploc(a);
            else if(arg.kind == AsmArgument::PcRel)
                pcrelloc(a);
            else if(arg.kind == AsmArgument::PcAddr)
                pcaddrloc(a);
            queue_flush();
        }
    }

    printf("\n");
    m_last_address = pc;
    return true;
}
std::optional<std::string> PrintPass::symquery(
    Symbol::Type type, uint32_t value)
{
    /* Walk the tables from the last one added to the first, and stop at the
       first table that knows the value */
    for(auto it = m_symtables.rbegin(); it != m_symtables.rend(); ++it) {
        SymbolTable const &table = *it;
        auto maybe_str = table.query(type, value);
        if(maybe_str)
            return maybe_str;
    }

    return std::nullopt;
}
void PrintPass::queue(std::string str, bool override)
{
    /* With override set, the new segment takes the place of the most recent
       one (if any) instead of being appended after it */
    bool const replaceLast = override && !m_messages.empty();
    if(replaceLast)
        m_messages.pop_back();

    m_messages.push_back(str);
}
void PrintPass::queue_flush()
{
for(size_t i = 0; i < m_messages.size(); i++) {
if(i != 0)
printf(" ");
printf("%s", m_messages[i].c_str());
}
m_messages.clear();
}
void PrintPass::pcjumploc(Argument const &a)
{
    /* Only a constant location can be shown, and only if enabled */
    bool const known = RelConstDomain().is_constant(a.location);
    if(!known || promote_pcjump_loc == Never)
        return;

    bool const replace = (promote_pcjump_loc == Promote);
    queue(format("<%s>", a.location.str()), replace);

    /* Next step of the chain: syscall or symbol */
    syscall(a);
}
void PrintPass::pcrelloc(Argument const &a)
{
    /* Only a constant location can be shown, and only if enabled */
    bool const known = RelConstDomain().is_constant(a.location);
    if(!known || promote_pcrel_loc == Never)
        return;

    bool const replace = (promote_pcrel_loc == Promote);
    queue(format("<%s>", a.location.str()), replace);

    /* Next step of the chain: the value at that location */
    pcrelval(a);
}
void PrintPass::pcrelval(Argument const &a)
{
    /* Nothing to show without a value, or when this promotion is disabled */
    if(!a.value || promote_pcrel_value == Never)
        return;

    bool const replace = (promote_pcrel_value == Promote);
    queue(a.value.str(), replace);

    /* Next step of the chain: syscall or symbol */
    syscall(a);
}
void PrintPass::syscall(Argument const &a)
{
    if(!a.value)
        return;

    bool const showAsSyscall
        = (promote_syscall != Never) && (a.syscall_id >= 0);

    if(showAsSyscall) {
        queue(format("%%%04x", a.syscall_id), promote_syscall == Promote);
        syscallname(a);
    }
    else {
        /* If this is not a syscall, try to display as a symbol instead */
        symbol(a);
    }
}
void PrintPass::syscallname(Argument const &a)
{
    /* A name is only queued for a valid syscall number that some symbol
       table knows about */
    if(a.syscall_id >= 0) {
        auto maybe_name = symquery(Symbol::Syscall, a.syscall_id);
        if(maybe_name)
            queue(*maybe_name, promote_syscallname == Promote);
    }
}
void PrintPass::symbol(Argument const &a)
{
    if(!a.value)
        return;

    /* Look the constant up as an address in the symbol tables */
    auto const address = RelConstDomain().constant_value(a.value);
    auto maybe_name = symquery(Symbol::Address, address);

    if(maybe_name)
        queue(*maybe_name, promote_symbol == Promote);
}
void PrintPass::pcaddrloc(Argument const &a)
{
    /* Only a constant location can be shown, and only if enabled; this is
       the last step of its chain */
    bool const known = RelConstDomain().is_constant(a.location);
    if(!known || promote_pcaddr_loc == Never)
        return;

    bool const replace = (promote_pcaddr_loc == Promote);
    queue(format("<%s>", a.location.str()), replace);
}
} /* namespace FxOS */

View File

@ -9,8 +9,8 @@
namespace FxOS {
SyscallPass::SyscallPass(Disassembly &disasm, OS *os):
InstructionPass(disasm), m_os {os}
SyscallPass::SyscallPass(Binary &binary, OS *os):
InstructionPass(binary), m_os {os}
{
}

211
lib/view/assembly.cpp Normal file
View File

@ -0,0 +1,211 @@
//---------------------------------------------------------------------------//
// 1100101 |_ mov #0, r4 __ //
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
// |_ base# + offset |_| /_\_\___/__/ //
//---------------------------------------------------------------------------//
#include <fxos/view/assembly.h>
#include <fxos/binary.h>
#include <fxos/util/format.h>
#include <fxos/util/Queue.h>
#include <vector>
#include <cstdio>
#include <cstring>
#include <fmt/color.h>
namespace FxOS {
/* Output for a single argument, which consists of one or more text segments
each with their own text style. */
using ArgumentOutput = std::vector<std::pair<fmt::text_style, std::string>>;
/* Add one promoted text segment to an argument's output according to the
   promotion setting p:
   - Never: nothing is added;
   - Append: the segment is added after the existing ones;
   - Promote: the segment replaces the most recent one.
   Returns true if the promotion chain should stop here (Never), false if
   the segment was added and further promotions may follow. */
static inline bool output(ArgumentOutput &out, ViewAssemblyOptions::Promotion p,
    fmt::text_style style, std::string str)
{
    if(p == ViewAssemblyOptions::Never)
        return true;

    /* Replacing only makes sense if there is a previous segment; calling
       pop_back() on an empty vector is undefined behavior */
    if(p == ViewAssemblyOptions::Promote && !out.empty())
        out.pop_back();

    out.push_back({style, std::move(str)});
    return false;
}
/* Build the text segments for one argument: the raw form first, followed by
   (or replaced by) each promotion that applies and is enabled in opts. */
static void renderArgument(AsmArgument const &arg, Argument const &a,
    ArgumentOutput &out, ViewAssemblyOptions const &opts)
{
    /* The raw textual form is always the first segment */
    out.push_back({{}, arg.str()});

    /* Only PC-based arguments have anything to promote */
    bool const isJump = (arg.kind == AsmArgument::PcJump);
    bool const isRelative = !isJump && (arg.kind == AsmArgument::PcRel);
    bool const isAddr
        = !isJump && !isRelative && (arg.kind == AsmArgument::PcAddr);
    if(!isJump && !isRelative && !isAddr)
        return;

    /* Step 1: PC-based form to its computed location */
    ViewAssemblyOptions::Promotion toLocation
        = opts.promotions.PCRelative_to_Location;
    if(isJump)
        toLocation = opts.promotions.PCJump_to_Location;
    else if(isAddr)
        toLocation = opts.promotions.PCAddr_to_Location;

    if(!RelConstDomain().is_constant(a.location))
        return;
    if(output(out, toLocation, {}, format("<%s>", a.location.str())))
        return;

    /* Step 2 (PC-relative only): location to the value found there */
    if(isRelative) {
        // TODO: Check that this is a read operation!
        if(!a.value
            || output(out, opts.promotions.ReadLocation_to_Constant, {},
                a.value.str()))
            return;
    }

    /* Both remaining steps interpret the constant value */
    if(!a.value)
        return;

    /* Step 3: promote to object name first if available... */
    if(opts.binary) {
        u32 const address = RelConstDomain().constant_value(a.value);
        BinaryObject *obj = opts.binary->objectAt(address);
        if(obj) {
            output(out, opts.promotions.Constant_to_ObjectName, {},
                obj->name());
            return;
        }
    }

    /* ... or, as a default, a syscall number */
    if(a.syscall_id >= 0) {
        output(out, opts.promotions.Constant_to_SyscallNumber, {},
            format("%%%04x", a.syscall_id));
    }
}
//=== Legacy-style instruction printer ===//
/* Print one legacy instruction on its own line. m_lastAddress is the address
   of the previously printed instruction (0xffffffff if none); it is updated
   to pc before returning. */
static void doOldInst(u32 pc, OldInstruction &i,
    ViewAssemblyOptions const &opts, u32 &m_lastAddress)
{
    OS *os = opts.binary ? opts.binary->OSAnalysis() : nullptr;

    /* Show an ellipsis when this instruction does not directly follow the
       previously printed one */
    bool const printedBefore = (m_lastAddress + 1 != 0);
    if(printedBefore && pc != m_lastAddress + 2)
        printf(" ...\n");

    /* Header line when the OS analysis reports a syscall at this address;
       a non-null os implies that opts.binary is set */
    if(os) {
        int const syscall_id = os->find_syscall(pc);
        if(syscall_id >= 0) {
            printf("\n<%%%04x", syscall_id);
            BinaryObject *obj = opts.binary->objectAt(pc);
            if(obj)
                printf(" %s", obj->name().c_str());
            printf(">\n");
        }
    }

    /* Address and opcode are printed even if decoding failed */
    printf(" %08x: %04x", pc, (i.inst ? i.inst->opcode : i.opcode));

    if(i.inst) {
        /* Mnemonic, with a size suffix for operation sizes 1, 2 and 4 */
        static char const *suffixes[5] = {"", ".b", ".w", "", ".l"};
        char const *suffix
            = suffixes[(i.inst->opsize <= 4) ? i.inst->opsize : 0];
        int spacing = i.inst->arg_count
                          ? 8 - strlen(i.inst->mnemonic) - strlen(suffix)
                          : 0;
        printf(" %s%s%*s", i.inst->mnemonic, suffix, spacing, "");

        /* Arguments, comma-separated; the segments of each argument are
           separated by single spaces */
        ArgumentOutput segments;
        for(size_t n = 0; n < i.inst->arg_count; n++) {
            if(n > 0)
                printf(", ");

            renderArgument(i.inst->args[n], i.args[n], segments, opts);

            char const *separator = "";
            for(auto const &segment: segments) {
                printf("%s%s", separator, segment.second.c_str());
                separator = " ";
            }
            segments.clear();
        }
    }

    printf("\n");
    m_lastAddress = pc;
}
/* Print every instruction of a region, in address order, loading each one
   into the binary's legacy disassembly along the way. */
void viewAssemblyLegacyRegion(
    Binary &binary, MemoryRegion r, ViewAssemblyOptions *opts_ptr)
{
    /* Work on a local copy of the options so that the binary can be set
       without modifying the caller's structure */
    ViewAssemblyOptions opts;
    if(opts_ptr != nullptr)
        opts = *opts_ptr;
    opts.binary = &binary;

    u32 previous = 0xffffffff;

    /* Start at the even address at or below r.start. NOTE(review): the
       bound includes r.end itself; confirm whether MemoryRegion::end is
       meant to be inclusive. */
    u32 address = r.start & -2;
    while(address <= r.end) {
        OldInstruction *inst
            = binary.vspace().disasm.getInstructionAt(address, true);
        if(inst != nullptr)
            doOldInst(address, *inst, opts, previous);
        address += 2;
    }
}
/* Print the code reachable from pc: explore fall-through and jump successors
   first, then print every explored instruction. */
void viewAssemblyLegacyAddress(
    Binary &binary, u32 pc, ViewAssemblyOptions *opts_ptr)
{
    /* Work on a local copy of the options so that the binary can be set
       without modifying the caller's structure */
    ViewAssemblyOptions opts;
    if(opts_ptr != nullptr)
        opts = *opts_ptr;
    opts.binary = &binary;

    /* Exploration phase */
    Queue<u32> worklist;
    worklist.enqueue(pc);

    while(!worklist.empty()) {
        u32 const current = worklist.pop();
        OldInstruction *inst
            = binary.vspace().disasm.getInstructionAt(current, true);
        if(!inst)
            continue;

        /* Enqueue successors: the next instruction unless execution cannot
           fall through, and the jump target if there is one */
        bool const fallsThrough = !inst->terminal && !inst->jump;
        if(fallsThrough)
            worklist.enqueue(current + 2);
        if(inst->jump || inst->condjump)
            worklist.enqueue(inst->jmptarget);
    }

    /* Print explored instructions in increasing order of addresses (this
       relies on the iteration order of Queue::seen) */
    u32 previous = 0xffffffff;
    for(u32 address: worklist.seen) {
        OldInstruction *inst
            = binary.vspace().disasm.getInstructionAt(address, false);
        if(inst)
            doOldInst(address, *inst, opts, previous);
    }
}
//=== Binary-API assembly printer ===//
/* Declarations only: no definition of these three printers appears in this
part of the file. NOTE(review): presumably not implemented yet; confirm
before calling them. */
void viewAssemblyInstruction(
Instruction const &inst, ViewAssemblyOptions *opts);
void viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts);
void viewAssemblyFunction(Function const &fun, ViewAssemblyOptions *opts);
} /* namespace FxOS */

View File

@ -143,8 +143,7 @@ char const *Binding::translate_dynamic(uint32_t addr, int *size)
//=== VirtualSpace ===//
VirtualSpace::VirtualSpace():
bindings {}, mpu {}, cursor {0}, disasm {*this}, m_os {nullptr}
VirtualSpace::VirtualSpace(): bindings {}, disasm {*this}
{
}
@ -163,17 +162,6 @@ void VirtualSpace::deserialize(BSON const &b)
this->bindings.push_back(Binding(b[i]));
}
OS *VirtualSpace::os_analysis(bool force)
{
    /* Reuse the cached analysis unless a new one is requested */
    if(!force && m_os)
        return m_os.get();

    m_os = std::make_unique<OS>(*this);

    /* We don't keep an OS analysis result that failed */
    if(m_os->type == OS::UNKNOWN)
        m_os.reset();

    return m_os.get();
}
void VirtualSpace::bind_region(MemoryRegion const &region, Buffer const &buf)
{
this->bindings.emplace(this->bindings.begin(), region, buf);

View File

@ -19,7 +19,7 @@
//---
static void ad_disassemble_all(
VirtualSpace &space, std::vector<uint32_t> const &addresses, bool force)
Binary &binary, std::vector<uint32_t> const &addresses, bool force)
{
int successes = 0, errors = 0;
Timer timer;
@ -27,7 +27,7 @@ static void ad_disassemble_all(
/* Analyze the CFGs of all functions */
timer.start();
CfgPass cfg_pass(space.disasm);
CfgPass cfg_pass(binary);
/* We collect subfunction addresses while running the pass */
for(int i = 0; i < (int)addresses.size(); i++) {
@ -40,11 +40,8 @@ static void ad_disassemble_all(
if(!force)
return;
}
else {
for(Claim const &c: cfg_pass.resultClaims())
space.disasm.addExclusiveClaim(c);
else
successes++;
}
}
timer.stop();
printf("\n");
@ -52,9 +49,9 @@ static void ad_disassemble_all(
printr("[syscall] Finding syscall references...");
timer.restart();
OS *os = space.os_analysis();
OS *os = binary.OSAnalysis();
if(os) {
SyscallPass syscall_pass(space.disasm, os);
SyscallPass syscall_pass(binary, os);
if(!syscall_pass.analyzeAllInstructions()) {
errors++;
if(!force)
@ -90,7 +87,10 @@ static std::vector<uint32_t> parse_ad(Session &session, Parser &parser)
void _ad(Session &session, std::vector<uint32_t> const &addresses)
{
ad_disassemble_all(session.currentBinary()->vspace(), addresses, false);
Binary *b = session.currentBinary();
if(!b)
return FxOS_log(ERR, "No current binary!\n");
ad_disassemble_all(*b, addresses, false);
}
//--
@ -106,11 +106,9 @@ void _ads(Session &session)
{
Binary *b = session.currentBinary();
if(!b)
return;
return FxOS_log(ERR, "No current binary!\n");
VirtualSpace &space = b->vspace();
OS *os = b->OSAnalysis();
if(!os) {
printf("ads: OS analysis failed, cannot enumerate syscalls");
return;
@ -120,7 +118,7 @@ void _ads(Session &session)
for(int i = 0; i < os->syscall_count(); i++)
addresses.push_back(os->syscall(i));
ad_disassemble_all(space, addresses, true);
ad_disassemble_all(*b, addresses, true);
}
//---

View File

@ -8,13 +8,13 @@
#include <fxos/disassembly.h>
#include <fxos/passes/cfg.h>
#include <fxos/passes/pcrel.h>
#include <fxos/passes/print.h>
#include <fxos/passes/syscall.h>
#include <fxos/view/assembly.h>
#include <fxos/util/Timer.h>
#include <fxos/util/log.h>
static void disassemble(Session &session, Disassembly &disasm,
std::vector<std::string> const &passes, uint32_t address)
static void disassemble(
Binary &binary, std::vector<std::string> const &passes, u32 address)
{
for(auto pass: passes) {
Timer timer;
@ -23,32 +23,22 @@ static void disassemble(Session &session, Disassembly &disasm,
bool ok;
if(pass == "cfg") {
CfgPass p(disasm);
CfgPass p(binary);
ok = p.analyzeAnonymousFunction(address);
}
else if(pass == "pcrel") {
PcrelPass p(disasm);
PcrelPass p(binary);
ok = p.analyzeAllInstructions();
}
else if(pass == "syscall") {
OS *os = session.currentBinary()->OSAnalysis();
OS *os = binary.OSAnalysis();
if(os) {
SyscallPass p(disasm, os);
SyscallPass p(binary, os);
ok = p.analyzeAllInstructions();
}
}
else if(pass == "print") {
PrintPass p(disasm);
p.promote_pcjump_loc = PrintPass::Promote;
p.promote_pcrel_loc = PrintPass::Promote;
p.promote_pcrel_value = PrintPass::Promote;
p.promote_syscall = PrintPass::Promote;
p.promote_syscallname = PrintPass::Append;
p.promote_symbol = PrintPass::Append;
p.promote_pcaddr_loc = PrintPass::Promote;
ok = p.analyzeAllInstructions();
viewAssemblyLegacyAddress(binary, address);
}
else {
FxOS_log(ERR, "unknown pass <%s>", pass);
@ -88,10 +78,9 @@ static _d_args parse_d(Session &session, Parser &parser)
void _d(Session &session, std::variant<long, Range> location)
{
if(!session.currentBinary())
return;
FxOS::Disassembly disasm(session.currentBinary()->vspace());
Binary *b = session.currentBinary();
if(!b)
return FxOS_log(ERR, "No current binary!\n");
if(std::holds_alternative<Range>(location)) {
Range range = std::get<Range>(location);
@ -111,10 +100,9 @@ void _d(Session &session, std::variant<long, Range> location)
/* Load the block into memory */
for(uint32_t pc = range.start; pc < range.end; pc += 2)
disasm.getInstructionAt(pc, true);
b->vspace().disasm.getInstructionAt(pc, true);
disassemble(session, disasm,
{"pcrel", /*"constprop",*/ "syscall", "print"}, -1);
disassemble(*b, {"pcrel", /*"constprop",*/ "syscall", "print"}, -1);
}
else {
uint32_t address = std::get<long>(location);
@ -126,8 +114,7 @@ void _d(Session &session, std::variant<long, Range> location)
}
/* cfg implicitly does pcrel */
disassemble(session, disasm,
{"cfg", /*"constprop",*/ "syscall", "print"}, address);
disassemble(*b, {"cfg", /*"constprop",*/ "syscall", "print"}, address);
}
}
@ -148,10 +135,6 @@ Disassembles code starting at the specified address, exploring branches until
function terminators, invalid instructions, or dynamically-computed jumps. The
default address is $ (the cursor of the current virtual space).
This command does not extend the virtual space's main disassembly. It reads
analysis results from the virtual space, but doesn't add new information. Try
as? to disassemble in the space's main disassembly.
The following disassembler passes are run:
cfg Explores the code reachable from the start address
pcrel Computes PC-relative addresses (eg mov.l, mova, bf, bra...)