2022-03-28 21:59:30 +02:00
|
|
|
//---------------------------------------------------------------------------//
|
|
|
|
// 1100101 |_ mov #0, r4 __ //
|
|
|
|
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
|
|
|
|
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
|
|
|
|
// |_ base# + offset |_| /_\_\___/__/ //
|
|
|
|
//---------------------------------------------------------------------------//
|
|
|
|
// fxos/disassembly: Disassembler infrastructure
|
|
|
|
//
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// TODO: Instead of defining every field for every argument of every
|
|
|
|
// disassembled instruction, set up a system of external annotations.
|
2019-12-14 22:33:57 +01:00
|
|
|
//---
|
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
#ifndef FXOS_DISASSEMBLY_H
|
|
|
|
#define FXOS_DISASSEMBLY_H
|
2019-12-14 22:33:57 +01:00
|
|
|
|
|
|
|
#include <fxos/lang.h>
|
2021-03-16 14:43:43 +01:00
|
|
|
#include <fxos/vspace.h>
|
2019-12-20 11:17:09 +01:00
|
|
|
#include <fxos/semantics.h>
|
2022-03-27 13:12:53 +02:00
|
|
|
#include <fxos/util/Buffer.h>
|
2019-12-16 22:14:02 +01:00
|
|
|
|
2019-12-28 17:18:13 +01:00
|
|
|
#include <set>
|
|
|
|
#include <map>
|
|
|
|
#include <queue>
|
2019-12-14 22:33:57 +01:00
|
|
|
|
|
|
|
namespace FxOS {
|
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Register an instruction. This is called by loader functions from the asm
|
|
|
|
table lexer. [inst] must have its opcode field set. */
|
|
|
|
void register_instruction(AsmInstruction const &inst);
|
2019-12-14 22:33:57 +01:00
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Lex and register an assembly instruction table. */
|
2022-03-04 12:29:33 +01:00
|
|
|
int load_instructions(Buffer const &file);
|
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
|
|
|
|
/* An argument for a disassembled instruction. */
|
|
|
|
struct Argument
|
2019-12-16 22:14:02 +01:00
|
|
|
{
|
2022-03-28 21:59:30 +02:00
|
|
|
Argument();
|
|
|
|
|
|
|
|
// Data set by the <pcrel> pass and abstract interpreter
|
|
|
|
|
|
|
|
/* Location in CPU or memory, if it makes sense and can be determined */
|
|
|
|
Location location;
|
|
|
|
/* Manipulated value. If no information can be obtained, this object
|
|
|
|
evaluates to false when converted to bool. */
|
|
|
|
RelConst value;
|
|
|
|
|
|
|
|
// Data set by the <syscall> pass
|
|
|
|
|
|
|
|
/* If the value is a syscall address, the syscall's id */
|
|
|
|
int syscall_id;
|
2019-12-20 19:33:41 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/* A loaded and annotated instruction. */
|
2022-03-28 21:59:30 +02:00
|
|
|
struct Instruction
|
2019-12-20 19:33:41 +01:00
|
|
|
{
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Build from instruction, cannot be nullptr. */
|
|
|
|
Instruction(AsmInstruction const *inst);
|
|
|
|
/* Build from opcode, if instruction could not be decoded. */
|
|
|
|
Instruction(uint16_t opcode);
|
|
|
|
|
|
|
|
/* What instruction this is. Note that this does not determine all the
|
|
|
|
properties below. Placement and delay slots greatly alter them.
|
|
|
|
This pointer is nullptr if the instruction could not be decoded. */
|
|
|
|
AsmInstruction const *inst;
|
|
|
|
|
|
|
|
/* Argument information (contains data set by several passes) */
|
|
|
|
Argument args[2];
|
|
|
|
|
|
|
|
/* Opcode, valid only if inst==nullptr */
|
|
|
|
uint16_t opcode;
|
|
|
|
|
|
|
|
// Data set by the cfg pass
|
|
|
|
|
|
|
|
/* Whether this instruction is a leader. This is always set by another
|
|
|
|
instruction jumping into this one. */
|
|
|
|
bool leader;
|
|
|
|
/* Whether this instruction is in a delay slot. This is always set by
|
|
|
|
the preceding delayed instruction. */
|
|
|
|
bool delayslot;
|
|
|
|
|
|
|
|
/* Whether this instruction is:
|
|
|
|
-> Terminal, ie. has no successors and is the end of the function.
|
|
|
|
-> An unconditional jump of target [jmptarget]. This is the case for eg.
|
|
|
|
bt, but not bt.s; rather the successor of bt.s is the jump.
|
|
|
|
-> A conditional jump that can hit [jmptarget] and pc+2.
|
|
|
|
If delayslot==false, these attributes are set when analyzing this
|
|
|
|
instruction. If delayslot==true, they are set when the preceding
|
|
|
|
delayed instruction is analyzed. */
|
|
|
|
bool terminal;
|
|
|
|
bool jump;
|
|
|
|
bool condjump;
|
|
|
|
|
|
|
|
/* The jump target, used if jump==true or condjump==true. */
|
|
|
|
uint32_t jmptarget;
|
2019-12-20 19:33:41 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Disassembly interface that automatically loads code from a target */
|
2022-03-28 21:59:30 +02:00
|
|
|
struct Disassembly
|
2019-12-20 19:33:41 +01:00
|
|
|
{
|
2022-03-28 21:59:30 +02:00
|
|
|
Disassembly(VirtualSpace &space);
|
|
|
|
|
|
|
|
/* Check whether an instruction has been visited so far */
|
|
|
|
bool hasins(uint32_t pc);
|
|
|
|
/* Get the minimum and maximum loaded instruction addresses */
|
|
|
|
uint32_t minpc();
|
|
|
|
uint32_t maxpc();
|
|
|
|
|
|
|
|
/* Get the storage to any concrete instruction. The instruction will be
|
|
|
|
loaded and initialized if it had not been read before. */
|
|
|
|
Instruction &readins(uint32_t pc);
|
|
|
|
|
|
|
|
/* For other access patterns */
|
|
|
|
std::map<uint32_t, Instruction> instructions;
|
|
|
|
/* Underlying target */
|
|
|
|
VirtualSpace &space;
|
2019-12-16 22:14:02 +01:00
|
|
|
};
|
|
|
|
|
2019-12-28 17:18:13 +01:00
|
|
|
//---
|
|
|
|
// Disassembler passes
|
|
|
|
//---
|
|
|
|
|
|
|
|
class DisassemblyPass
|
|
|
|
{
|
|
|
|
public:
|
2022-03-28 21:59:30 +02:00
|
|
|
DisassemblyPass(Disassembly &disasm, std::string name="");
|
2019-12-28 17:18:13 +01:00
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Analyze a single instruction, probably updating the annotations and
|
|
|
|
the state of the pass itself. */
|
|
|
|
virtual bool analyze(uint32_t pc, Instruction &inst) = 0;
|
2019-12-28 17:18:13 +01:00
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Run the pass from the given entry point */
|
|
|
|
virtual bool run(uint32_t entry_pc);
|
2019-12-28 17:18:13 +01:00
|
|
|
|
|
|
|
protected:
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Add an instruction to the queue to analyze next */
|
|
|
|
void enqueue(uint32_t pc);
|
|
|
|
/* Add the next loaded instruction in address space */
|
|
|
|
void enqueue_next(uint32_t pc);
|
|
|
|
/* Enqueue the unseen successors of this instruction */
|
|
|
|
void enqueue_unseen_successors(uint32_t pc, Instruction &inst);
|
|
|
|
/* Enqueue all the success of this instruction */
|
|
|
|
void enqueue_all_successors(uint32_t pc, Instruction &inst);
|
|
|
|
|
|
|
|
/* Underlying disassembly */
|
|
|
|
Disassembly &m_disasm;
|
2019-12-28 17:18:13 +01:00
|
|
|
|
|
|
|
private:
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Blocks to visit next, ordered for uniqueness */
|
|
|
|
std::set<uint32_t> m_next;
|
|
|
|
std::priority_queue<uint32_t> m_queue;
|
2019-12-28 17:18:13 +01:00
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Visited blocks */
|
|
|
|
std::set<uint32_t> m_seen;
|
2020-02-29 11:22:26 +01:00
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Name of pass */
|
|
|
|
std::string m_name;
|
2019-12-28 17:18:13 +01:00
|
|
|
};
|
|
|
|
|
2020-02-28 16:19:50 +01:00
|
|
|
/* A disassembly pass that observes each instruction independently */
|
|
|
|
class InstructionDisassemblyPass: public DisassemblyPass
|
|
|
|
{
|
|
|
|
public:
|
2022-03-28 21:59:30 +02:00
|
|
|
InstructionDisassemblyPass(Disassembly &disasm, std::string name="");
|
2020-02-28 16:19:50 +01:00
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
/* Runs the pass from the first instruction currently loaded, all the
|
|
|
|
way down to the bottom, as if always using enqueue_next(). */
|
|
|
|
virtual bool run();
|
2020-02-28 16:19:50 +01:00
|
|
|
};
|
|
|
|
|
2019-12-14 22:33:57 +01:00
|
|
|
} /* namespace FxOS */
|
|
|
|
|
2022-03-28 21:59:30 +02:00
|
|
|
#endif /* FXOS_DISASSEMBLY_H */
|