basic stuff on the disassembler/interpreter, nothing serious yet

This commit is contained in:
Lephenixnoir 2019-12-16 22:14:02 +01:00
parent c9ecb855de
commit 8812886e58
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
7 changed files with 110 additions and 69 deletions

24
data/base-types.txt Normal file
View File

@ -0,0 +1,24 @@
type: types
name: base
---
u32 StatusRegister {
u _ :1;
u MD :1;
u RB :1;
u BL :1;
u _ :20;
u IMASK :4;
u _ :3;
u T :1;
}
struct MountTableEntry {
u32 _;
u32 _;
u32 _;
char[20] path2;
char[18] path1;
u8 mounted;
u8 _;
}

View File

@ -37,14 +37,17 @@ Disassembly options:
-l <length> Length of region
--passes=<list> Execute the specified comma-separated list of passes
The default list of passes is pcrel,cfg,cstprop,syscall,regs. The available
passes are the following:
Available passes:
pcrel Resolve PC-relative references as their target address
cfg Build the control flow graph (uses pcrel)
cstprop Propagate constants by abstract interpretation (uses cfg)
syscall Annotate code with reverse syscalls
regs Annotate code with peripheral register addresses
The default sequence of passes is pcrel,cfg,cstprop,syscall,regs. When
disassembling a function (ie. no size specified on the command-line), the pcrel
and cfg passes are always executed to explore the function.
Analysis modes:
-f, --full Run all analysis passes on <number> (same as -sar)
-s, --syscall Run syscall ID analysis
@ -106,13 +109,11 @@ int main_disassembly(int argc, char **argv)
{
std::cerr << "doing main_disasm, which is incomplete x_x\n";
try
{
try {
FxOS::load("data/sh3.txt");
FxOS::load("data/sh4.txt");
}
catch(FxOS::SyntaxError &e)
{
catch(FxOS::SyntaxError &e) {
std::cerr << e.file() << ":" << e.line() << ": " <<
e.what() << "\n" << std::flush;
return 1;

View File

@ -6,17 +6,38 @@
#define LIBFXOS_DISASSEMBLY_H
#include <fxos/lang.h>
#include <cstdint>
#include <fxos/operands.h>
#include <vector>
#include <optional>
namespace FxOS {
/* Register an instruction.
   @ins  Instruction with [opcode] set to the binary pattern

   The instruction is taken by value. Typically this is called by loader
   functions from data tables describing instructions with parameters, not
   manually. See <fxos/load.h>. */
void register_instruction(Instruction ins);
/* A loaded instruction with all relevant information, and more. */
class LoadedInstruction
{
private:
    /* Description of the instruction; held by reference, not copied */
    Instruction& m_inst;

    /* One slot per argument; a slot stays empty until its operand has been
       determined.
       NOTE(review): Operand in <fxos/operands.h> declares pure virtual
       members, so std::optional<Operand> may not be instantiable - confirm
       the intended storage before relying on this vector. */
    std::vector<std::optional<Operand>> args;

    /* Jump targets, meaningful for jump instructions only. Unconditional
       jumps use jmp; conditional jumps use jmpt and jmpf. jmp and jmpt
       share the same storage. Because of the delay slot mechanism, a
       preceding branch often forces the jump onto an otherwise general
       instruction. */
    union {
        uint32_t jmp;
        uint32_t jmpt;
    };
    uint32_t jmpf;
};
} /* namespace FxOS */
#endif /* LIBFXOS_DISASSEMBLY_H */

View File

@ -18,7 +18,7 @@ public:
SyntaxError(char const *file, int line, char const *what):
m_file(file), m_line(line), m_what(what) {}
/* Provides access to these free objets */
/* Provides access to these free objects */
char const *file() const noexcept {
return m_file;
}

View File

@ -32,13 +32,13 @@ public:
uint32_t size() const noexcept;
/* Conversion to MemoryAreaName for switch */
constexpr operator MemoryAreaName() noexcept { return m_name; }
constexpr operator MemoryAreaName () noexcept { return m_name; }
/* Comparison operators */
constexpr bool operator==(MemoryArea a) const {
constexpr bool operator == (MemoryArea a) const {
return m_name == a.m_name;
}
constexpr bool operator!=(MemoryArea a) const {
constexpr bool operator != (MemoryArea a) const {
return m_name != a.m_name;
}
@ -71,8 +71,8 @@ struct MemoryRegion
std::string name {};
/* Start address and end address. Generally the end address has one
additionnaly byte. This is okay since no region is supposed to
extend to the very end of the memory. */
additional byte. This is okay since no region is supposed to extend
to the very end of the memory. */
uint32_t start, end;
/* The region is writable under normal conditions */

View File

@ -1,66 +1,61 @@
#ifndef LIBFXOS_OPERANDS_H
#define LIBFXOS_OPERANDS_H
#include <fxos/lang.h>
namespace FxOS {
/* Data types:
Registers longwords
Memory aligned u8, u16, u32
User-defined structs, ... */
/* Status register does not contain too much useful stuff:
(30)MD (29)RB -> Disassembler needs not understand them
(9)M (8)Q -> Idem, divisions are very rare
(7-4)IMASK (28)BL -> Interrupt system rarely used, even less explicitly
(1)S -> ?
(0)T -> Now THAT is important
Disassembler should name them but needs not understand their role, except T.
The T bit might just be stored outside. */
enum class DataKind {
/* Base types */
Integral,
/* Bit fields over integers */
BitField,
/* Structures (can only reside in memory) */
Struct,
/* Arrays (can only reside in memory) */
Array,
};
//---
// Data type representation
//
// The abstract interpreter supports the following data types when analyzing
// data movement and access:
// Integers i8 u8 i16 u16 i32 u32 (regs, mem)
// Bit fields over ints T { <fields> } (regs, mem)
// Arrays of any type T[] T[n] (mem)
// Strings char[] char[n] (mem)
// Structures struct { <fields> } (mem)
//---
class DataType
{
public:
virtual DataKind kind() const noexcept = 0;
};
enum DataKind {
/* Base types */
Integral,
/* Bit fields over integers */
BitField,
/* Structures (can only reside in memory) */
Struct,
/* Arrays (can only reside in memory) */
Array,
};
class IntegralType: public DataType
{
public:
IntegralType(int bitsize);
DataKind kind() const noexcept override {
return DataKind::Integral;
}
/* Type kind */
DataKind kind;
/* Type size in bytes, as would be returned by sizeof(). Must be 1, 2
or 4 for integral types and bit fields. Might be 0 for arrays if the
size of the array is unknown */
uint16_t size;
/* Type alignment, can only be 1, 2 or 4 */
uint16_t align;
private:
int m_size;
};
class BitFieldType: public DataType
{
/* Type of fields in bit fields */
using Field = std::pair<int, std::string>;
public:
BitFieldType(std::vector<Field> fields);
DataKind kind() const noexcept override {
return DataKind::BitField;
}
private:
int m_size;
std::vector<Field> m_fields;
union {
/* For integer types of size 1, whether to display as char
(might be extended to more attributes later) */
bool ischar;
/* For bit field types */
std::vector<Field> fields;
/* For struct types */
std::vector<DataType> attributes;
/* For array types, number of elements (0 if unknown or
variable-size NUL-terminated strings) */
int elements;
};
};
enum class OperandKind {
@ -77,30 +72,30 @@ class Operand
public:
/* Returns the operand kind (which is also the subclass identity) */
virtual OperandKind type() const noexcept = 0;
/* Sring representation */
/* String representation */
virtual std::string str() const noexcept = 0;
};
class RegisterOperand: public Operand
{
public:
RegisterOperand(std::string name): m_name(name) {}
RegisterOperand(CpuRegister name): m_name(name) {}
OperandKind type() const noexcept override {
return OperandKind::CpuRegister;
}
std::string name() const noexcept {
CpuRegister name() const noexcept {
return m_name;
}
std::string str() const noexcept override {
return this->name();
return m_name.str();
}
private:
/* Register name for assembler listings */
std::string m_name;
CpuRegister m_name;
};
} /* namespace FxOS */

View File

@ -25,7 +25,7 @@ std::string format(std::string const &format, Args ... args)
return std::string(buf.get(), buf.get() + size - 1);
}
/* An object extracted from a targets, which has a virtual address */
/* An object extracted from a target, which has a virtual address */
template<typename T>
struct Addressable
{