basic setup for running disassembly passes

This commit is contained in:
Lephenixnoir 2019-12-29 19:17:33 +01:00
parent 468495856d
commit b20731c829
8 changed files with 131 additions and 303 deletions

View File

@ -1,225 +0,0 @@
type: assembly
name: sh-3
---
# Format: [01nmdi]{16}, followed by the mnemonic and the list of arguments.
# In each opcode, there should be at most one sequence of "m", "n", "d" and "i"
# each (representing the location of the argument).
#
# Possible argument strings are predefined and include:
# rn rm #imm
# jump8 jump12 disp pcdisp
# @rn @rm @rn+ @rm+ @-rn
# @(disp,rn) @(disp,rm) @(r0,rn) @(r0,rm) @(disp,gbr)
#
# The disassembler substitutes some elements as follows:
# rn -> value of the "n"-sequence
# rm -> value of the "m"-sequence
# #imm -> value of the "i"-sequence
# disp -> value of the "d"-sequence
# jump8 -> value of the 8-bit "d"-sequence x2 plus value of PC
# jump12 -> value of the 12-bit "d"-sequence x2 plus value of PC
# @(disp,pc) -> value of the 8-bit "d"-sequence x2 or x4, plus value of PC
# TODO: This list does not exactly reflect the behavior of the parser
0000000001001000 clrs
0000000000001000 clrt
0000000000101000 clrmac
0000000000011001 div0u
0000000000111000 ldtlb
0000000000001001 nop
0000000000101011 rte
0000000000001011 rts
0000000001011000 sets
0000000000011000 sett
0000000000011011 sleep
0100nnnn00010101 cmp/pl rn
0100nnnn00010001 cmp/pz rn
0100nnnn00010000 dt rn
0000nnnn00101001 movt rn
0100nnnn00000100 rotl rn
0100nnnn00000101 rotr rn
0100nnnn00100100 rotcl rn
0100nnnn00100101 rotcr rn
0100nnnn00100000 shal rn
0100nnnn00100001 shar rn
0100nnnn00000000 shll rn
0100nnnn00000001 shlr rn
0100nnnn00001000 shll2 rn
0100nnnn00001001 shlr2 rn
0100nnnn00011000 shll8 rn
0100nnnn00011001 shlr8 rn
0100nnnn00101000 shll16 rn
0100nnnn00101001 shlr16 rn
0011nnnnmmmm1100 add rm, rn
0011nnnnmmmm1110 addc rm, rn
0011nnnnmmmm1111 addv rm, rn
0010nnnnmmmm1001 and rm, rn
0011nnnnmmmm0000 cmp/eq rm, rn
0011nnnnmmmm0010 cmp/hs rm, rn
0011nnnnmmmm0011 cmp/ge rm, rn
0011nnnnmmmm0110 cmp/hi rm, rn
0011nnnnmmmm0111 cmp/gt rm, rn
0010nnnnmmmm1100 cmp/str rm, rn
0011nnnnmmmm0100 div1 rm, rn
0010nnnnmmmm0111 div0s rm, rn
0011nnnnmmmm1101 dmuls.l rm, rn
0011nnnnmmmm0101 dmulu.l rm, rn
0110nnnnmmmm1110 exts.b rm, rn
0110nnnnmmmm1111 exts.w rm, rn
0110nnnnmmmm1100 extu.b rm, rn
0110nnnnmmmm1101 extu.w rm, rn
0110nnnnmmmm0011 mov rm, rn
0000nnnnmmmm0111 mul.l rm, rn
0010nnnnmmmm1111 muls.w rm, rn
0010nnnnmmmm1110 mulu.w rm, rn
0110nnnnmmmm1011 neg rm, rn
0110nnnnmmmm1010 negc rm, rn
0110nnnnmmmm0111 not rm, rn
0010nnnnmmmm1011 or rm, rn
0100nnnnmmmm1100 shad rm, rn
0100nnnnmmmm1101 shld rm, rn
0011nnnnmmmm1000 sub rm, rn
0011nnnnmmmm1010 subc rm, rn
0011nnnnmmmm1011 subv rm, rn
0110nnnnmmmm1000 swap.b rm, rn
0110nnnnmmmm1001 swap.w rm, rn
0010nnnnmmmm1000 tst rm, rn
0010nnnnmmmm1010 xor rm, rn
0010nnnnmmmm1101 xtrct rm, rn
0100mmmm00001110 ldc rm, sr
0100mmmm00011110 ldc rm, gbr
0100mmmm00101110 ldc rm, vbr
0100mmmm00111110 ldc rm, ssr
0100mmmm01001110 ldc rm, spc
0100mmmm10001110 ldc rm, r0_bank
0100mmmm10011110 ldc rm, r1_bank
0100mmmm10101110 ldc rm, r2_bank
0100mmmm10111110 ldc rm, r3_bank
0100mmmm11001110 ldc rm, r4_bank
0100mmmm11011110 ldc rm, r5_bank
0100mmmm11101110 ldc rm, r6_bank
0100mmmm11111110 ldc rm, r7_bank
0100mmmm00001010 lds rm, mach
0100mmmm00011010 lds rm, macl
0100mmmm00101010 lds rm, pr
0000nnnn00000010 stc sr, rn
0000nnnn00010010 stc gbr, rn
0000nnnn00100010 stc vbr, rn
0000nnnn00110010 stc ssr, rn
0000nnnn01000010 stc spc, rn
0000nnnn10000010 stc r0_bank, rn
0000nnnn10010010 stc r1_bank, rn
0000nnnn10100010 stc r2_bank, rn
0000nnnn10110010 stc r3_bank, rn
0000nnnn11000010 stc r4_bank, rn
0000nnnn11010010 stc r5_bank, rn
0000nnnn11100010 stc r6_bank, rn
0000nnnn11110010 stc r7_bank, rn
0000nnnn00001010 sts mach, rn
0000nnnn00011010 sts macl, rn
0000nnnn00101010 sts pr, rn
0100nnnn00101011 jmp @rn
0100nnnn00001011 jsr @rn
0000nnnn10000011 pref @rn
0100nnnn00011011 tas.b @rn
0010nnnnmmmm0000 mov.b rm, @rn
0010nnnnmmmm0001 mov.w rm, @rn
0010nnnnmmmm0010 mov.l rm, @rn
0110nnnnmmmm0000 mov.b @rm, rn
0110nnnnmmmm0001 mov.w @rm, rn
0110nnnnmmmm0010 mov.l @rm, rn
0000nnnnmmmm1111 mac.l @rm+, @rn+
0100nnnnmmmm1111 mac.w @rm+, @rn+
0110nnnnmmmm0100 mov.b @rm+, rn
0110nnnnmmmm0101 mov.w @rm+, rn
0110nnnnmmmm0110 mov.l @rm+, rn
0100mmmm00000111 ldc.l @rm+, sr
0100mmmm00010111 ldc.l @rm+, gbr
0100mmmm00100111 ldc.l @rm+, vbr
0100mmmm00110111 ldc.l @rm+, ssr
0100mmmm01000111 ldc.l @rm+, spc
0100mmmm10000111 ldc.l @rm+, r0_bank
0100mmmm10010111 ldc.l @rm+, r1_bank
0100mmmm10100111 ldc.l @rm+, r2_bank
0100mmmm10110111 ldc.l @rm+, r3_bank
0100mmmm11000111 ldc.l @rm+, r4_bank
0100mmmm11010111 ldc.l @rm+, r5_bank
0100mmmm11100111 ldc.l @rm+, r6_bank
0100mmmm11110111 ldc.l @rm+, r7_bank
0100mmmm00000110 lds.l @rm+, mach
0100mmmm00010110 lds.l @rm+, macl
0100mmmm00100110 lds.l @rm+, pr
0010nnnnmmmm0100 mov.b rm, @-rn
0010nnnnmmmm0101 mov.w rm, @-rn
0010nnnnmmmm0110 mov.l rm, @-rn
0100nnnn00000011 stc.l sr, @-rn
0100nnnn00010011 stc.l gbr, @-rn
0100nnnn00100011 stc.l vbr, @-rn
0100nnnn00110011 stc.l ssr, @-rn
0100nnnn01000011 stc.l spc, @-rn
0100nnnn10000011 stc.l r0_bank, @-rn
0100nnnn10010011 stc.l r1_bank, @-rn
0100nnnn10100011 stc.l r2_bank, @-rn
0100nnnn10110011 stc.l r3_bank, @-rn
0100nnnn11000011 stc.l r4_bank, @-rn
0100nnnn11010011 stc.l r5_bank, @-rn
0100nnnn11100011 stc.l r6_bank, @-rn
0100nnnn11110011 stc.l r7_bank, @-rn
0100nnnn00000010 sts.l mach, @-rn
0100nnnn00010010 sts.l macl, @-rn
0100nnnn00100010 sts.l pr, @-rn
10000000nnnndddd mov.b r0, @(disp,rn)
10000001nnnndddd mov.w r0, @(disp,rn)
0001nnnnmmmmdddd mov.l rm, @(disp,rn)
10000100mmmmdddd mov.b @(disp,rm), r0
10000101mmmmdddd mov.w @(disp,rm), r0
0101nnnnmmmmdddd mov.l @(disp,rm), rn
0000nnnnmmmm0100 mov.b rm, @(r0,rn)
0000nnnnmmmm0101 mov.w rm, @(r0,rn)
0000nnnnmmmm0110 mov.l rm, @(r0,rn)
0000nnnnmmmm1100 mov.b @(r0,rm), rn
0000nnnnmmmm1101 mov.w @(r0,rm), rn
0000nnnnmmmm1110 mov.l @(r0,rm), rn
11000000dddddddd mov.b r0, @(disp,gbr)
11000001dddddddd mov.w r0, @(disp,gbr)
11000010dddddddd mov.l r0, @(disp,gbr)
11000100dddddddd mov.b @(disp,gbr), r0
11000101dddddddd mov.w @(disp,gbr), r0
11000110dddddddd mov.l @(disp,gbr), r0
11001101iiiiiiii and.b #imm, @(r0,gbr)
11001111iiiiiiii or.b #imm, @(r0,gbr)
11001100iiiiiiii tst.b #imm, @(r0,gbr)
11001110iiiiiiii xor.b #imm, @(r0,gbr)
1001nnnndddddddd mov.w @(disp,pc), rn
1101nnnndddddddd mov.l @(disp,pc), rn
11000111dddddddd mova.l @(disp,pc), r0
0000mmmm00100011 braf rm
0000mmmm00000011 bsrf rm
10001011dddddddd bf jump8
10001111dddddddd bf.s jump8
10001001dddddddd bt jump8
10001101dddddddd bt.s jump8
1010dddddddddddd bra jump12
1011dddddddddddd bsr jump12
0111nnnniiiiiiii add #imm, rn
11001001iiiiiiii and #imm, r0
10001000iiiiiiii cmp/eq #imm, r0
1110nnnniiiiiiii mov #imm, rn
11001011iiiiiiii or #imm, r0
11001000iiiiiiii tst #imm, r0
11001010iiiiiiii xor #imm, r0
11000011iiiiiiii trapa #imm

View File

@ -1,26 +0,0 @@
type: assembly
name: sh-4a-extensions
---
0000nnnn01110011 movco.l r0, @rn
0000mmmm01100011 movli.l @rm, r0
0100mmmm10101001 movua.l @rm, r0
0100mmmm11101001 movua.l @rm+, r0
0000nnnn11000011 movca.l r0, @rn
0000nnnn11100011 icbi @rn
0000nnnn10010011 ocbi @rn
0000nnnn10100011 ocbp @rn
0000nnnn10110011 ocbwb @rn
0000nnnn11010011 prefi @rn
0000000010101011 synco
0100mmmm00111010 ldc rm, sgr
0100mmmm11111010 ldc rm, dbr
0100mmmm00110110 ldc.l @rm+, sgr
0100mmmm11110110 ldc.l @rm+, dbr
0000nnnn00111010 stc sgr, rn
0000nnnn11111010 stc dbr, rn
0100nnnn00110010 stc.l sgr, @-rn
0100nnnn11110010 stc.l dbr, @-rn

View File

@ -1,8 +1,13 @@
#include "fxos-cli.h"
#include <fxos/load.h>
#include <fxos/disassembly.h>
#include <fxos/errors.h>
#include <fxos/target.h>
#include <fxos/load.h>
#include <fxos/os.h>
#include <fxos/disasm-passes/cfg.h>
#include <getopt.h>
#include <filesystem>
@ -16,42 +21,43 @@ namespace fs = std::filesystem;
using namespace FxOS;
static char const *help_string = R"(
usage: fxos info <os file>
fxos disasm <target or file> <region or function> [options...]
fxos disasm -b <binary file> [options...]
usage: fxos info <target>
fxos disasm <target> <region or function> [options...]
fxos analyze [-f] [-s] [-a] [-r] <number> <os file> [options...]
fxos is a reverse-engineering tool that disassembles and analyzes SuperH
programs and OS dumps for fx9860g and fxcg50-like CASIO calculators, using an
editable database of platforms, syscalls, and OS knowledge.
Commands:
info Identify an OS image: version, platform, date, checksums...
disasm Disassemble and annotate code with relative address targets,
syscall invocations, control flow, constant propagation and hints
about memory structure.
analyze Dig an address or syscall number, finding syscall references,
4-aligned occurrences, memory region and probable role.
editable database of platform, syscall, and OS knowledge.
General options:
-b Work with an arbitrary binary file, not an OS
-3, --sh3 Assume SH3 OS and platform (default: SH4)
-4, --sh4 Assume SH4 OS and platform (default: SH4)
Database extensions:
--load <file> Read documentation from <file>
--load <folder> Read documentation recursively from <folder>
A <target> is either:
<targetname> A target in library (eg "fx@3.10")
-f <file> An arbitrary file which is loaded as ROM
Disassembly file selection:
<target> Disassemble this target from the database (eg. "fx@3.10")
-f <file> Disassemble this file as standalone ROM
INFO COMMAND
Disassembly options:
Identify an OS image: version, platform, date, checksums...
DISASM COMMAND
Disassemble and annotate code with relative address targets, syscalls, control
flow, propagated constants and hints about memory structure.
Location specifiers:
<address> Start disassembling at this address (hexa)
<address>:<len> Disassemble exactly the specified region. <len> is an
hexadecimal number optionnally followed by k, M, or G.
hexadecimal number optionally followed by k, M, or G.
%<syscall id> Start disassembling at this syscall's address (hexa)
<symbol> Disassemble this library symbol (typically a syscall name).
Note that <address> takes precedence if ambiguous.
Disassembly options:
-p <list> Execute the specified comma-separated list of passes
--load <file> Read additional documentation from <file>
--load <folder> Read additional documentation recursively from <folder>
Available passes:
cfg Build the control flow graph (always required)
@ -61,8 +67,13 @@ Available passes:
regs Annotate code with peripheral register addresses
The default sequence of passes is cfg,pcrel,cstprop,syscall,regs. When
disassembling a function (ie. no size specified on the command-line), the pcrel
and cfg passes are always executed to explore the function.
disassembling a function (ie. no size specified on the command-line), the cfg
pass is always executed to explore the function.
ANALYZE COMMAND
Dig an address or syscall number. Finds syscall references, 4-aligned
occurrences, memory region...
Analysis modes:
-f, --full Run all analysis passes on <number> (same as -sar)
@ -312,16 +323,6 @@ int main_disassembly(int argc, char **argv)
}
if(error) return 1;
/* try {
FxOS::load("data/sh3.txt");
if(mpu == '4') FxOS::load("data/sh4.txt");
}
catch(FxOS::SyntaxError &e) {
std::cerr << e.file() << ":" << e.line() << ": " <<
e.what() << "\n" << std::flush;
return 1;
} */
if(!file.size())
{
std::string targetname = argv[optind + 1];
@ -333,11 +334,26 @@ int main_disassembly(int argc, char **argv)
return 1;
}
Target t(targets[targetname], library);
Target target(targets[targetname], library);
char const *ref = argv[optind + 2];
char const *refstr = argv[optind + 2];
uint32_t ref;
sscanf(refstr, "%x", &ref);
std::cout << "disassembling target:" << targetname << " ref:" << ref << "\n";
Disassembly disasm(target);
OS *os = nullptr;
std::cout << "disassembling target:" << targetname << " ref:" << refstr << "\n";
for(auto pass: passes)
{
std::cout << "running pass: " << pass << "\n";
if(pass == "cfg")
{
CfgPass p(disasm);
p.run(ref);
}
}
}
else
{

View File

@ -0,0 +1,29 @@
//---
// fxos.disasm-passes.cfg: Control Flow Graph construction
//
// This pass explores functions and computes the [jmptarget] field of concrete
// instructions as it goes. This is required for other passes that work by
// traversing the CFG, such as the abstract interpreter.
//
// This is the main exploration pass. Other passes do not typically load new
// instructions from the underlying disassembly. Straightforward passes such as
// [print] iterate on instructions loaded by this pass.
//---
#ifndef LIBFXOS_DISASM_PASSES_CFG_H
#define LIBFXOS_DISASM_PASSES_CFG_H
#include <fxos/disassembly.h>
namespace FxOS {
/* Control flow graph construction pass. As described in the notes at the top
   of this header, it explores functions and fills in the [jmptarget] field of
   concrete instructions as it goes. */
class CfgPass: public DisassemblyPass
{
public:
/* Build the pass on top of the disassembly [disasm] */
CfgPass(Disassembly &disasm);
/* Override of the DisassemblyPass analysis hook; receives an address [pc]
   and the concrete instruction [inst] to process.
   NOTE(review): presumably invoked once per dequeued instruction - confirm
   against DisassemblyPass. */
void analyze(uint32_t pc, ConcreteInstruction &inst) override;
};
} /* namespace FxOS */
#endif /* LIBFXOS_DISASM_PASSES_CFG_H */

View File

@ -102,6 +102,12 @@ class Disassembly
public:
Disassembly(Target &target);
/* Check whether an instruction has been visited so far */
bool hasins(uint32_t pc);
/* Get the minimum and maximum loaded instruction addresses */
uint32_t minpc();
uint32_t maxpc();
/* Get the storage to any concrete instruction. The instruction will be
loaded and initialized if it had not been read before. */
ConcreteInstruction &readins(uint32_t pc);
@ -133,6 +139,8 @@ public:
protected:
/* Add an instruction to the queue to analyze next */
void enqueue(uint32_t pc);
/* Add the next loaded instruction in address space */
void enqueue_next(uint32_t pc);
/* Enqueue the unseen successors of this instruction */
void enqueue_unseen_successors(uint32_t pc, ConcreteInstruction &inst);
/* Enqueue all the successors of this instruction */
@ -150,11 +158,6 @@ private:
std::set<uint32_t> m_seen;
};
/* Control flow graph construction pass, derived from DisassemblyPass. */
class CfgPass: public DisassemblyPass
{
public:
    /* Members of a [class] are private unless stated otherwise; without the
       access specifier above, client code could neither construct the pass
       nor reach its analysis hook. */

    /* Build the pass on top of the disassembly [disasm] */
    CfgPass(Disassembly &disasm);
    /* Override of the DisassemblyPass analysis hook for the instruction
       [inst] located at address [pc] */
    void analyze(uint32_t pc, ConcreteInstruction &inst) override;
};
class PcrelPass: public DisassemblyPass
{

View File

@ -24,13 +24,6 @@ namespace FxOS {
class DataType
{
public:
/* Copy constructor */
DataType(DataType const &other);
DataType & operator = (DataType other);
/* Destructor that takes into account the non-trivial union */
~DataType();
enum DataKind {
/* Base types */
Integral,

View File

@ -45,9 +45,38 @@ Disassembly::Disassembly(Target &target):
{
}
/* Tell whether an instruction is already stored at address [pc]. This only
   looks the address up; nothing is loaded or modified. */
bool Disassembly::hasins(uint32_t pc)
{
    auto const found = m_instructions.find(pc);
    return found != m_instructions.end();
}
/* Lowest address among the stored instructions. When no instruction is
   stored, the initial value 0xffffffff is returned unchanged. */
uint32_t Disassembly::minpc()
{
    uint32_t lowest = 0xffffffff;

    for(auto it = m_instructions.begin(); it != m_instructions.end(); ++it)
    {
        uint32_t const addr = it->first;
        lowest = (addr < lowest) ? addr : lowest;
    }

    return lowest;
}
/* Highest address among the stored instructions. When no instruction is
   stored, the initial value 0x00000000 is returned unchanged. */
uint32_t Disassembly::maxpc()
{
    uint32_t highest = 0x00000000;

    for(auto it = m_instructions.begin(); it != m_instructions.end(); ++it)
    {
        uint32_t const addr = it->first;
        highest = (addr > highest) ? addr : highest;
    }

    return highest;
}
ConcreteInstruction &Disassembly::readins(uint32_t pc)
{
if(pc & 1) throw std::runtime_error("Disassembly::readins at odd PC");
if(pc & 1) throw std::runtime_error("Disassembly::ins_read at odd PC");
try
{
@ -64,7 +93,7 @@ ConcreteInstruction &Disassembly::readins(uint32_t pc)
Instruction &inst = *insmap[opcode];
ConcreteInstruction ci(inst);
m_instructions.emplace(std::make_pair(pc, ci));
m_instructions.emplace(pc, ci);
return m_instructions.at(pc);
}
}
@ -86,6 +115,15 @@ void DisassemblyPass::enqueue(uint32_t pc)
m_queue.push(pc);
}
/* Enqueue the loaded instruction that follows [pc] in address space, stepping
   two bytes at a time. Nothing is enqueued when no loaded instruction lies
   above [pc]. */
void DisassemblyPass::enqueue_next(uint32_t pc)
{
    /* TODO: DisassemblyPass::enqueue_next is inefficient */

    /* Bound the scan by the highest loaded address. Without this limit, a
       call on the last loaded instruction would walk the entire 32-bit
       address space, wrap around, and enqueue an earlier address (or never
       stop if no loaded address shares the parity of [pc]). */
    uint32_t const last = m_disasm.maxpc();

    do
    {
        if(pc >= last) return;
        pc += 2;
    }
    while(!m_disasm.hasins(pc));

    enqueue(pc);
}
void DisassemblyPass::enqueue_unseen_successors(uint32_t pc,
ConcreteInstruction &inst)
{

View File

@ -14,7 +14,7 @@ PrintPass::PrintPass(Disassembly &disasm):
/* Analysis hook of the print pass: writes the mnemonic of the instruction
   [ci] located at [pc] to standard output, then schedules further
   instructions to visit. */
void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci)
{
/* One mnemonic per line; "\n" is used so the stream is not flushed here */
std::cout << ci.inst.mnemonic << "\n";
/* Queue the successors of this instruction that have not been seen yet */
enqueue_unseen_successors(pc, ci);
/* Queue the next loaded instruction in address space after [pc] */
enqueue_next(pc);
}
} /* namespace FxOS */