massive improvements to memory use by compacting core objects
* Store CpuRegister on a single byte
* Store operation sizes (0, 1, 2, 4) on a single byte
* Share the (disp) and (imm) fields of instruction arguments
* Store instruction mnemonics as char[12] instead of std::string (>32B)
* Store instruction args in Argument[2], not std::vector (>24B)

Size changes:
  CpuRegister: 4B -> 1B
  Argument: 24B -> 8B
  Instruction: >64B -> 32B

This reduced the malloc size from 3.3M to 177k after a standard 40-line disassembly (this excludes OS files mapped to memory), and improved the loading time for the SH3 instruction table by about 30% (100 ms -> 65 ms).
This commit is contained in:
parent
fc9b292846
commit
c8b28b447f
|
@ -15,8 +15,8 @@ namespace FxOS {
|
|||
class CpuRegister
|
||||
{
|
||||
public:
|
||||
enum CpuRegisterName {
|
||||
/* Value 0 is reserved for special purposes such as "no reg" */
|
||||
enum CpuRegisterName: int8_t {
|
||||
/* Value 0 is reserved for special purposes such as "no register" */
|
||||
UNDEFINED = 0,
|
||||
/* Caller-saved general-purpose registers */
|
||||
R0, R1, R2, R3, R4, R5, R6, R7,
|
||||
|
@ -61,7 +61,7 @@ private:
|
|||
struct Argument
|
||||
{
|
||||
/* Various addressing modes in the language */
|
||||
enum Kind {
|
||||
enum Kind: int8_t {
|
||||
Reg, /* rn */
|
||||
Deref, /* @rn */
|
||||
PostInc, /* @rn+ */
|
||||
|
@ -85,12 +85,15 @@ struct Argument
|
|||
CpuRegister base;
|
||||
/* Index register. Valid for ArrayDeref */
|
||||
CpuRegister index;
|
||||
/* Displacement in bytes. For StructDeref, PcRel, PcJump, and PcAddr */
|
||||
int disp;
|
||||
/* Operation size. Generally a multiplier for disp */
|
||||
int opsize;
|
||||
/* Immediate value. Valid for Imm */
|
||||
int imm;
|
||||
/* Operation size (0, 1, 2 or 4). Generally a multiplier for disp */
|
||||
int8_t opsize;
|
||||
|
||||
union {
|
||||
/* Displacement in bytes. For StructDeref, PcRel, PcJump, and PcAddr */
|
||||
int disp;
|
||||
/* Immediate value. Valid for Imm */
|
||||
int imm;
|
||||
};
|
||||
};
|
||||
|
||||
/* Argument constructors */
|
||||
|
@ -112,20 +115,22 @@ struct Instruction
|
|||
Instruction() = default;
|
||||
|
||||
/* Construct with one or several arguments */
|
||||
Instruction(std::string mnemonic);
|
||||
Instruction(std::string mnemonic, Argument arg);
|
||||
Instruction(std::string mnemonic, Argument arg1, Argument arg2);
|
||||
Instruction(char const *mnemonic);
|
||||
Instruction(char const *mnemonic, Argument arg);
|
||||
Instruction(char const *mnemonic, Argument arg1, Argument arg2);
|
||||
|
||||
/* Original opcode. Initialized to 0 when unset, which is an invalid
|
||||
instruction by design. */
|
||||
uint16_t opcode;
|
||||
/* Operation size (0, 1, 2 or 4) */
|
||||
int8_t opsize;
|
||||
/* Number of arguments */
|
||||
uint8_t arg_count;
|
||||
|
||||
/* Mnemonic **without the size indicator** */
|
||||
std::string mnemonic;
|
||||
/* Operation size (0, 1, 2 or 4) */
|
||||
int opsize;
|
||||
/* Arguments */
|
||||
std::vector<Argument> args;
|
||||
char mnemonic[12];
|
||||
/* Arguments (up to 2) */
|
||||
Argument args[2];
|
||||
|
||||
//---
|
||||
// Instruction classes
|
||||
|
|
56
lib/lang.cpp
56
lib/lang.cpp
|
@ -2,6 +2,7 @@
|
|||
#include <fxos/util.h>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
|
||||
namespace FxOS {
|
||||
|
@ -193,41 +194,46 @@ std::string Argument::str() const
|
|||
// Instruction management
|
||||
//---
|
||||
|
||||
Instruction::Instruction(std::string mn):
|
||||
opcode(0), opsize(0)
|
||||
Instruction::Instruction(char const *mn):
|
||||
opcode(0), opsize(0), arg_count(0)
|
||||
{
|
||||
int pos = std::max(0, (int)mn.size() - 2);
|
||||
int len = strlen(mn);
|
||||
int pos = std::max(0, len - 2);
|
||||
|
||||
if(mn.substr(pos, 2) == ".b")
|
||||
if(!strncmp(mn + pos, ".b", 2))
|
||||
{
|
||||
opsize = 1;
|
||||
mn.erase(pos, 2);
|
||||
len -= 2;
|
||||
}
|
||||
else if(mn.substr(pos, 2) == ".w")
|
||||
else if(!strncmp(mn + pos, ".w", 2))
|
||||
{
|
||||
opsize = 2;
|
||||
mn.erase(pos, 2);
|
||||
len -= 2;
|
||||
}
|
||||
else if(mn.substr(pos, 2) == ".l")
|
||||
else if(!strncmp(mn + pos, ".l", 2))
|
||||
{
|
||||
opsize = 4;
|
||||
mn.erase(pos, 2);
|
||||
len -= 2;
|
||||
}
|
||||
|
||||
mnemonic = mn;
|
||||
len = std::min(len, 11);
|
||||
strncpy(mnemonic, mn, len);
|
||||
mnemonic[len] = 0;
|
||||
}
|
||||
|
||||
Instruction::Instruction(std::string mn, Argument arg):
|
||||
Instruction::Instruction(char const *mn, Argument arg):
|
||||
Instruction(mn)
|
||||
{
|
||||
args.push_back(arg);
|
||||
args[0] = arg;
|
||||
arg_count = 1;
|
||||
}
|
||||
|
||||
Instruction::Instruction(std::string mn, Argument arg1, Argument arg2):
|
||||
Instruction::Instruction(char const *mn, Argument arg1, Argument arg2):
|
||||
Instruction(mn)
|
||||
{
|
||||
args.push_back(arg1);
|
||||
args.push_back(arg2);
|
||||
args[0] = arg1;
|
||||
args[1] = arg2;
|
||||
arg_count = 2;
|
||||
}
|
||||
|
||||
//---
|
||||
|
@ -236,39 +242,39 @@ Instruction::Instruction(std::string mn, Argument arg1, Argument arg2):
|
|||
|
||||
bool Instruction::isterminal() const noexcept
|
||||
{
|
||||
if(mnemonic == "rte" || mnemonic == "rts") return true;
|
||||
if(!strcmp(mnemonic, "rte") || !strcmp(mnemonic, "rts")) return true;
|
||||
|
||||
/* Also jmp @rn which is regarded as a terminal call */
|
||||
if(mnemonic == "jmp" && args[0].kind == Argument::Deref) return true;
|
||||
if(!strcmp(mnemonic,"jmp") && args[0].kind == Argument::Deref) return true;
|
||||
/* Same for braf because we can't analyse further */
|
||||
if(mnemonic == "braf") return true;
|
||||
if(!strcmp(mnemonic, "braf")) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Instruction::isjump() const noexcept
|
||||
{
|
||||
return (mnemonic == "bra");
|
||||
return !strcmp(mnemonic, "bra");
|
||||
}
|
||||
|
||||
bool Instruction::iscondjump() const noexcept
|
||||
{
|
||||
std::vector<std::string> v {
|
||||
"bf", "bf.s", "bf/s", "bt", "bt.s", "bt/s"
|
||||
char const *v[] = {
|
||||
"bf", "bf.s", "bf/s", "bt", "bt.s", "bt/s", NULL,
|
||||
};
|
||||
|
||||
for(auto el: v) if(mnemonic == el) return true;
|
||||
for(int i = 0; v[i]; i++) if(!strcmp(mnemonic, v[i])) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Instruction::isdelayed() const noexcept
|
||||
{
|
||||
std::vector<std::string> v {
|
||||
char const *v[] = {
|
||||
"rte", "rts", "jmp", "jsr", "bra", "braf", "bsr", "bsrf",
|
||||
"bf.s", "bf/s", "bt.s", "bt/s",
|
||||
"bf.s", "bf/s", "bt.s", "bt/s", NULL,
|
||||
};
|
||||
|
||||
for(auto el: v) if(mnemonic == el) return true;
|
||||
for(int i = 0; v[i]; i++) if(!strcmp(mnemonic, v[i])) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -241,7 +241,7 @@ static Argument make_arg(int token, int opsize, int m, int n, int d, int i)
|
|||
|
||||
Generates all the instances of the instruction, then sends them to the
|
||||
disassembler for fast lookup. Returns number of instantiated opcodes. */
|
||||
static int instantiate(struct Pattern p, std::string mnemonic, int argtoken1,
|
||||
static int instantiate(struct Pattern p, char const *mnemonic, int argtoken1,
|
||||
int argtoken2)
|
||||
{
|
||||
int total = 0;
|
||||
|
@ -260,10 +260,14 @@ static int instantiate(struct Pattern p, std::string mnemonic, int argtoken1,
|
|||
Instruction ins(mnemonic);
|
||||
ins.opcode = opcode;
|
||||
|
||||
if(argtoken1) ins.args.push_back(
|
||||
make_arg(argtoken1, ins.opsize, m, n, d, i));
|
||||
if(argtoken2) ins.args.push_back(
|
||||
make_arg(argtoken2, ins.opsize, m, n, d, i));
|
||||
if(argtoken1) {
|
||||
ins.args[0] = make_arg(argtoken1, ins.opsize, m,n,d,i);
|
||||
ins.arg_count = 1;
|
||||
}
|
||||
if(argtoken2) {
|
||||
ins.args[1] = make_arg(argtoken2, ins.opsize, m,n,d,i);
|
||||
ins.arg_count = 2;
|
||||
}
|
||||
|
||||
register_instruction(ins);
|
||||
total++;
|
||||
|
|
|
@ -32,7 +32,7 @@ void CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci)
|
|||
{
|
||||
auto &args = ci.inst->args;
|
||||
|
||||
if(args.size() != 1 || args[0].kind != Argument::PcJump)
|
||||
if(ci.inst->arg_count != 1 || args[0].kind != Argument::PcJump)
|
||||
throw LangError(pc, "invalid jump instruction");
|
||||
|
||||
jmptarget = (pc+4) + args[0].disp;
|
||||
|
|
|
@ -16,7 +16,7 @@ void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci)
|
|||
Instruction const *i = ci.inst;
|
||||
if(!i) return;
|
||||
|
||||
for(size_t n = 0; n < i->args.size(); n++)
|
||||
for(size_t n = 0; n < i->arg_count; n++)
|
||||
{
|
||||
Argument const &a = i->args[n];
|
||||
ConcreteInstructionArg &ca = ci.args[n];
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <fxos/disassembly.h>
|
||||
|
||||
#include <cstdarg>
|
||||
#include <cstring>
|
||||
|
||||
namespace FxOS {
|
||||
|
||||
|
@ -57,18 +58,15 @@ void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci)
|
|||
|
||||
/* Mnemonic */
|
||||
|
||||
static std::map<int, std::string> suffixes = {
|
||||
{ 1, ".b" }, { 2, ".w" }, { 4, ".l" } };
|
||||
static char const *suffixes[5] = { "", ".b", ".w", "", ".l" };
|
||||
char const *suffix = suffixes[(i->opsize <= 4) ? i->opsize : 0];
|
||||
|
||||
std::string mnemonic = i->mnemonic + suffixes[i->opsize];
|
||||
if(i->args.size())
|
||||
mnemonic += std::string(8 - mnemonic.size(), ' ');
|
||||
|
||||
printf(" %s", mnemonic.c_str());
|
||||
int spacing = i->arg_count ? 8 - strlen(i->mnemonic) - strlen(suffix) : 0;
|
||||
printf(" %s%s%*s", i->mnemonic, suffix, spacing, "");
|
||||
|
||||
/* Arguments */
|
||||
|
||||
for(size_t n = 0; n < i->args.size(); n++)
|
||||
for(size_t n = 0; n < i->arg_count; n++)
|
||||
{
|
||||
Argument const &a = i->args[n];
|
||||
ConcreteInstructionArg const &arg = ci.args[n];
|
||||
|
|
|
@ -19,7 +19,7 @@ void SyscallPass::analyze([[maybe_unused]] uint32_t pc,ConcreteInstruction &ci)
|
|||
Instruction const *i = ci.inst;
|
||||
if(!i) return;
|
||||
|
||||
for(size_t n = 0; n < i->args.size(); n++)
|
||||
for(size_t n = 0; n < i->arg_count; n++)
|
||||
{
|
||||
Argument const &a = i->args[n];
|
||||
ConcreteInstructionArg &ca = ci.args[n];
|
||||
|
|
Loading…
Reference in New Issue