/* Review notes (free text ahead of the first "diff --git" header).
   What the patch does:
   - ConcreteInstruction::inst becomes a pointer that is nullptr when the
     opcode could not be decoded; the raw opcode is then kept in
     ConcreteInstruction::opcode.
   - Disassembly::readins() no longer throws "No instruction for opcode"; it
     stores an opcode-only ConcreteInstruction when insmap has no entry.
   - CfgPass, PcrelPass and SyscallPass return early when inst is nullptr;
     PrintPass prints the address and raw opcode, then stops.
   Changes made during review (both rewrite an added line in place, so the
   hunk line counts are unchanged):
   - lib/passes/cfg.cpp, delay-slot hunk: readins(pc+2) may now hand back an
     entry whose inst is nullptr, so the validity test checks slot.inst before
     dereferencing it. An undecodable delay slot is reported through the
     existing "invalid delay slot" LangError.
   - lib/disassembly.cpp, opcode constructor: the body re-assigned
     inst = nullptr although the initializer list already does so; the
     assignment is replaced by a comment.
   NOTE(review): the print.cpp context line "static std::map suffixes" looks
   like it lost its template arguments in this copy of the patch -- confirm
   against the tree before applying. */
diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index 1b2ca2b..f5a37a0 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -52,15 +52,22 @@ struct ConcreteInstructionArg /* A loaded and annotated instruction. */ struct ConcreteInstruction { - ConcreteInstruction(Instruction const &inst); + /* Build from instruction, cannot be nullptr. */ + ConcreteInstruction(Instruction const *inst); + /* Build from opcode, if instruction could not be decoded. */ + ConcreteInstruction(uint16_t opcode); /* What instruction this is. Note that this does not determine all the - properties below. Placement and delay slots greatly alter them. */ - Instruction const &inst; + properties below. Placement and delay slots greatly alter them. + This pointer is nullptr if the instruction could not be decoded. */ + Instruction const *inst; /* Argument information (contains data set by several passes) */ ConcreteInstructionArg args[2]; + /* Opcode, valid only if inst==nullptr */ + uint16_t opcode; + //--- // Data set by the cfg pass //--- diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index 601ec24..8f5e502 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -32,12 +32,23 @@ ConcreteInstructionArg::ConcreteInstructionArg(): location = RelConstDomain().bottom(); } -ConcreteInstruction::ConcreteInstruction(Instruction const &inst): - inst(inst), args(), +ConcreteInstruction::ConcreteInstruction(Instruction const *inst): + inst(inst), args(), opcode(), leader(false), delayslot(false), terminal(false), jump(false), condjump(false), jmptarget(0xffffffff) { + if(!inst) throw std::logic_error( + "ConcreteInstruction built from a null pointer"); +} + +ConcreteInstruction::ConcreteInstruction(uint16_t opcode): + inst(nullptr), args(), opcode(opcode), + leader(false), delayslot(false), + terminal(false), jump(false), condjump(false), + jmptarget(0xffffffff) +{ + /* inst is already nullptr from the initializer list. */ } //--- @@ -89,14 +100,11 @@ ConcreteInstruction 
&Disassembly::readins(uint32_t pc) catch(std::out_of_range &e) { uint16_t opcode = m_target.read_u16(pc); - if(!insmap[opcode]) - { - throw std::runtime_error("No instruction for opcode"); - } + ConcreteInstruction ci(opcode); - Instruction const &inst = *insmap[opcode]; + if(insmap[opcode]) + ci = ConcreteInstruction(&*insmap[opcode]); - ConcreteInstruction ci(inst); m_instructions.emplace(pc, ci); return m_instructions.at(pc); } diff --git a/lib/passes/cfg.cpp b/lib/passes/cfg.cpp index ff04af4..ee5ad3b 100644 --- a/lib/passes/cfg.cpp +++ b/lib/passes/cfg.cpp @@ -19,14 +19,18 @@ CfgPass::CfgPass(Disassembly &disasm): void CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci) { + /* Don't explore successors if the instruction cannot be decoded, not + even pc+2. This will prevent wild overshoot. */ + if(!ci.inst) return; + /* Compute the jump target for jump instructions. This is easy because they are all trivially computable. (...If they are not we dub them "terminal" to avoid the computation!) */ uint32_t jmptarget = 0xffffffff; - if(ci.inst.isjump() || ci.inst.iscondjump()) + if(ci.inst->isjump() || ci.inst->iscondjump()) { - auto &args = ci.inst.args; + auto &args = ci.inst->args; if(args.size() != 1 || args[0].kind != Argument::PcJump) throw LangError(pc, "invalid jump instruction"); @@ -47,15 +51,15 @@ void CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci) set in the properties. 
*/ if(ci.delayslot) { - if(!ci.inst.isvaliddelayslot()) + if(!ci.inst->isvaliddelayslot()) throw LangError(pc, "invalid delay slot"); } /* Handle normal instructions */ - else if(!ci.inst.isdelayed()) + else if(!ci.inst->isdelayed()) { - ci.terminal = ci.inst.isterminal(); - ci.jump = ci.inst.isjump(); - ci.condjump = ci.inst.iscondjump(); + ci.terminal = ci.inst->isterminal(); + ci.jump = ci.inst->isjump(); + ci.condjump = ci.inst->iscondjump(); ci.jmptarget = jmptarget; } /* Create a new delay slot */ @@ -64,13 +68,13 @@ void CfgPass::analyze(uint32_t pc, ConcreteInstruction &ci) ConcreteInstruction &slot = m_disasm.readins(pc+2); if(slot.leader) throw LimitError("leader in a delay slot!"); - if(!slot.inst.isvaliddelayslot()) + if(!slot.inst || !slot.inst->isvaliddelayslot()) throw LangError(pc+2, "invalid delay slot"); slot.delayslot = true; - slot.terminal = ci.inst.isterminal(); - slot.jump = ci.inst.isjump(); - slot.condjump = ci.inst.iscondjump(); + slot.terminal = ci.inst->isterminal(); + slot.jump = ci.inst->isjump(); + slot.condjump = ci.inst->iscondjump(); slot.jmptarget = jmptarget; } diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index 4846c07..c5d832a 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -13,14 +13,16 @@ PcrelPass::PcrelPass(Disassembly &disasm): void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) { - Instruction const &i = ci.inst; + Instruction const *i = ci.inst; + if(!i) return; - for(size_t n = 0; n < i.args.size(); n++) + for(size_t n = 0; n < i->args.size(); n++) { - Argument const &a = i.args[n]; + Argument const &a = i->args[n]; ConcreteInstructionArg &ca = ci.args[n]; - if(a.kind == Argument::PcRel && (i.opsize==2 || i.opsize==4)) + if(a.kind == Argument::PcRel && + (i->opsize == 2 || i->opsize == 4)) { uint32_t addr = (pc & ~(a.opsize - 1)) + 4 + a.disp; ca.location = RelConstDomain().constant(addr); @@ -31,8 +33,8 @@ void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) Target &t = 
m_disasm.target(); uint32_t v = -1; - if(i.opsize == 2) v = t.read_i16(addr); - if(i.opsize == 4) v = t.read_i32(addr); + if(i->opsize == 2) v = t.read_i16(addr); + if(i->opsize == 4) v = t.read_i32(addr); ca.value = DataValue(IntegerType::u32); ca.value.write(0, 4, v); diff --git a/lib/passes/print.cpp b/lib/passes/print.cpp index c91e2a4..55515a2 100644 --- a/lib/passes/print.cpp +++ b/lib/passes/print.cpp @@ -25,7 +25,7 @@ PrintPass::PrintPass(Disassembly &disasm, void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci) { - Instruction const &i = ci.inst; + Instruction const *i = ci.inst; /* Preliminary syscall number */ @@ -40,22 +40,31 @@ void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci) printf(">\n"); } + /* Raw data if instruction cannot be decoded */ + + printf(" %08x: %04x", pc, (i ? i->opcode : ci.opcode)); + if(!i) + { + printf("\n"); + return; + } + /* Mnemonic */ static std::map suffixes = { { 1, ".b" }, { 2, ".w" }, { 4, ".l" } }; - std::string mnemonic = i.mnemonic + suffixes[i.opsize]; - if(i.args.size()) + std::string mnemonic = i->mnemonic + suffixes[i->opsize]; + if(i->args.size()) mnemonic += std::string(8 - mnemonic.size(), ' '); - printf(" %08x: %04x %s", pc, ci.inst.opcode, mnemonic.c_str()); + printf(" %s", mnemonic.c_str()); /* Arguments */ - for(size_t n = 0; n < i.args.size(); n++) + for(size_t n = 0; n < i->args.size(); n++) { - Argument const &a = i.args[n]; + Argument const &a = i->args[n]; ConcreteInstructionArg const &arg = ci.args[n]; if(n) printf(", "); diff --git a/lib/passes/syscall.cpp b/lib/passes/syscall.cpp index f81e4b9..b2d0cd8 100644 --- a/lib/passes/syscall.cpp +++ b/lib/passes/syscall.cpp @@ -16,11 +16,12 @@ void SyscallPass::analyze([[maybe_unused]] uint32_t pc,ConcreteInstruction &ci) /* Nothing to do if no syscall table is provided! 
*/ if(!m_os) return; - Instruction const &i = ci.inst; + Instruction const *i = ci.inst; + if(!i) return; - for(size_t n = 0; n < i.args.size(); n++) + for(size_t n = 0; n < i->args.size(); n++) { - Argument const &a = i.args[n]; + Argument const &a = i->args[n]; ConcreteInstructionArg &ca = ci.args[n]; bool eligible = false;