From ff2e9c207280084b5cc08f5aaa66c67fc6253917 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Mon, 13 Nov 2023 23:41:27 +0100 Subject: [PATCH] fxos: prototype analysis, C++20 views --- CMakeLists.txt | 3 +- include/fxos/ai/AbstractDomain.h | 5 + include/fxos/ai/RelConst.h | 3 + include/fxos/analysis.h | 121 ++++++++++++++++++++++ include/fxos/binary.h | 168 +++++++++++++++++++------------ include/fxos/function.h | 82 ++++++++++----- include/fxos/util/log.h | 2 +- lib/ai/RelConst.cpp | 37 +++++++ lib/analysis.cpp | 146 +++++++++++++++++++++++++++ lib/function.cpp | 104 +++++++++++++++++-- lib/util/bson.cpp | 1 + lib/view/assembly.cpp | 24 ++++- shell/a.cpp | 79 ++++++++++++++- shell/e.cpp | 3 +- shell/errors.h | 2 +- shell/i.cpp | 2 +- 16 files changed, 674 insertions(+), 108 deletions(-) create mode 100644 include/fxos/analysis.h create mode 100644 lib/analysis.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e37abc..1a253a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ find_package(FLEX 2.6) add_compile_options( -Wall -Wextra -D_GNU_SOURCE - $<$:-std=c++17> + $<$:-std=c++20> -fmacro-prefix-map=${CMAKE_CURRENT_SOURCE_DIR}/= $<$:-O0> $<$:-g> $<$:-O2>) @@ -49,6 +49,7 @@ add_custom_command( DEPENDS lib/syscalls_cg.def) set(fxos_core_SOURCES + lib/analysis.cpp lib/binary.cpp lib/disassembly.cpp lib/lang.cpp diff --git a/include/fxos/ai/AbstractDomain.h b/include/fxos/ai/AbstractDomain.h index 3005c0d..d778906 100644 --- a/include/fxos/ai/AbstractDomain.h +++ b/include/fxos/ai/AbstractDomain.h @@ -28,6 +28,11 @@ template class AbstractDomain { public: + /* Lattice order, must be decidable */ + virtual bool le(T, T) const noexcept = 0; + /* Lattice operations */ + virtual T join(T, T) const noexcept = 0; + /* Bottom and Top constants */ virtual T bottom() const noexcept = 0; virtual T top() const noexcept = 0; diff --git a/include/fxos/ai/RelConst.h b/include/fxos/ai/RelConst.h index 7a0c1ed..aae1cb3 100644 --- a/include/fxos/ai/RelConst.h +++ b/include/fxos/ai/RelConst.h @@ -96,6 +96,9 @@ public: /* Implementation of the AbstractDomain specification */ + bool le(RelConst, RelConst) const noexcept override; + RelConst join(RelConst, RelConst) const noexcept override; + RelConst bottom() const noexcept override; RelConst top() const noexcept override; diff --git a/include/fxos/analysis.h b/include/fxos/analysis.h new file mode 100644 index 0000000..9ff466f --- /dev/null +++ b/include/fxos/analysis.h @@ -0,0 +1,121 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/analysis: Static analysis for assembler programs +// +// TODO: Designed to be an abstract interpreter, still WIP +//--- + +#ifndef FXOS_ANALYSIS_H +#define FXOS_ANALYSIS_H + +#include +#include +#include +#include +#include + +namespace FxOS { + +struct ProgramStateDiff; + +/* Full description of a program state for the analyzer, at a known PC. */ +struct ProgramState +{ + ProgramState() + { + setBottom(); + } + + RelConst getRegister(int n) const + { + return ((unsigned)n >= 16) ? RelConstDomain().bottom() : m_regs[n]; + } + + // TODO: More value in program state + + /* Set to initial program state at entry of function. */ + void setFunctionInit(); + /* Set to initial non-entry-block state at entry of function (all bot). */ + void setBottom(); + /* Apply a diff. */ + void applyDiff(ProgramStateDiff const &diff); + + /* Join with another program state. */ + void joinWith(ProgramState const &other); + /* Lattice order. */ + bool le(ProgramState const &other) const; + +private: + /* Values for registers r0..r15 */ + RelConst m_regs[16]; +}; + +/* Change in program state over a single (contextually known) instruction. */ +struct ProgramStateDiff +{ + enum class Target : int { None = -1, Unknown = -2 }; + + /* Number of the register that changes, or Target::*. */ + int target() const + { + return m_target; + } + /* New value for that register. */ + RelConst value() const + { + return m_value; + } + + // TODO: Needs way more flexibility + + /* Set the diff to changing register rn to new value v. */ + void setRegisterUpdate(int n, RelConst v) + { + m_target = n; + m_value = v; + } + /* Set the diff to changing register rn to an unknown value. */ + void setRegisterTouched(int n) + { + setRegisterUpdate(n, RelConstDomain().top()); + } + /* Set the diff to changing no register state. */ + void setNoop() + { + m_target = static_cast(Target::None); + } + /* Set the diff to unknown effect on registers. */ + void setUnknown() + { + m_target = static_cast(Target::Unknown); + } + +private: + int m_target; + RelConst m_value; +}; + +/* Function's storage of program states at every control point. */ +struct StaticFunctionAnalysis +{ + /* Information stored for each block */ + struct Block + { + ProgramState entry; + std::vector diffs; + }; + + std::vector blocks; +}; + +/* Analyze a function; returns analysis results if successful, a null pointer + on error. Does not store the results in f itself. */ +std::unique_ptr analyzeFunction(Function const &f); + +} // namespace FxOS + +#endif /* FXOS_ANALYSIS_H */ diff --git a/include/fxos/binary.h b/include/fxos/binary.h index 6ec6f12..a42cb4d 100644 --- a/include/fxos/binary.h +++ b/include/fxos/binary.h @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace FxOS { @@ -31,72 +32,6 @@ struct Mark; struct Variable; struct Function; -struct Binary -{ - /* Empty binary with an empty virtual space. */ - Binary() = default; - BSON serialize() const; - void deserialize(BSON const &); - - VirtualSpace &vspace() - { - return m_vspace; - } - VirtualSpace const &vspace() const - { - return m_vspace; - } - - /* OS analysis (performed on-demand). Returns the new or cached OS - analysis results, nullptr if analysis failed. */ - OS *OSAnalysis(bool force = false) const; - - // TODO: Platform information in a binary - // TODO: Implement OS analysis - // TODO: Add and manage objects - - std::multimap> const &objects() const - { - return m_objects; - } - - /* Add an object to the binary. */ - void addObject(std::unique_ptr &&obj); - - /* Return the address of an object by name, if it exists. If there are - multiple objects with the same name, returns an arbitrary one. */ - std::optional objectAddress(std::string const &name) const; - - /* Return the address of an object defined at the specified address if - there is one, nullptr otherwise. If multiple objects are defined at the - specified address, an arbitrary one is returned. */ - BinaryObject *objectAt(u32 address); - BinaryObject const *objectAt(u32 address) const; - - /* Returns the list of all objects defined at the specified address. */ - std::vector objectsAt(u32 address); - std::vector objectsAt(u32 address) const; - - /* Locate all objects that intersect an address. */ - std::vector objectsCovering(u32 address); - std::vector objectsCovering(u32 address) const; - - /* Return one or all functions defined at a given address. */ - Function *functionAt(u32 address); - Function const *functionAt(u32 address) const; - std::vector functionsAt(u32 address); - std::vector functionsAt(u32 address) const; - -private: - VirtualSpace m_vspace; - - /* OS analysis results */ - mutable std::unique_ptr m_os; - - /* All binary objects */ - std::multimap> m_objects; -}; - /* Base structure for all /binary objets/, ie. program objects that can be declared in the program space. */ struct BinaryObject @@ -202,6 +137,107 @@ private: std::string m_comment; }; +struct Binary +{ + /* Empty binary with an empty virtual space. */ + Binary() = default; + BSON serialize() const; + void deserialize(BSON const &); + + VirtualSpace &vspace() + { + return m_vspace; + } + VirtualSpace const &vspace() const + { + return m_vspace; + } + + /* OS analysis (performed on-demand). Returns the new or cached OS + analysis results, nullptr if analysis failed. */ + OS *OSAnalysis(bool force = false) const; + + // TODO: Platform information in a binary + // TODO: Implement OS analysis + // TODO: Add and manage objects + + std::multimap> const &objects() const + { + return m_objects; + } + + auto functions() // -> [Function &] + { + return std::views::all(m_objects) | std::views::filter([](auto &pair) { + return pair.second->isFunction(); + }) | std::views::transform([](auto &pair) -> Function & { + return pair.second->getFunction(); + }); + } + auto functions() const // -> [Function const &] + { + return std::views::all(m_objects) | std::views::filter([](auto &pair) { + return pair.second->isFunction(); + }) | std::views::transform([](auto &pair) -> Function const & { + return pair.second->getFunction(); + }); + } + + /* Iterator on only variables; yields a series of Variable [const] &. */ + auto variables() + { + return std::views::all(m_objects) | std::views::filter([](auto &pair) { + return pair.second->isVariable(); + }) | std::views::transform([](auto &pair) -> Variable & { + return pair.second->getVariable(); + }); + } + auto variables() const + { + return std::views::all(m_objects) | std::views::filter([](auto &pair) { + return pair.second->isVariable(); + }) | std::views::transform([](auto &pair) -> Variable const & { + return pair.second->getVariable(); + }); + } + + /* Add an object to the binary. */ + void addObject(std::unique_ptr &&obj); + + /* Return the address of an object by name, if it exists. If there are + multiple objects with the same name, returns an arbitrary one. */ + std::optional objectAddress(std::string const &name) const; + + /* Return the address of an object defined at the specified address if + there is one, nullptr otherwise. If multiple objects are defined at the + specified address, an arbitrary one is returned. */ + BinaryObject *objectAt(u32 address); + BinaryObject const *objectAt(u32 address) const; + + /* Returns the list of all objects defined at the specified address. */ + std::vector objectsAt(u32 address); + std::vector objectsAt(u32 address) const; + + /* Locate all objects that intersect an address. */ + std::vector objectsCovering(u32 address); + std::vector objectsCovering(u32 address) const; + + /* Return one or all functions defined at a given address. */ + Function *functionAt(u32 address); + Function const *functionAt(u32 address) const; + std::vector functionsAt(u32 address); + std::vector functionsAt(u32 address) const; + +private: + VirtualSpace m_vspace; + + /* OS analysis results */ + mutable std::unique_ptr m_os; + + /* All binary objects */ + std::multimap> m_objects; +}; + /* Basic, unattributed binary object used to mark out regions of code (eg. "kernel", "timer driver", "LINK app", "zero", "interrupt handlers"...). May alias with other binary objects. */ diff --git a/include/fxos/function.h b/include/fxos/function.h index 3b869e1..69ae6db 100644 --- a/include/fxos/function.h +++ b/include/fxos/function.h @@ -49,14 +49,22 @@ struct Function: public BinaryObject return m_blocks[index]; } + /* Get basic block by its address. */ + BasicBlock &basicBlockByAddress(u32 pc); + BasicBlock const &basicBlockByAddress(u32 pc) const; + /* Get the entry block. */ + int entryBlockIndex() const + { + return m_entryBlockIndex; + } BasicBlock &entryBlock() { - return basicBlockByIndex(0); + return m_blocks[m_entryBlockIndex]; } BasicBlock const &entryBlock() const { - return basicBlockByIndex(0); + return m_blocks[m_entryBlockIndex]; } /* Iterators over basic blocks. */ @@ -87,7 +95,8 @@ struct Function: public BinaryObject /* Construction functions to be used only by the analysis pass. */ bool exploreFunctionAt(u32 address); - BasicBlock &addBasicBlock(BasicBlock &&bb); + BasicBlock &createBasicBlock(u32 address, bool isEntryBlock); + void sortBasicBlocks(); void updateFunctionSize(); void setAnalysisVersion(int version); @@ -96,6 +105,8 @@ private: std::vector m_blocks; /* Analysis version */ int m_analysisVersion = 0; + /* ID of the entry block */ + int m_entryBlockIndex; }; /* Basic block within a function. */ @@ -134,19 +145,19 @@ struct BasicBlock /* Binary and function that own the basic block. */ Binary &parentBinary() { - return m_function.parentBinary(); + return m_function.get().parentBinary(); } Binary const &parentBinary() const { - return m_function.parentBinary(); + return m_function.get().parentBinary(); } Function &parentFunction() { - return m_function; + return m_function.get(); } Function const &parentFunction() const { - return m_function; + return m_function.get(); } /* Block's index within function. */ @@ -279,28 +290,52 @@ struct BasicBlock /* CFG navigation. */ - std::vector const &successors() + auto successors() // -> [BasicBlock &] + { + return std::views::all(m_successors) + | std::views::transform([this](int index) { + return parentFunction().basicBlockByIndex(index); + }); + } + auto successors() const // -> [BasicBlock const &] + { + return std::views::all(m_successors) + | std::views::transform([this](int index) { + return parentFunction().basicBlockByIndex(index); + }); + } + std::vector const &successorsByIndex() const { return m_successors; } - std::vector successors() const + auto successorsByAddress() const // -> [u32] { - std::vector succ(m_successors.size()); - for(auto *bb: m_successors) - succ.push_back(bb); - return succ; + return successors() + | std::views::transform([](auto bb) { return bb.address(); }); } - std::vector const &predecessors() + auto predecessors() // -> [BasicBlock &] + { + return std::views::all(m_predecessors) + | std::views::transform([this](int index) { + return parentFunction().basicBlockByIndex(index); + }); + } + auto predecessors() const // -> [BasicBlock const &] + { + return std::views::all(m_predecessors) + | std::views::transform([this](int index) { + return parentFunction().basicBlockByIndex(index); + }); + } + std::vector const &predecessorsByIndex() const { return m_predecessors; } - std::vector predecessors() const + auto predecessorsByAddress() const // -> [u32] { - std::vector pred(m_predecessors.size()); - for(auto *bb: m_predecessors) - pred.push_back(bb); - return pred; + return predecessors() + | std::views::transform([](auto bb) { return bb.address(); }); } uint successorCount() const @@ -315,14 +350,15 @@ struct BasicBlock /* Construction functions to be used only by the cfg pass. */ void addInstruction(Instruction &&insn); void finalizeBlock(); - // TODO: Set successors and predecessors + void addSuccessor(BasicBlock *succ); + void addPredecessor(BasicBlock *pred); private: - Function &m_function; + std::reference_wrapper m_function; std::vector m_instructions; /* TODO: More compact storage for CFG edges, especially successors (≤ 2) */ - std::vector m_successors; - std::vector m_predecessors; + std::vector m_successors; + std::vector m_predecessors; u32 m_address; u32 m_flags; }; diff --git a/include/fxos/util/log.h b/include/fxos/util/log.h index 7f3797a..207d25b 100644 --- a/include/fxos/util/log.h +++ b/include/fxos/util/log.h @@ -45,6 +45,6 @@ void logmsg(int level, char const *file, int line, char const *func, #define FxOS_log(level, fmt, ...) \ FxOS::logmsg(FxOS::LOG_LEVEL_##level, __FILE__, __LINE__, __func__, \ - format(fmt, ##__VA_ARGS__)) + ::format(fmt, ##__VA_ARGS__)) #endif /* FXOS_UTIL_LOG_H */ diff --git a/lib/ai/RelConst.cpp b/lib/ai/RelConst.cpp index b92c1e8..604a4bb 100644 --- a/lib/ai/RelConst.cpp +++ b/lib/ai/RelConst.cpp @@ -28,6 +28,43 @@ auto constexpr Bottom = RelConst::Bottom; return bottom(); \ } +//--- +// Lattice basics +//--- + +bool RelConstDomain::le(RelConst r1, RelConst r2) const noexcept +{ + if(r1.spe == Bottom || r2.spe == Top) + return true; + if(r1.spe == Top || r2.spe == Bottom) + return false; + + /* Since this domain is non-relational, differing bases don't compare */ + if(r1.base != r2.base) + return false; + + /* And since offsets are also constants, they must match */ + if(r1.uval != r2.uval) + return false; + + /* Yes, this domain is pretty flat. It's just weirdly shaped constants. */ + return true; +} + +RelConst RelConstDomain::join(RelConst r1, RelConst r2) const noexcept +{ + if(r1.spe == Bottom || r2.spe == Top) + return r2; + if(r1.spe == Top || r2.spe == Bottom) + return r1; + + if(r1.base != r2.base || r1.uval != r2.uval) + return top(); + + /* r1 = r2 */ + return r1; +} + RelConst RelConstDomain::bottom() const noexcept { RelConst b {}; diff --git a/lib/analysis.cpp b/lib/analysis.cpp new file mode 100644 index 0000000..537bc11 --- /dev/null +++ b/lib/analysis.cpp @@ -0,0 +1,146 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// + +#include +#include + +namespace FxOS { + +void ProgramState::setFunctionInit() +{ + // TODO: Analysis: Set symbolic parameters at function entry + for(int i = 0; i < 16; i++) + m_regs[i] = RelConstDomain().top(); +} + +void ProgramState::setBottom() +{ + for(int i = 0; i < 16; i++) + m_regs[i] = RelConstDomain().bottom(); +} + +void ProgramState::applyDiff(ProgramStateDiff const &diff) +{ + RelConstDomain RCD; + int t = diff.target(); + + if(t == static_cast(ProgramStateDiff::Target::None)) { + /* Nothing */ + } + else if(t == static_cast(ProgramStateDiff::Target::Unknown)) { + for(int i = 0; i < 16; i++) + m_regs[i] = RCD.top(); + } + else { + assert((unsigned)t < 16 && "invalid register target"); + m_regs[t] = diff.value(); + } +} + +void ProgramState::joinWith(ProgramState const &other) +{ + RelConstDomain RCD; + + for(int i = 0; i < 16; i++) { + m_regs[i] = RCD.join(m_regs[i], other.getRegister(i)); + } +} + +bool ProgramState::le(ProgramState const &other) const +{ + RelConstDomain RCD; + + for(int i = 0; i < 16; i++) { + if(!RCD.le(m_regs[i], other.getRegister(i))) + return false; + } + return true; +} + +/* Information stored for each block during the fixpoint iteration */ +struct BlockStates +{ + ProgramState entry; + std::vector diffs; + ProgramState exit; + ProgramState nextEntry; +}; + +static ProgramStateDiff interpretInstruction( + Instruction const &ins, ProgramState const &PS) +{ +} + +static void interpretBlock(BasicBlock const &bb, BlockStates &states) +{ + ProgramState PS(states.entry); + states.diffs.clear(); + + for(Instruction const &i: bb) { + ProgramStateDiff diff = interpretInstruction(i, PS); + states.diffs.push_back(diff); + PS.applyDiff(diff); + } + + states.exit = PS; +} + +std::unique_ptr analyzeFunction(Function const &f) +{ + std::vector VBS; + + /* Initialize all blocks' entry states */ + for(uint i = 0; i < f.blockCount(); i++) { + BlockStates BS; + if(i == 0) + BS.entry.setFunctionInit(); + else + BS.entry.setBottom(); + VBS.push_back(BS); + } + + /* The naive iteration strategy */ + while(true) { + /* Interpret all blocks on their current states */ + for(uint i = 0; i < f.blockCount(); i++) + interpretBlock(f.basicBlockByIndex(i), VBS[i]); + + /* Compute the next entry state for each block */ + for(uint i = 0; i < f.blockCount(); i++) { + BasicBlock const &bb = f.basicBlockByIndex(i); + VBS[i].nextEntry.setBottom(); + + for(auto succ: bb.successors()) + VBS[i].nextEntry.joinWith(VBS[succ.blockIndex()].exit); + } + + /* Determine whether a fixpoint has been reached yet */ + bool pfp = std::all_of(VBS.begin(), VBS.end(), + [](BlockStates &BS) { return BS.nextEntry.le(BS.entry); }); + + if(pfp) + break; + + /* Switch to next state */ + for(uint i = 0; i < f.blockCount(); i++) + VBS[i].entry = VBS[i].nextEntry; + } + + auto an = std::make_unique(); + for(uint i = 0; i < f.blockCount(); i++) { + StaticFunctionAnalysis::Block B; + B.entry = VBS[i].entry; + B.diffs = std::move(VBS[i].diffs); + an->blocks.push_back(std::move(B)); + } + + return an; +} + +// TODO_need_bb_successors; + +} /* namespace FxOS */ diff --git a/lib/function.cpp b/lib/function.cpp index 4f38126..2ffbba6 100644 --- a/lib/function.cpp +++ b/lib/function.cpp @@ -18,17 +18,58 @@ Function::Function(Binary &binary, u32 address): { /* Size is not determined at first. */ + /* Entry block index is not determined at first. */ + m_entryBlockIndex = -1; + /* Default unambiguous name */ setName(format("fun.%08x", address)); } -/* Add a basic block to the function. The entry block must be added first. */ -BasicBlock &Function::addBasicBlock(BasicBlock &&bb) +BasicBlock &Function::basicBlockByAddress(u32 pc) { - m_blocks.push_back(bb); + for(BasicBlock &bb: *this) { + if(bb.address() == pc) + return bb; + } + assert(false && "not the address of a basic block in this function"); + __builtin_unreachable(); +} + +BasicBlock const &Function::basicBlockByAddress(u32 pc) const +{ + for(BasicBlock const &bb: *this) { + if(bb.address() == pc) + return bb; + } + assert(false && "not the address of a basic block in this function"); + __builtin_unreachable(); +} + +/* Create a new basic block and add it to the function. Invalidates previous + pointers to other blocks! */ +BasicBlock &Function::createBasicBlock(u32 address, bool isEntryBlock) +{ + assert(isEntryBlock == (address == this->address()) + && "inconsistent entry block specification in function"); + m_blocks.emplace_back(*this, address, isEntryBlock); return m_blocks.back(); } +/* Sorts blocks by address. Invalidates pointers to blocks. */ +void Function::sortBasicBlocks() +{ + std::sort(m_blocks.begin(), m_blocks.end(), + [](auto &bb1, auto &bb2) { return bb1.address() < bb2.address(); }); + + /* Update entry block index */ + for(uint i = 0; i < m_blocks.size(); i++) { + if(m_blocks[i].isEntryBlock()) { + m_entryBlockIndex = i; + break; + } + } +} + /* Update the function's BinaryObject size by finding the last address covered by any instruction in the function. */ void Function::updateFunctionSize() @@ -195,16 +236,18 @@ bool Function::exploreFunctionAt(u32 functionAddress) blocks.push_back(std::move(sb)); } + /* Successors by addresses, before we get the pointers */ + std::map> successorAddresses; + /* Cut superblocks. The loop on b.leaders schedules the construction of new BasicBlock objects but the iteration is really the multi-part do loop using the iterator on b.addresses. */ for(auto &b: blocks) { auto it = b.addresses.begin(); - for(u32 _: b.leaders) { - (void)_; - BasicBlock bb0(*this, *it, *it == functionAddress); - BasicBlock &bb = addBasicBlock(std::move(bb0)); + for(u32 pc: b.leaders) { + assert(pc == *it); + BasicBlock &bb = createBasicBlock(*it, *it == functionAddress); do { // TODO: Support 32-bit instructions @@ -216,11 +259,35 @@ bool Function::exploreFunctionAt(u32 functionAddress) while(it != b.addresses.end() && !b.leaders.count(*it)); bb.finalizeBlock(); + successorAddresses[pc]; + + /* Find successors: either superblock's successors at end of + superblock, or next block in the same superblock */ + if(it == b.addresses.end()) { + if(b.staticTarget + 1) + successorAddresses[pc].push_back(b.staticTarget); + if(b.fallthroughTarget + 1) + successorAddresses[pc].push_back(b.fallthroughTarget); + } + else { + successorAddresses[pc].push_back(*it); + } + + // TODO: Set successors } } - // TODO: Set successors and predecessors + sortBasicBlocks(); + for(auto &[pc, succ]: successorAddresses) { + BasicBlock &bb = basicBlockByAddress(pc); + for(u32 a: succ) + bb.addSuccessor(&basicBlockByAddress(a)); + } + + // TODO: Set predecessors + + updateFunctionSize(); return true; } @@ -235,12 +302,13 @@ BasicBlock::BasicBlock(Function &function, u32 address, bool isEntryBlock): uint BasicBlock::blockIndex() const { - for(uint i = 0; i < m_function.blockCount(); i++) { - BasicBlock &bb = m_function.basicBlockByIndex(i); + for(uint i = 0; i < parentFunction().blockCount(); i++) { + BasicBlock const &bb = parentFunction().basicBlockByIndex(i); if(&bb == this) return i; } assert(false && "blockIndex: block not in its own parent"); + __builtin_unreachable(); } bool BasicBlock::mayFallthrough() const @@ -290,6 +358,8 @@ void BasicBlock::finalizeBlock() for(Instruction &insn: *this) { bool isReturn = insn.opcode().isBlockTerminator(); assert(!(term && isReturn) && "bb with multiple terminators"); + if(isReturn) + term = &insn; } /* The block must have a delay slot iff the terminator has one. */ @@ -307,7 +377,7 @@ void BasicBlock::finalizeBlock() m_flags |= Flags::HasDelaySlot; if(!term) m_flags |= Flags::NoTerminator; - if(term && term->opcode().isReturn()) + if(term && (term->opcode().isReturn() || term->opcode().isDynamicJump())) m_flags |= Flags::IsTerminator; if(hasDelaySlot) { @@ -316,6 +386,18 @@ void BasicBlock::finalizeBlock() } } +void BasicBlock::addSuccessor(BasicBlock *succ) +{ + assert(&succ->parentFunction() == &parentFunction()); + m_successors.push_back(succ->blockIndex()); +} + +void BasicBlock::addPredecessor(BasicBlock *pred) +{ + assert(&pred->parentFunction() == &parentFunction()); + m_predecessors.push_back(pred->blockIndex()); +} + //=== Instruction ===// Instruction::Instruction(Function &function, u32 address, u32 opcode): diff --git a/lib/util/bson.cpp b/lib/util/bson.cpp index 7e99e4e..4d8b906 100644 --- a/lib/util/bson.cpp +++ b/lib/util/bson.cpp @@ -139,6 +139,7 @@ BSON BSON::clone() const } assert(false && "BSON::clone: unsupported type"); + __builtin_unreachable(); } void BSON::dump(FILE *fp, int depth, bool noindent) const diff --git a/lib/view/assembly.cpp b/lib/view/assembly.cpp index c36bd90..f670fb1 100644 --- a/lib/view/assembly.cpp +++ b/lib/view/assembly.cpp @@ -293,7 +293,9 @@ static std::string objectsAt( objects.push_back(fmt::format("+{}", unnamed)); return std::accumulate(objects.begin(), objects.end(), std::string {}, - [](auto &l, auto const r) { return l + (l.empty() ? "" : " ") + r; }); + [](auto l, auto const r) { + return std::move(l) + (l.empty() ? "" : " ") + r; + }); } void viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts) @@ -308,6 +310,26 @@ void viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts) } printf(":\n"); + printf(" Successors:"); + for(u32 succ: bb.successorsByAddress()) + printf(" bb.%08x", succ); + if(bb.successorCount() == 0) + printf(" (none)"); + printf("\n"); + + printf(" Flags:"); + if(bb.isEntryBlock()) + printf(" IsEntryBlock"); + if(bb.isTerminator()) + printf(" IsTerminator"); + if(bb.hasDelaySlot()) + printf(" HasDelaySlot"); + if(bb.hasNoTerminator()) + printf(" NoTerminator"); + if(!(bb.getFlags() & BasicBlock::ValidFlags)) + printf(" (none)"); + printf("\n"); + for(Instruction const &ins: bb) viewAssemblyInstruction(ins, opts); diff --git a/shell/a.cpp b/shell/a.cpp index 5ecb700..51b4c73 100644 --- a/shell/a.cpp +++ b/shell/a.cpp @@ -43,6 +43,77 @@ static _af_args parse_af(Session &session, Parser &parser) return args; } +template +concept range_of + = std::ranges::range && std::same_as, T>; + +// TODO: Move this visualization function (also put spacing as a feature) +template + requires(range_of) +void viewStrings(R range, int maxColumns = 80) +{ + int columns = 0; + + for(std::string const &str: range) { + int length = str.size(); + if(columns != 0 && columns + length > maxColumns) { + fmt::print("\n"); + columns = 0; + } + + fmt::print("{}", str); + columns += length; + } + + if(columns > 0) + fmt::print("\n"); +} +template +void viewStrings(R range, F fun, int maxColumns = 80) +{ + return viewStrings( + std::views::all(range) | std::views::transform(fun), maxColumns); +} + +static void _af_consistency(Binary const &binary) +{ + /* List of functions with blocks before the function's entry point */ + std::vector earlyBlocks; + /* List of functions with no returning block */ + std::vector noReturn; + + for(Function const &f: binary.functions()) { + if(std::any_of(f.begin(), f.end(), + [&f](auto &bb) { return bb.address() < f.address(); })) + earlyBlocks.push_back(&f); + + if(!std::any_of(f.begin(), f.end(), + [&f](auto &bb) { return bb.isTerminator(); })) + noReturn.push_back(&f); + } + + if(earlyBlocks.size() > 0) { + fmt::print("{} functions have blocks before their entry point:\n", + earlyBlocks.size()); + viewStrings(earlyBlocks, [](Function const *f) { + if(f->name().size() > 0) + return fmt::format(" {}", f->name()); + else + return fmt::format(" fun.{:08x}", f->address()); + }); + } + + if(noReturn.size() > 0) { + fmt::print("{} functions do not return:\n", noReturn.size()); + viewStrings(noReturn, [](Function const *f) { + if(f->name().size() > 0) + return fmt::format(" {}", f->name()); + else + return fmt::format(" fun.{:08x}", f->address()); + }); + } +} + static void af_analyze(Binary &binary, _af_args const &args) { int successes = 0, skipped = 0, errors = 0; @@ -82,7 +153,7 @@ static void af_analyze(Binary &binary, _af_args const &args) printf("\nAnalyzed %d functions (+%d skipped, +%d errors) in %s\n", successes, skipped, errors, timer.format_time().c_str()); - /* TODO: Check for overlapping functions etc */ + _af_consistency(binary); } void _af(Session &session, _af_args const &args) @@ -236,6 +307,7 @@ static ShellCommand _af_cmd( }, [](Session &s, Parser &p) { parse_af(s, p); }, "Analysis: Functions", R"( af [-u|--force] [-n ] [...] +af -c Explore and disassemble functions starting at the specified addresses. For each explored function, a binary object of Function type is created, and the @@ -249,6 +321,11 @@ information. When a single address is given, -n can specify the name of the function object to be created. + +With -c (consistency), does not create a new function, but checks the +consistency of the set of functions defined in the current binary, and reports +any suspicious occurrences, such as functions with blocks before the entry +address, overlapping functions, functions with no rts, etc. )"); static ShellCommand _afs_cmd( diff --git a/shell/e.cpp b/shell/e.cpp index 37c96b9..aa9c55e 100644 --- a/shell/e.cpp +++ b/shell/e.cpp @@ -53,8 +53,7 @@ void _e(Session &, std::string, std::vector const &values) /* Hexa format */ int length = (print_val <= (1ll << 32) ? 8 : 16) + 2 + (value < 0); - std::string format = fmt::format("{{:#0{}x}}", length); - fmt::print(format, value); + fmt::print("{:#0{}x}", value, length); if(print_val <= 100 || print_val % 100 <= 1 || print_val % 100 >= 99) fmt::print(" = {}", print_val); diff --git a/shell/errors.h b/shell/errors.h index 82edc8c..497fa78 100644 --- a/shell/errors.h +++ b/shell/errors.h @@ -19,7 +19,7 @@ public: /* Build directly from format arguments */ template CommandError(std::string const &format_str, Args&&... args): - m_what(fmt::format(format_str, args...)) {} + m_what(fmt::format(fmt::runtime(format_str), args...)) {} char const *what() const noexcept override { return m_what.c_str(); diff --git a/shell/i.cpp b/shell/i.cpp index be1700e..b3b07d2 100644 --- a/shell/i.cpp +++ b/shell/i.cpp @@ -405,7 +405,7 @@ static struct _isc_args parse_isc(Session &session, Parser &parser) binary = session.project().getBinary(args.binary_name); if(!binary) { std::string msg - = format("No binary “%s” in project!", args.binary_name); + = ::format("No binary “%s” in project!", args.binary_name); if(parser.completing()) throw Parser::CompletionRequest("_error", msg); else