fxos: prototype analysis, C++20 views
This commit is contained in:
parent
d65515b9ad
commit
ff2e9c2072
|
@ -4,7 +4,7 @@ find_package(FLEX 2.6)
|
|||
|
||||
add_compile_options(
|
||||
-Wall -Wextra -D_GNU_SOURCE
|
||||
$<$<COMPILE_LANGUAGE:CXX>:-std=c++17>
|
||||
$<$<COMPILE_LANGUAGE:CXX>:-std=c++20>
|
||||
-fmacro-prefix-map=${CMAKE_CURRENT_SOURCE_DIR}/=
|
||||
$<$<CONFIG:Debug>:-O0> $<$<CONFIG:Debug>:-g>
|
||||
$<$<CONFIG:Release>:-O2>)
|
||||
|
@ -49,6 +49,7 @@ add_custom_command(
|
|||
DEPENDS lib/syscalls_cg.def)
|
||||
|
||||
set(fxos_core_SOURCES
|
||||
lib/analysis.cpp
|
||||
lib/binary.cpp
|
||||
lib/disassembly.cpp
|
||||
lib/lang.cpp
|
||||
|
|
|
@ -28,6 +28,11 @@ template<typename T>
|
|||
class AbstractDomain
|
||||
{
|
||||
public:
|
||||
/* Lattice order, must be decidable */
|
||||
virtual bool le(T, T) const noexcept = 0;
|
||||
/* Lattice operations */
|
||||
virtual T join(T, T) const noexcept = 0;
|
||||
|
||||
/* Bottom and Top constants */
|
||||
virtual T bottom() const noexcept = 0;
|
||||
virtual T top() const noexcept = 0;
|
||||
|
|
|
@ -96,6 +96,9 @@ public:
|
|||
|
||||
/* Implementation of the AbstractDomain specification */
|
||||
|
||||
bool le(RelConst, RelConst) const noexcept override;
|
||||
RelConst join(RelConst, RelConst) const noexcept override;
|
||||
|
||||
RelConst bottom() const noexcept override;
|
||||
RelConst top() const noexcept override;
|
||||
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
//---------------------------------------------------------------------------//
|
||||
// 1100101 |_ mov #0, r4 __ //
|
||||
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
|
||||
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
|
||||
// |_ base# + offset |_| /_\_\___/__/ //
|
||||
//---------------------------------------------------------------------------//
|
||||
// fxos/analysis: Static analysis for assembler programs
|
||||
//
|
||||
// TODO: Designed to be an abstract interpreter, still WIP
|
||||
//---
|
||||
|
||||
#ifndef FXOS_ANALYSIS_H
|
||||
#define FXOS_ANALYSIS_H
|
||||
|
||||
#include <fxos/util/types.h>
|
||||
#include <fxos/ai/RelConst.h>
|
||||
#include <fxos/function.h>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace FxOS {
|
||||
|
||||
struct ProgramStateDiff;
|
||||
|
||||
/* Full description of a program state for the analyzer, at a known PC. */
|
||||
struct ProgramState
|
||||
{
|
||||
ProgramState()
|
||||
{
|
||||
setBottom();
|
||||
}
|
||||
|
||||
RelConst getRegister(int n) const
|
||||
{
|
||||
return ((unsigned)n >= 16) ? RelConstDomain().bottom() : m_regs[n];
|
||||
}
|
||||
|
||||
// TODO: More value in program state
|
||||
|
||||
/* Set to initial program state at entry of function. */
|
||||
void setFunctionInit();
|
||||
/* Set to initial non-entry-block state at entry of function (all bot). */
|
||||
void setBottom();
|
||||
/* Apply a diff. */
|
||||
void applyDiff(ProgramStateDiff const &diff);
|
||||
|
||||
/* Join with another program state. */
|
||||
void joinWith(ProgramState const &other);
|
||||
/* Lattice order. */
|
||||
bool le(ProgramState const &other) const;
|
||||
|
||||
private:
|
||||
/* Values for registers r0..r15 */
|
||||
RelConst m_regs[16];
|
||||
};
|
||||
|
||||
/* Change in program state over a single (contextually known) instruction. */
|
||||
struct ProgramStateDiff
|
||||
{
|
||||
enum class Target : int { None = -1, Unknown = -2 };
|
||||
|
||||
/* Number of the register that changes, or Target::*. */
|
||||
int target() const
|
||||
{
|
||||
return m_target;
|
||||
}
|
||||
/* New value for that register. */
|
||||
RelConst value() const
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
|
||||
// TODO: Needs way more flexibility
|
||||
|
||||
/* Set the diff to changing register rn to new value v. */
|
||||
void setRegisterUpdate(int n, RelConst v)
|
||||
{
|
||||
m_target = n;
|
||||
m_value = v;
|
||||
}
|
||||
/* Set the diff to changing register rn to an unknown value. */
|
||||
void setRegisterTouched(int n)
|
||||
{
|
||||
setRegisterUpdate(n, RelConstDomain().top());
|
||||
}
|
||||
/* Set the diff to changing no register state. */
|
||||
void setNoop()
|
||||
{
|
||||
m_target = static_cast<int>(Target::None);
|
||||
}
|
||||
/* Set the diff to unknown effect on registers. */
|
||||
void setUnknown()
|
||||
{
|
||||
m_target = static_cast<int>(Target::Unknown);
|
||||
}
|
||||
|
||||
private:
|
||||
int m_target;
|
||||
RelConst m_value;
|
||||
};
|
||||
|
||||
/* Function's storage of program states at every control point. */
|
||||
struct StaticFunctionAnalysis
|
||||
{
|
||||
/* Information stored for each block */
|
||||
struct Block
|
||||
{
|
||||
ProgramState entry;
|
||||
std::vector<ProgramStateDiff> diffs;
|
||||
};
|
||||
|
||||
std::vector<Block> blocks;
|
||||
};
|
||||
|
||||
/* Analyze a function; returns analysis results if successful, a null pointer
|
||||
on error. Does not store the results in f itself. */
|
||||
std::unique_ptr<StaticFunctionAnalysis> analyzeFunction(Function const &f);
|
||||
|
||||
} // namespace FxOS
|
||||
|
||||
#endif /* FXOS_ANALYSIS_H */
|
|
@ -21,6 +21,7 @@
|
|||
#include <fxos/vspace.h>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <ranges>
|
||||
#include <map>
|
||||
|
||||
namespace FxOS {
|
||||
|
@ -31,72 +32,6 @@ struct Mark;
|
|||
struct Variable;
|
||||
struct Function;
|
||||
|
||||
struct Binary
|
||||
{
|
||||
/* Empty binary with an empty virtual space. */
|
||||
Binary() = default;
|
||||
BSON serialize() const;
|
||||
void deserialize(BSON const &);
|
||||
|
||||
VirtualSpace &vspace()
|
||||
{
|
||||
return m_vspace;
|
||||
}
|
||||
VirtualSpace const &vspace() const
|
||||
{
|
||||
return m_vspace;
|
||||
}
|
||||
|
||||
/* OS analysis (performed on-demand). Returns the new or cached OS
|
||||
analysis results, nullptr if analysis failed. */
|
||||
OS *OSAnalysis(bool force = false) const;
|
||||
|
||||
// TODO: Platform information in a binary
|
||||
// TODO: Implement OS analysis
|
||||
// TODO: Add and manage objects
|
||||
|
||||
std::multimap<u32, std::unique_ptr<BinaryObject>> const &objects() const
|
||||
{
|
||||
return m_objects;
|
||||
}
|
||||
|
||||
/* Add an object to the binary. */
|
||||
void addObject(std::unique_ptr<BinaryObject> &&obj);
|
||||
|
||||
/* Return the address of an object by name, if it exists. If there are
|
||||
multiple objects with the same name, returns an arbitrary one. */
|
||||
std::optional<u32> objectAddress(std::string const &name) const;
|
||||
|
||||
/* Return the address of an object defined at the specified address if
|
||||
there is one, nullptr otherwise. If multiple objects are defined at the
|
||||
specified address, an arbitrary one is returned. */
|
||||
BinaryObject *objectAt(u32 address);
|
||||
BinaryObject const *objectAt(u32 address) const;
|
||||
|
||||
/* Returns the list of all objects defined at the specified address. */
|
||||
std::vector<BinaryObject *> objectsAt(u32 address);
|
||||
std::vector<BinaryObject const *> objectsAt(u32 address) const;
|
||||
|
||||
/* Locate all objects that intersect an address. */
|
||||
std::vector<BinaryObject *> objectsCovering(u32 address);
|
||||
std::vector<BinaryObject const *> objectsCovering(u32 address) const;
|
||||
|
||||
/* Return one or all functions defined at a given address. */
|
||||
Function *functionAt(u32 address);
|
||||
Function const *functionAt(u32 address) const;
|
||||
std::vector<Function *> functionsAt(u32 address);
|
||||
std::vector<Function const *> functionsAt(u32 address) const;
|
||||
|
||||
private:
|
||||
VirtualSpace m_vspace;
|
||||
|
||||
/* OS analysis results */
|
||||
mutable std::unique_ptr<OS> m_os;
|
||||
|
||||
/* All binary objects */
|
||||
std::multimap<u32, std::unique_ptr<BinaryObject>> m_objects;
|
||||
};
|
||||
|
||||
/* Base structure for all /binary objects/, ie. program objects that can be
|
||||
declared in the program space. */
|
||||
struct BinaryObject
|
||||
|
@ -202,6 +137,107 @@ private:
|
|||
std::string m_comment;
|
||||
};
|
||||
|
||||
struct Binary
|
||||
{
|
||||
/* Empty binary with an empty virtual space. */
|
||||
Binary() = default;
|
||||
BSON serialize() const;
|
||||
void deserialize(BSON const &);
|
||||
|
||||
VirtualSpace &vspace()
|
||||
{
|
||||
return m_vspace;
|
||||
}
|
||||
VirtualSpace const &vspace() const
|
||||
{
|
||||
return m_vspace;
|
||||
}
|
||||
|
||||
/* OS analysis (performed on-demand). Returns the new or cached OS
|
||||
analysis results, nullptr if analysis failed. */
|
||||
OS *OSAnalysis(bool force = false) const;
|
||||
|
||||
// TODO: Platform information in a binary
|
||||
// TODO: Implement OS analysis
|
||||
// TODO: Add and manage objects
|
||||
|
||||
std::multimap<u32, std::unique_ptr<BinaryObject>> const &objects() const
|
||||
{
|
||||
return m_objects;
|
||||
}
|
||||
|
||||
auto functions() // -> [Function &]
|
||||
{
|
||||
return std::views::all(m_objects) | std::views::filter([](auto &pair) {
|
||||
return pair.second->isFunction();
|
||||
}) | std::views::transform([](auto &pair) -> Function & {
|
||||
return pair.second->getFunction();
|
||||
});
|
||||
}
|
||||
auto functions() const // -> [Function const &]
|
||||
{
|
||||
return std::views::all(m_objects) | std::views::filter([](auto &pair) {
|
||||
return pair.second->isFunction();
|
||||
}) | std::views::transform([](auto &pair) -> Function const & {
|
||||
return pair.second->getFunction();
|
||||
});
|
||||
}
|
||||
|
||||
/* Iterator on only variables; yields a series of Variable [const] &. */
|
||||
auto variables()
|
||||
{
|
||||
return std::views::all(m_objects) | std::views::filter([](auto &pair) {
|
||||
return pair.second->isVariable();
|
||||
}) | std::views::transform([](auto &pair) -> Variable & {
|
||||
return pair.second->getVariable();
|
||||
});
|
||||
}
|
||||
auto variables() const
|
||||
{
|
||||
return std::views::all(m_objects) | std::views::filter([](auto &pair) {
|
||||
return pair.second->isVariable();
|
||||
}) | std::views::transform([](auto &pair) -> Variable const & {
|
||||
return pair.second->getVariable();
|
||||
});
|
||||
}
|
||||
|
||||
/* Add an object to the binary. */
|
||||
void addObject(std::unique_ptr<BinaryObject> &&obj);
|
||||
|
||||
/* Return the address of an object by name, if it exists. If there are
|
||||
multiple objects with the same name, returns an arbitrary one. */
|
||||
std::optional<u32> objectAddress(std::string const &name) const;
|
||||
|
||||
/* Return the object defined at the specified address if
|
||||
there is one, nullptr otherwise. If multiple objects are defined at the
|
||||
specified address, an arbitrary one is returned. */
|
||||
BinaryObject *objectAt(u32 address);
|
||||
BinaryObject const *objectAt(u32 address) const;
|
||||
|
||||
/* Returns the list of all objects defined at the specified address. */
|
||||
std::vector<BinaryObject *> objectsAt(u32 address);
|
||||
std::vector<BinaryObject const *> objectsAt(u32 address) const;
|
||||
|
||||
/* Locate all objects that intersect an address. */
|
||||
std::vector<BinaryObject *> objectsCovering(u32 address);
|
||||
std::vector<BinaryObject const *> objectsCovering(u32 address) const;
|
||||
|
||||
/* Return one or all functions defined at a given address. */
|
||||
Function *functionAt(u32 address);
|
||||
Function const *functionAt(u32 address) const;
|
||||
std::vector<Function *> functionsAt(u32 address);
|
||||
std::vector<Function const *> functionsAt(u32 address) const;
|
||||
|
||||
private:
|
||||
VirtualSpace m_vspace;
|
||||
|
||||
/* OS analysis results */
|
||||
mutable std::unique_ptr<OS> m_os;
|
||||
|
||||
/* All binary objects */
|
||||
std::multimap<u32, std::unique_ptr<BinaryObject>> m_objects;
|
||||
};
|
||||
|
||||
/* Basic, unattributed binary object used to mark out regions of code (eg.
|
||||
"kernel", "timer driver", "LINK app", "zero", "interrupt handlers"...). May
|
||||
alias with other binary objects. */
|
||||
|
|
|
@ -49,14 +49,22 @@ struct Function: public BinaryObject
|
|||
return m_blocks[index];
|
||||
}
|
||||
|
||||
/* Get basic block by its address. */
|
||||
BasicBlock &basicBlockByAddress(u32 pc);
|
||||
BasicBlock const &basicBlockByAddress(u32 pc) const;
|
||||
|
||||
/* Get the entry block. */
|
||||
int entryBlockIndex() const
|
||||
{
|
||||
return m_entryBlockIndex;
|
||||
}
|
||||
/* Entry block of the function, looked up through m_entryBlockIndex. Block 0
   is not necessarily the entry block once blocks are sorted by address. */
BasicBlock &entryBlock()
{
    return m_blocks[m_entryBlockIndex];
}
|
||||
/* Entry block of the function (read-only), looked up through
   m_entryBlockIndex. Block 0 is not necessarily the entry block once blocks
   are sorted by address. */
BasicBlock const &entryBlock() const
{
    return m_blocks[m_entryBlockIndex];
}
|
||||
|
||||
/* Iterators over basic blocks. */
|
||||
|
@ -87,7 +95,8 @@ struct Function: public BinaryObject
|
|||
|
||||
/* Construction functions to be used only by the analysis pass. */
|
||||
bool exploreFunctionAt(u32 address);
|
||||
BasicBlock &addBasicBlock(BasicBlock &&bb);
|
||||
BasicBlock &createBasicBlock(u32 address, bool isEntryBlock);
|
||||
void sortBasicBlocks();
|
||||
void updateFunctionSize();
|
||||
void setAnalysisVersion(int version);
|
||||
|
||||
|
@ -96,6 +105,8 @@ private:
|
|||
std::vector<BasicBlock> m_blocks;
|
||||
/* Analysis version */
|
||||
int m_analysisVersion = 0;
|
||||
/* ID of the entry block */
|
||||
int m_entryBlockIndex;
|
||||
};
|
||||
|
||||
/* Basic block within a function. */
|
||||
|
@ -134,19 +145,19 @@ struct BasicBlock
|
|||
/* Binary and function that own the basic block. */
|
||||
/* Binary that owns the parent function. m_function is a reference_wrapper,
   so the function is reached through get(). */
Binary &parentBinary()
{
    return m_function.get().parentBinary();
}
|
||||
/* Binary that owns the parent function (read-only). m_function is a
   reference_wrapper, so the function is reached through get(). */
Binary const &parentBinary() const
{
    return m_function.get().parentBinary();
}
|
||||
/* Function that owns this block, unwrapped from the reference_wrapper. */
Function &parentFunction()
{
    return m_function.get();
}
|
||||
/* Function that owns this block (read-only), unwrapped from the
   reference_wrapper. */
Function const &parentFunction() const
{
    return m_function.get();
}
|
||||
|
||||
/* Block's index within function. */
|
||||
|
@ -279,28 +290,52 @@ struct BasicBlock
|
|||
|
||||
/* CFG navigation. */
|
||||
|
||||
std::vector<BasicBlock *> const &successors()
|
||||
auto successors() // -> [BasicBlock &]
|
||||
{
|
||||
return std::views::all(m_successors)
|
||||
| std::views::transform([this](int index) {
|
||||
return parentFunction().basicBlockByIndex(index);
|
||||
});
|
||||
}
|
||||
auto successors() const // -> [BasicBlock const &]
|
||||
{
|
||||
return std::views::all(m_successors)
|
||||
| std::views::transform([this](int index) {
|
||||
return parentFunction().basicBlockByIndex(index);
|
||||
});
|
||||
}
|
||||
std::vector<int> const &successorsByIndex() const
|
||||
{
|
||||
return m_successors;
|
||||
}
|
||||
std::vector<BasicBlock const *> successors() const
|
||||
auto successorsByAddress() const // -> [u32]
|
||||
{
|
||||
std::vector<BasicBlock const *> succ(m_successors.size());
|
||||
for(auto *bb: m_successors)
|
||||
succ.push_back(bb);
|
||||
return succ;
|
||||
return successors()
|
||||
| std::views::transform([](auto bb) { return bb.address(); });
|
||||
}
|
||||
|
||||
std::vector<BasicBlock *> const &predecessors()
|
||||
auto predecessors() // -> [BasicBlock &]
|
||||
{
|
||||
return std::views::all(m_predecessors)
|
||||
| std::views::transform([this](int index) {
|
||||
return parentFunction().basicBlockByIndex(index);
|
||||
});
|
||||
}
|
||||
auto predecessors() const // -> [BasicBlock const &]
|
||||
{
|
||||
return std::views::all(m_predecessors)
|
||||
| std::views::transform([this](int index) {
|
||||
return parentFunction().basicBlockByIndex(index);
|
||||
});
|
||||
}
|
||||
std::vector<int> const &predecessorsByIndex() const
|
||||
{
|
||||
return m_predecessors;
|
||||
}
|
||||
std::vector<BasicBlock const *> predecessors() const
|
||||
auto predecessorsByAddress() const // -> [u32]
|
||||
{
|
||||
std::vector<BasicBlock const *> pred(m_predecessors.size());
|
||||
for(auto *bb: m_predecessors)
|
||||
pred.push_back(bb);
|
||||
return pred;
|
||||
return predecessors()
|
||||
| std::views::transform([](auto bb) { return bb.address(); });
|
||||
}
|
||||
|
||||
uint successorCount() const
|
||||
|
@ -315,14 +350,15 @@ struct BasicBlock
|
|||
/* Construction functions to be used only by the cfg pass. */
|
||||
void addInstruction(Instruction &&insn);
|
||||
void finalizeBlock();
|
||||
// TODO: Set successors and predecessors
|
||||
void addSuccessor(BasicBlock *succ);
|
||||
void addPredecessor(BasicBlock *pred);
|
||||
|
||||
private:
|
||||
Function &m_function;
|
||||
std::reference_wrapper<Function> m_function;
|
||||
std::vector<Instruction> m_instructions;
|
||||
/* TODO: More compact storage for CFG edges, especially successors (≤ 2) */
|
||||
std::vector<BasicBlock *> m_successors;
|
||||
std::vector<BasicBlock *> m_predecessors;
|
||||
std::vector<int> m_successors;
|
||||
std::vector<int> m_predecessors;
|
||||
u32 m_address;
|
||||
u32 m_flags;
|
||||
};
|
||||
|
|
|
@ -45,6 +45,6 @@ void logmsg(int level, char const *file, int line, char const *func,
|
|||
|
||||
#define FxOS_log(level, fmt, ...) \
|
||||
FxOS::logmsg(FxOS::LOG_LEVEL_##level, __FILE__, __LINE__, __func__, \
|
||||
format(fmt, ##__VA_ARGS__))
|
||||
::format(fmt, ##__VA_ARGS__))
|
||||
|
||||
#endif /* FXOS_UTIL_LOG_H */
|
||||
|
|
|
@ -28,6 +28,43 @@ auto constexpr Bottom = RelConst::Bottom;
|
|||
return bottom(); \
|
||||
}
|
||||
|
||||
//---
|
||||
// Lattice basics
|
||||
//---
|
||||
|
||||
bool RelConstDomain::le(RelConst r1, RelConst r2) const noexcept
{
    /* Bottom is below everything and everything is below Top */
    bool const triviallyBelow = (r1.spe == Bottom) || (r2.spe == Top);
    if(triviallyBelow)
        return true;

    /* Otherwise Top is below nothing, and nothing is below Bottom */
    bool const triviallyNotBelow = (r1.spe == Top) || (r2.spe == Bottom);
    if(triviallyNotBelow)
        return false;

    /* The domain is flat: two regular values only compare when they are
       equal, ie. when both the base and the constant offset match. */
    return (r1.base == r2.base) && (r1.uval == r2.uval);
}
|
||||
|
||||
RelConst RelConstDomain::join(RelConst r1, RelConst r2) const noexcept
{
    /* r2 is the result when r1 is Bottom or r2 is Top */
    bool const r2Wins = (r1.spe == Bottom) || (r2.spe == Top);
    if(r2Wins)
        return r2;

    /* Symmetrically, r1 is the result when r1 is Top or r2 is Bottom */
    bool const r1Wins = (r1.spe == Top) || (r2.spe == Bottom);
    if(r1Wins)
        return r1;

    /* Two regular values: equal ones join to themselves, any others to Top */
    bool const identical = (r1.base == r2.base) && (r1.uval == r2.uval);
    return identical ? r1 : top();
}
|
||||
|
||||
RelConst RelConstDomain::bottom() const noexcept
|
||||
{
|
||||
RelConst b {};
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
//---------------------------------------------------------------------------//
|
||||
// 1100101 |_ mov #0, r4 __ //
|
||||
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
|
||||
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
|
||||
// |_ base# + offset |_| /_\_\___/__/ //
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
#include <fxos/analysis.h>
|
||||
#include <cassert>
|
||||
|
||||
namespace FxOS {
|
||||
|
||||
void ProgramState::setFunctionInit()
|
||||
{
|
||||
// TODO: Analysis: Set symbolic parameters at function entry
|
||||
for(int i = 0; i < 16; i++)
|
||||
m_regs[i] = RelConstDomain().top();
|
||||
}
|
||||
|
||||
void ProgramState::setBottom()
|
||||
{
|
||||
for(int i = 0; i < 16; i++)
|
||||
m_regs[i] = RelConstDomain().bottom();
|
||||
}
|
||||
|
||||
/* Apply the effect of a single instruction to this state:
   - Target::None leaves the state untouched;
   - Target::Unknown sets every register to top;
   - a register number 0..15 replaces that register with diff.value().
   Any other target is a programming error and trips the assertion. When
   assertions are compiled out it is handled like Target::Unknown, rather than
   writing outside of m_regs. */
void ProgramState::applyDiff(ProgramStateDiff const &diff)
{
    using Target = ProgramStateDiff::Target;
    int const target = diff.target();

    if(target == static_cast<int>(Target::None))
        return;

    bool const isRegister = (target >= 0 && target < 16);
    assert((isRegister || target == static_cast<int>(Target::Unknown))
           && "invalid register target");

    if(!isRegister) {
        /* Unknown effect: no register value can be trusted anymore */
        RelConstDomain RCD;
        for(RelConst &reg: m_regs)
            reg = RCD.top();
        return;
    }

    m_regs[target] = diff.value();
}
|
||||
|
||||
void ProgramState::joinWith(ProgramState const &other)
{
    RelConstDomain const domain;

    /* The join is computed register by register */
    int index = 0;
    for(RelConst &reg: m_regs) {
        reg = domain.join(reg, other.getRegister(index));
        index++;
    }
}
|
||||
|
||||
bool ProgramState::le(ProgramState const &other) const
{
    RelConstDomain const domain;

    /* The order is pointwise: every register must be below or equal to its
       counterpart; the scan stops at the first one that is not. */
    int index = 0;
    while(index < 16 && domain.le(m_regs[index], other.getRegister(index)))
        index++;

    return index == 16;
}
|
||||
|
||||
/* Information stored for each block during the fixpoint iteration */
|
||||
struct BlockStates
|
||||
{
|
||||
ProgramState entry;
|
||||
std::vector<ProgramStateDiff> diffs;
|
||||
ProgramState exit;
|
||||
ProgramState nextEntry;
|
||||
};
|
||||
|
||||
/* Compute the effect of a single instruction on the program state PS.

   TODO: No instruction semantics are implemented yet. Until they are, every
   instruction is reported as having an unknown effect on registers, which
   makes applyDiff() set all registers to top. The function must return a
   value in any case: flowing off the end of a value-returning function is
   undefined behavior. */
static ProgramStateDiff interpretInstruction(
    Instruction const &ins, ProgramState const &PS)
{
    (void)ins;
    (void)PS;

    ProgramStateDiff diff;
    diff.setUnknown();
    return diff;
}
|
||||
|
||||
/* Interpret every instruction of bb starting from states.entry. Rebuilds
   states.diffs (one diff per instruction) and stores the resulting state in
   states.exit. */
static void interpretBlock(BasicBlock const &bb, BlockStates &states)
{
    states.diffs.clear();
    ProgramState current = states.entry;

    for(Instruction const &ins: bb) {
        /* The diff is computed on the state before the instruction, then
           applied to get the state after it */
        states.diffs.push_back(interpretInstruction(ins, current));
        current.applyDiff(states.diffs.back());
    }

    states.exit = current;
}
|
||||
|
||||
/* Analyze a function by naive fixpoint iteration over its basic blocks.

   Each round interprets every block from its current entry state, then
   rebuilds every block's entry state from the exit states of the blocks that
   jump to it, and stops once no entry state grows anymore.

   Returns a null pointer if f has no valid entry block (eg. it has not been
   explored yet); otherwise returns one Block per basic block of f, in block
   index order. */
std::unique_ptr<StaticFunctionAnalysis> analyzeFunction(Function const &f)
{
    uint const blockCount = f.blockCount();
    int const entryIndex = f.entryBlockIndex();

    /* The entry block is not necessarily block 0: blocks are sorted by
       address and some may sit before the function's entry point. */
    if(entryIndex < 0 || static_cast<uint>(entryIndex) >= blockCount)
        return nullptr;

    /* Initialize all blocks' entry states: ProgramState's constructor sets
       them to bottom; only the entry block gets the function-entry state. */
    std::vector<BlockStates> VBS(blockCount);
    VBS[entryIndex].entry.setFunctionInit();

    /* The naive iteration strategy */
    while(true) {
        /* Interpret all blocks on their current states */
        for(uint i = 0; i < blockCount; i++)
            interpretBlock(f.basicBlockByIndex(i), VBS[i]);

        /* Compute the next entry state for each block. The entry block
           always keeps the function-entry state as part of its join. */
        for(BlockStates &BS: VBS)
            BS.nextEntry.setBottom();
        VBS[entryIndex].nextEntry.setFunctionInit();

        /* Push each block's exit state along its outgoing edges. Successors
           are used by index: predecessor lists may not be populated, and
           blockIndex() cannot be called on a copy of a block. */
        for(uint i = 0; i < blockCount; i++) {
            for(int succ: f.basicBlockByIndex(i).successorsByIndex()) {
                assert(succ >= 0 && static_cast<uint>(succ) < blockCount
                       && "successor index out of range");
                VBS[succ].nextEntry.joinWith(VBS[i].exit);
            }
        }

        /* Determine whether a fixpoint has been reached yet */
        bool pfp = true;
        for(BlockStates const &BS: VBS) {
            if(!BS.nextEntry.le(BS.entry)) {
                pfp = false;
                break;
            }
        }

        if(pfp)
            break;

        /* Switch to next state. Joining instead of overwriting means entry
           states can only grow from one round to the next. */
        for(BlockStates &BS: VBS)
            BS.entry.joinWith(BS.nextEntry);
    }

    /* The diffs kept here come from the last round, which interpreted the
       blocks on their final entry states. */
    auto an = std::make_unique<StaticFunctionAnalysis>();
    an->blocks.reserve(blockCount);

    for(BlockStates &BS: VBS) {
        StaticFunctionAnalysis::Block B;
        B.entry = BS.entry;
        B.diffs = std::move(BS.diffs);
        an->blocks.push_back(std::move(B));
    }

    return an;
}
|
||||
|
||||
// TODO_need_bb_successors;
|
||||
|
||||
} /* namespace FxOS */
|
104
lib/function.cpp
104
lib/function.cpp
|
@ -18,17 +18,58 @@ Function::Function(Binary &binary, u32 address):
|
|||
{
|
||||
/* Size is not determined at first. */
|
||||
|
||||
/* Entry block index is not determined at first. */
|
||||
m_entryBlockIndex = -1;
|
||||
|
||||
/* Default unambiguous name */
|
||||
setName(format("fun.%08x", address));
|
||||
}
|
||||
|
||||
/* Add a basic block to the function. The entry block must be added first. */
|
||||
BasicBlock &Function::addBasicBlock(BasicBlock &&bb)
|
||||
/* Find the basic block of this function that starts at address pc. It is a
   programming error to ask for an address at which no block starts: the
   assertion fires, and the end of the function is marked unreachable. */
BasicBlock &Function::basicBlockByAddress(u32 pc)
{
    for(BasicBlock &bb: *this) {
        if(bb.address() == pc)
            return bb;
    }
    assert(false && "not the address of a basic block in this function");
    __builtin_unreachable();
}
|
||||
|
||||
/* Read-only lookup of the basic block of this function that starts at
   address pc. It is a programming error to ask for an address at which no
   block starts. */
BasicBlock const &Function::basicBlockByAddress(u32 pc) const
{
    auto const match = std::find_if(begin(), end(),
        [pc](BasicBlock const &candidate) {
            return candidate.address() == pc;
        });

    if(match != end())
        return *match;

    assert(false && "not the address of a basic block in this function");
    __builtin_unreachable();
}
|
||||
|
||||
/* Create a new basic block and add it to the function. Invalidates previous
|
||||
pointers to other blocks! */
|
||||
BasicBlock &Function::createBasicBlock(u32 address, bool isEntryBlock)
{
    /* The entry flag must be set exactly for the block located at the
       function's own address */
    assert(isEntryBlock == (address == this->address())
           && "inconsistent entry block specification in function");

    /* emplace_back() returns a reference to the new element */
    return m_blocks.emplace_back(*this, address, isEntryBlock);
}
|
||||
|
||||
/* Sorts blocks by address. Invalidates pointers to blocks. */
|
||||
void Function::sortBasicBlocks()
|
||||
{
|
||||
std::sort(m_blocks.begin(), m_blocks.end(),
|
||||
[](auto &bb1, auto &bb2) { return bb1.address() < bb2.address(); });
|
||||
|
||||
/* Update entry block index */
|
||||
for(uint i = 0; i < m_blocks.size(); i++) {
|
||||
if(m_blocks[i].isEntryBlock()) {
|
||||
m_entryBlockIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Update the function's BinaryObject size by finding the last address covered
|
||||
by any instruction in the function. */
|
||||
void Function::updateFunctionSize()
|
||||
|
@ -195,16 +236,18 @@ bool Function::exploreFunctionAt(u32 functionAddress)
|
|||
blocks.push_back(std::move(sb));
|
||||
}
|
||||
|
||||
/* Successors by addresses, before we get the pointers */
|
||||
std::map<u32, std::vector<u32>> successorAddresses;
|
||||
|
||||
/* Cut superblocks. The loop on b.leaders schedules the construction of new
|
||||
BasicBlock objects but the iteration is really the multi-part do loop
|
||||
using the iterator on b.addresses. */
|
||||
for(auto &b: blocks) {
|
||||
auto it = b.addresses.begin();
|
||||
|
||||
for(u32 _: b.leaders) {
|
||||
(void)_;
|
||||
BasicBlock bb0(*this, *it, *it == functionAddress);
|
||||
BasicBlock &bb = addBasicBlock(std::move(bb0));
|
||||
for(u32 pc: b.leaders) {
|
||||
assert(pc == *it);
|
||||
BasicBlock &bb = createBasicBlock(*it, *it == functionAddress);
|
||||
|
||||
do {
|
||||
// TODO: Support 32-bit instructions
|
||||
|
@ -216,11 +259,35 @@ bool Function::exploreFunctionAt(u32 functionAddress)
|
|||
while(it != b.addresses.end() && !b.leaders.count(*it));
|
||||
|
||||
bb.finalizeBlock();
|
||||
successorAddresses[pc];
|
||||
|
||||
/* Find successors: either superblock's successors at end of
|
||||
superblock, or next block in the same superblock */
|
||||
if(it == b.addresses.end()) {
|
||||
if(b.staticTarget + 1)
|
||||
successorAddresses[pc].push_back(b.staticTarget);
|
||||
if(b.fallthroughTarget + 1)
|
||||
successorAddresses[pc].push_back(b.fallthroughTarget);
|
||||
}
|
||||
else {
|
||||
successorAddresses[pc].push_back(*it);
|
||||
}
|
||||
|
||||
// TODO: Set successors
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Set successors and predecessors
|
||||
sortBasicBlocks();
|
||||
|
||||
for(auto &[pc, succ]: successorAddresses) {
|
||||
BasicBlock &bb = basicBlockByAddress(pc);
|
||||
for(u32 a: succ)
|
||||
bb.addSuccessor(&basicBlockByAddress(a));
|
||||
}
|
||||
|
||||
// TODO: Set predecessors
|
||||
|
||||
updateFunctionSize();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -235,12 +302,13 @@ BasicBlock::BasicBlock(Function &function, u32 address, bool isEntryBlock):
|
|||
|
||||
/* Index of this block within its parent function, found by comparing the
   address of each of the parent's blocks with this object. Only meaningful on
   a block that is stored in the function: a copy of a block is never found
   and trips the assertion. */
uint BasicBlock::blockIndex() const
{
    Function const &parent = parentFunction();

    for(uint i = 0; i < parent.blockCount(); i++) {
        if(&parent.basicBlockByIndex(i) == this)
            return i;
    }
    assert(false && "blockIndex: block not in its own parent");
    __builtin_unreachable();
}
|
||||
|
||||
bool BasicBlock::mayFallthrough() const
|
||||
|
@ -290,6 +358,8 @@ void BasicBlock::finalizeBlock()
|
|||
for(Instruction &insn: *this) {
|
||||
bool isReturn = insn.opcode().isBlockTerminator();
|
||||
assert(!(term && isReturn) && "bb with multiple terminators");
|
||||
if(isReturn)
|
||||
term = &insn;
|
||||
}
|
||||
|
||||
/* The block must have a delay slot iff the terminator has one. */
|
||||
|
@ -307,7 +377,7 @@ void BasicBlock::finalizeBlock()
|
|||
m_flags |= Flags::HasDelaySlot;
|
||||
if(!term)
|
||||
m_flags |= Flags::NoTerminator;
|
||||
if(term && term->opcode().isReturn())
|
||||
if(term && (term->opcode().isReturn() || term->opcode().isDynamicJump()))
|
||||
m_flags |= Flags::IsTerminator;
|
||||
|
||||
if(hasDelaySlot) {
|
||||
|
@ -316,6 +386,18 @@ void BasicBlock::finalizeBlock()
|
|||
}
|
||||
}
|
||||
|
||||
/* Record succ as a successor of this block. The edge is stored as succ's
   index within the parent function, which both blocks must share. */
void BasicBlock::addSuccessor(BasicBlock *succ)
{
    assert(&succ->parentFunction() == &parentFunction());

    uint const index = succ->blockIndex();
    m_successors.push_back(index);
}
|
||||
|
||||
/* Record pred as a predecessor of this block. The edge is stored as pred's
   index within the parent function, which both blocks must share. */
void BasicBlock::addPredecessor(BasicBlock *pred)
{
    assert(&pred->parentFunction() == &parentFunction());

    uint const index = pred->blockIndex();
    m_predecessors.push_back(index);
}
|
||||
|
||||
//=== Instruction ===//
|
||||
|
||||
Instruction::Instruction(Function &function, u32 address, u32 opcode):
|
||||
|
|
|
@ -139,6 +139,7 @@ BSON BSON::clone() const
|
|||
}
|
||||
|
||||
assert(false && "BSON::clone: unsupported type");
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
void BSON::dump(FILE *fp, int depth, bool noindent) const
|
||||
|
|
|
@ -293,7 +293,9 @@ static std::string objectsAt(
|
|||
objects.push_back(fmt::format("+{}", unnamed));
|
||||
|
||||
return std::accumulate(objects.begin(), objects.end(), std::string {},
|
||||
[](auto &l, auto const r) { return l + (l.empty() ? "" : " ") + r; });
|
||||
[](auto l, auto const r) {
|
||||
return std::move(l) + (l.empty() ? "" : " ") + r;
|
||||
});
|
||||
}
|
||||
|
||||
void viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts)
|
||||
|
@ -308,6 +310,26 @@ void viewAssemblyBasicBlock(BasicBlock const &bb, ViewAssemblyOptions *opts)
|
|||
}
|
||||
printf(":\n");
|
||||
|
||||
printf(" Successors:");
|
||||
for(u32 succ: bb.successorsByAddress())
|
||||
printf(" bb.%08x", succ);
|
||||
if(bb.successorCount() == 0)
|
||||
printf(" (none)");
|
||||
printf("\n");
|
||||
|
||||
printf(" Flags:");
|
||||
if(bb.isEntryBlock())
|
||||
printf(" IsEntryBlock");
|
||||
if(bb.isTerminator())
|
||||
printf(" IsTerminator");
|
||||
if(bb.hasDelaySlot())
|
||||
printf(" HasDelaySlot");
|
||||
if(bb.hasNoTerminator())
|
||||
printf(" NoTerminator");
|
||||
if(!(bb.getFlags() & BasicBlock::ValidFlags))
|
||||
printf(" (none)");
|
||||
printf("\n");
|
||||
|
||||
for(Instruction const &ins: bb)
|
||||
viewAssemblyInstruction(ins, opts);
|
||||
|
||||
|
|
79
shell/a.cpp
79
shell/a.cpp
|
@ -43,6 +43,77 @@ static _af_args parse_af(Session &session, Parser &parser)
|
|||
return args;
|
||||
}
|
||||
|
||||
template<typename R, typename T>
|
||||
concept range_of
|
||||
= std::ranges::range<R> && std::same_as<std::ranges::range_value_t<R>, T>;
|
||||
|
||||
// TODO: Move this visualization function (also put spacing as a feature)
|
||||
template<typename R>
|
||||
requires(range_of<R, std::string>)
|
||||
void viewStrings(R range, int maxColumns = 80)
|
||||
{
|
||||
int columns = 0;
|
||||
|
||||
for(std::string const &str: range) {
|
||||
int length = str.size();
|
||||
if(columns != 0 && columns + length > maxColumns) {
|
||||
fmt::print("\n");
|
||||
columns = 0;
|
||||
}
|
||||
|
||||
fmt::print("{}", str);
|
||||
columns += length;
|
||||
}
|
||||
|
||||
if(columns > 0)
|
||||
fmt::print("\n");
|
||||
}
|
||||
template<typename R, typename F>
|
||||
void viewStrings(R range, F fun, int maxColumns = 80)
|
||||
{
|
||||
return viewStrings(
|
||||
std::views::all(range) | std::views::transform(fun), maxColumns);
|
||||
}
|
||||
|
||||
/* Check the functions of a binary for suspicious shapes and print a report:
   - functions with a basic block located before the function's address;
   - functions in which no basic block is flagged as terminator.
   Nothing is printed for a category with no offending function. */
static void _af_consistency(Binary const &binary)
{
    /* List of functions with blocks before the function's entry point */
    std::vector<Function const *> earlyBlocks;
    /* List of functions with no returning block */
    std::vector<Function const *> noReturn;

    for(Function const &f: binary.functions()) {
        bool const hasEarlyBlock = std::any_of(f.begin(), f.end(),
            [&f](auto &bb) { return bb.address() < f.address(); });
        if(hasEarlyBlock)
            earlyBlocks.push_back(&f);

        /* This predicate does not use f, so it captures nothing */
        bool const hasTerminator = std::any_of(
            f.begin(), f.end(), [](auto &bb) { return bb.isTerminator(); });
        if(!hasTerminator)
            noReturn.push_back(&f);
    }

    /* Label for a function in the report: its name, or a label built from
       its address if the name is empty. Shared by both lists. */
    auto const describe = [](Function const *f) {
        if(f->name().size() > 0)
            return fmt::format(" {}", f->name());
        else
            return fmt::format(" fun.{:08x}", f->address());
    };

    if(earlyBlocks.size() > 0) {
        fmt::print("{} functions have blocks before their entry point:\n",
            earlyBlocks.size());
        viewStrings(earlyBlocks, describe);
    }

    if(noReturn.size() > 0) {
        fmt::print("{} functions do not return:\n", noReturn.size());
        viewStrings(noReturn, describe);
    }
}
|
||||
|
||||
static void af_analyze(Binary &binary, _af_args const &args)
|
||||
{
|
||||
int successes = 0, skipped = 0, errors = 0;
|
||||
|
@ -82,7 +153,7 @@ static void af_analyze(Binary &binary, _af_args const &args)
|
|||
printf("\nAnalyzed %d functions (+%d skipped, +%d errors) in %s\n",
|
||||
successes, skipped, errors, timer.format_time().c_str());
|
||||
|
||||
/* TODO: Check for overlapping functions etc */
|
||||
_af_consistency(binary);
|
||||
}
|
||||
|
||||
void _af(Session &session, _af_args const &args)
|
||||
|
@ -236,6 +307,7 @@ static ShellCommand _af_cmd(
|
|||
},
|
||||
[](Session &s, Parser &p) { parse_af(s, p); }, "Analysis: Functions", R"(
|
||||
af [-u|--force] [-n <name>] [<addresses>...]
|
||||
af -c
|
||||
|
||||
Explore and disassemble functions starting at the specified addresses. For each
|
||||
explored function, a binary object of Function type is created, and the
|
||||
|
@ -249,6 +321,11 @@ information.
|
|||
|
||||
When a single address is given, -n can specify the name of the function object
|
||||
to be created.
|
||||
|
||||
With -c (consistency), does not create a new function, but checks the
|
||||
consistency of the set of functions defined in the current binary, and reports
|
||||
any suspicious occurrences, such as functions with blocks before the entry
|
||||
address, overlapping functions, functions with no rts, etc.
|
||||
)");
|
||||
|
||||
static ShellCommand _afs_cmd(
|
||||
|
|
|
@ -53,8 +53,7 @@ void _e(Session &, std::string, std::vector<long> const &values)
|
|||
|
||||
/* Hexa format */
|
||||
int length = (print_val <= (1ll << 32) ? 8 : 16) + 2 + (value < 0);
|
||||
std::string format = fmt::format("{{:#0{}x}}", length);
|
||||
fmt::print(format, value);
|
||||
fmt::print("{:#0{}x}", value, length);
|
||||
|
||||
if(print_val <= 100 || print_val % 100 <= 1 || print_val % 100 >= 99)
|
||||
fmt::print(" = {}", print_val);
|
||||
|
|
|
@ -19,7 +19,7 @@ public:
|
|||
/* Build directly from format arguments */
|
||||
template <typename... Args>
|
||||
CommandError(std::string const &format_str, Args&&... args):
|
||||
m_what(fmt::format(format_str, args...)) {}
|
||||
m_what(fmt::format(fmt::runtime(format_str), args...)) {}
|
||||
|
||||
char const *what() const noexcept override {
|
||||
return m_what.c_str();
|
||||
|
|
|
@ -405,7 +405,7 @@ static struct _isc_args parse_isc(Session &session, Parser &parser)
|
|||
binary = session.project().getBinary(args.binary_name);
|
||||
if(!binary) {
|
||||
std::string msg
|
||||
= format("No binary “%s” in project!", args.binary_name);
|
||||
= ::format("No binary “%s” in project!", args.binary_name);
|
||||
if(parser.completing())
|
||||
throw Parser::CompletionRequest("_error", msg);
|
||||
else
|
||||
|
|
Loading…
Reference in New Issue