From 5d63fb11e3b599249a0f05e1b22d1686babe3fd3 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Fri, 20 Dec 2019 11:17:09 +0100 Subject: [PATCH] implement the abstract domain of relative constants --- include/fxos/disassembly.h | 4 +- include/fxos/domains.h | 133 +++++++++++++ include/fxos/lang.h | 7 +- include/fxos/{operands.h => semantics.h} | 74 ++++---- lib/domains/relconst.cpp | 230 +++++++++++++++++++++++ 5 files changed, 403 insertions(+), 45 deletions(-) create mode 100644 include/fxos/domains.h rename include/fxos/{operands.h => semantics.h} (64%) create mode 100644 lib/domains/relconst.cpp diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index dab18be..84171c8 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -6,7 +6,7 @@ #define LIBFXOS_DISASSEMBLY_H #include -#include +#include #include #include @@ -28,7 +28,7 @@ private: Instruction &m_inst; /* Operands for arguments, if they have been determined */ - std::vector> args; +// std::vector> args; /* Jump targets, used for jump instructions only. The first jmp is for unconditional jumps; jmpt and jmpf are for conditional jumps. In diff --git a/include/fxos/domains.h b/include/fxos/domains.h new file mode 100644 index 0000000..9740378 --- /dev/null +++ b/include/fxos/domains.h @@ -0,0 +1,133 @@ +//--- +// fxos.domains: Abstract interpretation domains +//--- + +#ifndef FXOS_DOMAINS_H +#define FXOS_DOMAINS_H + +#include + +namespace FxOS { + +/* An abstract domain over any user-defined lattice. */ +template<typename T> +class AbstractDomain +{ + /* Bottom and Top constants */ + virtual T bottom() const noexcept = 0; + virtual T top() const noexcept = 0; + + /* Construct abstract value from integer constant */ + virtual T constant(uint32_t value) const noexcept = 0; + + /* Basic arithmetic. Division and modulo are both non-trivial + instruction sequences usually isolated in easily-identifiable + subroutines, so we don't care about them.
*/ + virtual T minus(T) const noexcept = 0; + virtual T add(T, T) const noexcept = 0; + virtual T sub(T, T) const noexcept = 0; + virtual T smul(T, T) const noexcept = 0; + virtual T umul(T, T) const noexcept = 0; + + /* Sign extensions */ + virtual T extub(T) const noexcept = 0; + virtual T extsb(T) const noexcept = 0; + virtual T extuw(T) const noexcept = 0; + virtual T extsw(T) const noexcept = 0; + + /* Logical operations */ + virtual T lnot(T) const noexcept = 0; + virtual T land(T, T) const noexcept = 0; + virtual T lor(T, T) const noexcept = 0; + virtual T lxor(T, T) const noexcept = 0; + + /* Comparisons. This operation proceeds in two steps: + * First call cmp(x, y) to check if the values are comparable. If + this returns false, the test result should be Top. + * If the values are comparable, call cmpu(x, y) or cmps(x, y), which + returns a negative number if x < y, 0 if x == y, and a positive + number if x > y. */ + virtual bool cmp(T, T) const noexcept = 0; + virtual int cmpu(T, T) const noexcept = 0; + virtual int cmps(T, T) const noexcept = 0; +}; + +//--- +// Domain of relative constants +//--- + +/* The lattice of relative constants (base + offset) */ +struct RelConst +{ + enum { Bottom=1, Top=2 }; + + /* The following fields concurrently indicate the base. The order of + resolution is as follows (non-trivial types in parentheses): + * If [spe] is equal to Bottom or Top, this is the value. + * If [arg] is non-zero, the value of the arg-th argument is used. + * If [org] is non-zero, the original value of the associated + callee-saved register is used. (CpuRegister) + * If [reg] is non-zero, the base is that register. (CpuRegister) + + For efficiency, checking [base==0] will tell apart plain old + constants from values with bases (and specials but these are usually + handled first). */ + union { + struct { + uint8_t spe; + uint8_t arg; + uint8_t org; + uint8_t reg; + }; + + uint32_t base; + }; + + /* The constant value, or offset. 
The signedness of this value depends + on the context where it is used: + + * For special values, the members are unused. + * For [arg] and [org] and [reg] bases with additive offset + semantics, the signedness has no effect. Operations with + non-trivial effect on signs such as multiplication are not + supported with bases. + * For zero bases, the interpretation is instruction-dependent. */ + union { + int32_t ival; + uint32_t uval; + }; +}; + +class RelConstDomain: public AbstractDomain<RelConst> +{ + /* Implementation of the AbstractDomain specification */ + + RelConst bottom() const noexcept override; + RelConst top() const noexcept override; + + RelConst constant(uint32_t value) const noexcept override; + + RelConst minus(RelConst) const noexcept override; + RelConst add(RelConst, RelConst) const noexcept override; + RelConst sub(RelConst, RelConst) const noexcept override; + RelConst smul(RelConst, RelConst) const noexcept override; + RelConst umul(RelConst, RelConst) const noexcept override; + + RelConst extub(RelConst) const noexcept override; + RelConst extsb(RelConst) const noexcept override; + RelConst extuw(RelConst) const noexcept override; + RelConst extsw(RelConst) const noexcept override; + + RelConst lnot(RelConst) const noexcept override; + RelConst land(RelConst, RelConst) const noexcept override; + RelConst lor(RelConst, RelConst) const noexcept override; + RelConst lxor(RelConst, RelConst) const noexcept override; + + bool cmp(RelConst, RelConst) const noexcept override; + int cmpu(RelConst, RelConst) const noexcept override; + int cmps(RelConst, RelConst) const noexcept override; +}; + +} /* namespace FxOS */ + +#endif /* FXOS_DOMAINS_H */ diff --git a/include/fxos/lang.h b/include/fxos/lang.h index e6583d6..61b4181 100644 --- a/include/fxos/lang.h +++ b/include/fxos/lang.h @@ -16,6 +16,8 @@ class CpuRegister { public: enum CpuRegisterName { + /* Value 0 is reserved for special purposes such as "no reg" */ + UNDEFINED = 0, /* Caller-saved
general-purpose registers */ R0, R1, R2, R3, R4, R5, R6, R7, /* Banked general-purpose registers. fxos does not account for @@ -78,19 +80,14 @@ struct Argument /* Addressing mode */ Kind kind; - /* Base register. Valid for all modes except Imm */ CpuRegister base; - /* Index register. Valid for ArrayDeref */ CpuRegister index; - /* Displacement in bytes. Valid for StructDeref, PcRel and PcJump */ int disp; - /* Operation size. Generally a multiplier for disp */ int opsize; - /* Immediate value. Valid for Imm */ int imm; }; diff --git a/include/fxos/operands.h b/include/fxos/semantics.h similarity index 64% rename from include/fxos/operands.h rename to include/fxos/semantics.h index 291a84d..fe58ef7 100644 --- a/include/fxos/operands.h +++ b/include/fxos/semantics.h @@ -1,5 +1,9 @@ -#ifndef LIBFXOS_OPERANDS_H -#define LIBFXOS_OPERANDS_H +//--- +// fxos.semantics: Analyzed data types and locations (OS semantics) +//--- + +#ifndef LIBFXOS_SEMANTICS_H +#define LIBFXOS_SEMANTICS_H #include @@ -58,46 +62,40 @@ private: }; }; -enum class OperandKind { - /* CPU-held registers accessed with instructions */ - CpuRegister, - /* Standard randomly-addressable memory */ - Memory, - /* Memory-mapped module registers with specific access */ - MappedModule, -}; +//--- +// Location representation +// +// The abstract interpreter keeps track of data stored at the following +// locations (attribute types in parentheses): +// Registers .reg (CpuRegister) +// Memory .addr (uint32_t) +// MappedModule .addr (uint32_t) +//--- -class Operand +struct Location { -public: - /* Returns the operand kind (which is also the subclass identity) */ - virtual OperandKind type() const noexcept = 0; + enum LocationType { + /* CPU-held registers accessed with instructions */ + Register, + /* Standard randomly-addressable memory */ + Memory, + /* Memory-mapped module registers with specific access */ + MappedModule, + }; + + LocationType location; + + union { + /* For registers: register identifier */ + 
CpuRegister reg; + /* For memory and mapped modules: addresses */ + uint32_t addr; + }; + /* String representation */ - virtual std::string str() const noexcept = 0; -}; - -class RegisterOperand: public Operand -{ -public: - RegisterOperand(CpuRegister name): m_name(name) {} - - OperandKind type() const noexcept override { - return OperandKind::CpuRegister; - } - - CpuRegister name() const noexcept { - return m_name; - } - - std::string str() const noexcept override { - return m_name.str(); - } - -private: - /* Register name for assembler listings */ - CpuRegister m_name; + std::string str() const noexcept; }; } /* namespace FxOS */ -#endif /* LIBFXOS_OPERANDS_H */ +#endif /* LIBFXOS_SEMANTICS_H */ diff --git a/lib/domains/relconst.cpp b/lib/domains/relconst.cpp new file mode 100644 index 0000000..0ca3d92 --- /dev/null +++ b/lib/domains/relconst.cpp @@ -0,0 +1,230 @@ +#include +#include + +namespace FxOS { + +//--- +// Quick helpers +//--- + +auto const Top = RelConst::Top; +auto const Bottom = RelConst::Bottom; + +/* General prelude that propagates Top, then Bottom */ +#define special(r1, r2) { \ + if((r1).spe == Top || (r2).spe == Top) return top(); \ + if((r1).spe || (r2).spe) return bottom(); \ +} + +RelConst RelConstDomain::bottom() const noexcept +{ + RelConst b {}; + b.spe = Bottom; + return b; +} + +RelConst RelConstDomain::top() const noexcept +{ + RelConst b {}; + b.spe = Top; + return b; +} + +RelConst RelConstDomain::constant(uint32_t value) const noexcept +{ + RelConst b {}; + b.uval = value; + return b; +} + +//--- +// Basic arithmetic +//--- + +RelConst RelConstDomain::minus(RelConst r) const noexcept +{ + /* Propagate Bottom and Top */ + if(r.spe) return r; + /* This domain does not support multiplicative coefficients for the + base. If the base is non-zero, return Top. 
*/ + if(r.arg || r.org || r.reg) return top(); + + r.ival = -r.ival; + return r; +} + +RelConst RelConstDomain::add(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + + /* This domain does not support cumulative bases. The sum can only be + represented if at least one of the values has no base */ + if(r1.base && r2.base) return top(); + + RelConst r; + r.base = r1.base | r2.base; + r.uval = r1.uval + r2.uval; + return r; +} + +RelConst RelConstDomain::sub(RelConst r1, RelConst r2) const noexcept +{ + /* This domain does not support difference between bases. The + difference can only be represented in a few restricted cases. */ + special(r1, r2); + + /* If r2 has no base, keep r1's base. */ + if(!r2.base) + { + r1.uval -= r2.uval; + return r1; + } + + /* If r2 has exactly the same base as r1, cancel it. */ + if(r1.base == r2.base) + { + r1.base = 0; + r1.uval -= r2.uval; + return r1; + } + + /* Otherwise, the result cannot be represented. */ + return top(); +} + +RelConst RelConstDomain::smul(RelConst r1, RelConst r2) const noexcept +{ + /* No base can be multiplied except by 1. Typically there will be no + such constant because it would be optimized away. */ + special(r1, r2); + + /* Give up if there is any base */ + if(r1.base || r2.base) return top(); + + /* Multiply with sign */ + r1.ival *= r2.ival; + return r1; +} + +RelConst RelConstDomain::umul(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + if(r1.base || r2.base) return top(); + + r1.uval *= r2.uval; + return r1; +} + +//--- +// Sign extensions +//--- + +RelConst RelConstDomain::extub(RelConst r) const noexcept +{ + /* The representation does not support sign extensions on bases, so we + just return top whenever there is one. 
*/ + if(r.spe) return r; + if(r.base) return top(); + + r.uval = (uint8_t)r.uval; + return r; +} + +RelConst RelConstDomain::extsb(RelConst r) const noexcept +{ + if(r.spe) return r; + if(r.base) return top(); + + r.ival = (int8_t)r.ival; + return r; +} + +RelConst RelConstDomain::extuw(RelConst r) const noexcept +{ + if(r.spe) return r; + if(r.base) return top(); + + r.uval = (uint16_t)r.uval; + return r; +} + +RelConst RelConstDomain::extsw(RelConst r) const noexcept +{ + if(r.spe) return r; + if(r.base) return top(); + + r.ival = (int16_t)r.ival; + return r; +} + +//--- +// Logical operations +//--- + +RelConst RelConstDomain::lnot(RelConst r) const noexcept +{ + /* Don't try to catch very special cases */ + if(r.spe) return r; + if(r.base) return top(); + + r.uval = ~r.uval; + return r; +} + +RelConst RelConstDomain::land(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + if(r1.base || r2.base) return top(); + + r1.uval &= r2.uval; + return r1; +} + +RelConst RelConstDomain::lor(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + if(r1.base || r2.base) return top(); + + r1.uval |= r2.uval; + return r1; +} + +RelConst RelConstDomain::lxor(RelConst r1, RelConst r2) const noexcept +{ + special(r1, r2); + if(r1.base || r2.base) return top(); + + r1.uval ^= r2.uval; + return r1; +} + +//--- +// Comparisons +//--- + +/* TODO: RelConst comparison improvements using typing + + Two values base+d1 and base+d2 (sharing the same base) can be proven to + compare as unsigned if the base has a known type and d1 and d2 are smaller + than the size of that type. This derives from the implicit assumption that a + full object cannot cross from P4 space to P0. 
*/ + +bool RelConstDomain::cmp(RelConst r1, RelConst r2) const noexcept +{ + /* Not very good */ + return (r1.base == 0 && r2.base == 0); +} + +int RelConstDomain::cmpu(RelConst r1, RelConst r2) const noexcept +{ + /* We can't just subtract because of overflows (information is lost + because we don't have the V bit) */ + return (r1.uval > r2.uval) - (r1.uval < r2.uval); +} + +int RelConstDomain::cmps(RelConst r1, RelConst r2) const noexcept +{ + return (r1.ival > r2.ival) - (r1.ival < r2.ival); +} + +} /* namespace FxOS */