implement the abstract domain of relative constants

This commit is contained in:
Lephenixnoir 2019-12-20 11:17:09 +01:00
parent 8812886e58
commit 5d63fb11e3
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
5 changed files with 403 additions and 45 deletions

View File

@ -6,7 +6,7 @@
#define LIBFXOS_DISASSEMBLY_H
#include <fxos/lang.h>
#include <fxos/operands.h>
#include <fxos/semantics.h>
#include <vector>
#include <optional>
@ -28,7 +28,7 @@ private:
Instruction &m_inst;
/* Operands for arguments, if they have been determined */
std::vector<std::optional<Operand>> args;
// std::vector<std::optional<Operand>> args;
/* Jump targets, used for jump instructions only. The first jmp is for
unconditional jumps; jmpt and jmpf are for conditional jumps. In

133
include/fxos/domains.h Normal file
View File

@ -0,0 +1,133 @@
//---
// fxos.domains: Abstract interpretation domains
//---
#ifndef FXOS_DOMAINS_H
#define FXOS_DOMAINS_H
#include <cstdint>
namespace FxOS {
/* An abstract domain over any user-defined lattice T.

   This is a pure interface: every operation is a pure virtual member that a
   concrete domain overrides. The operations are public so that an
   interpreter can drive any domain through a reference or pointer to this
   base class, and the destructor is virtual so that a domain can be safely
   destroyed through such a pointer. */
template<typename T>
class AbstractDomain
{
public:
    virtual ~AbstractDomain() = default;

    /* Bottom and Top constants */
    virtual T bottom() const noexcept = 0;
    virtual T top() const noexcept = 0;

    /* Construct abstract value from integer constant */
    virtual T constant(uint32_t value) const noexcept = 0;

    /* Basic arithmetic. Division and modulo are both non-trivial
       instruction sequences usually isolated in easily-identifiable
       subroutines, so we don't care about them. */
    virtual T minus(T) const noexcept = 0;
    virtual T add(T, T) const noexcept = 0;
    virtual T sub(T, T) const noexcept = 0;
    virtual T smul(T, T) const noexcept = 0;
    virtual T umul(T, T) const noexcept = 0;

    /* Byte (b) and word (w) extensions, unsigned (u) or signed (s) */
    virtual T extub(T) const noexcept = 0;
    virtual T extsb(T) const noexcept = 0;
    virtual T extuw(T) const noexcept = 0;
    virtual T extsw(T) const noexcept = 0;

    /* Logical operations */
    virtual T lnot(T) const noexcept = 0;
    virtual T land(T, T) const noexcept = 0;
    virtual T lor(T, T) const noexcept = 0;
    virtual T lxor(T, T) const noexcept = 0;

    /* Comparisons. This operation proceeds in two steps:
       * First call cmp(x, y) to check if the values are comparable. If
         this returns false, the test result should be Top.
       * If the values are comparable, call cmpu(x, y) or cmps(x, y), which
         returns a negative number if x < y, 0 if x == y, and a positive
         number if x > y. */
    virtual bool cmp(T, T) const noexcept = 0;
    virtual int cmpu(T, T) const noexcept = 0;
    virtual int cmps(T, T) const noexcept = 0;
};
//---
// Domain of relative constants
//---
/* The lattice of relative constants (base + offset).
A value is either one of the special elements Bottom/Top, a plain constant
(no base, offset only), or a symbolic base plus a 32-bit offset. */
struct RelConst
{
/* Markers stored in [spe] for the special lattice elements. 0 is left free
so that a zero [spe] means "not a special value". */
enum { Bottom=1, Top=2 };
/* The following fields concurrently indicate the base. The order of
resolution is as follows (non-trivial types in parentheses):
* If [spe] is equal to Bottom or Top, this is the value.
* If [arg] is non-zero, the value of the arg-th argument is used.
* If [org] is non-zero, the original value of the associated
callee-saved register is used. (CpuRegister)
* If [reg] is non-zero, the base is that register. (CpuRegister)
For efficiency, checking [base==0] will tell apart plain old
constants from values with bases (and specials but these are usually
handled first).
[base] overlays the four byte-sized fields, so it is zero exactly when
all four of them are zero, whatever the byte order.
NOTE(review): the unnamed struct inside the union and reading [base]
after writing the byte fields are compiler extensions rather than ISO
C++ - confirm that every supported toolchain accepts them. */
union {
struct {
/* Special marker: 0, Bottom or Top */
uint8_t spe;
/* Argument number used as base, 0 if none */
uint8_t arg;
/* Callee-saved register whose original value is the base, 0 if none */
uint8_t org;
/* Register used as base, 0 if none */
uint8_t reg;
};
/* All four fields above viewed as a single integer */
uint32_t base;
};
/* The constant value, or offset. The signedness of this value depends
on the context where it is used:
* For special values, the members are unused.
* For [arg] and [org] and [reg] bases with additive offset
semantics, the signedness has no effect. Operations with
non-trivial effect on signs such as multiplication are not
supported with bases.
* For zero bases, the interpretation is instruction-dependent. */
union {
/* Signed view of the constant/offset */
int32_t ival;
/* Unsigned view of the same 32 bits */
uint32_t uval;
};
};
/* Abstract domain of relative constants: implements every operation of
   AbstractDomain over the RelConst lattice. The overrides are public so
   that the domain can be used directly, not only by code inside the
   class itself. */
class RelConstDomain: public AbstractDomain<RelConst>
{
public:
    /* Special elements and plain constants */
    RelConst bottom() const noexcept override;
    RelConst top() const noexcept override;
    RelConst constant(uint32_t value) const noexcept override;

    /* Basic arithmetic */
    RelConst minus(RelConst) const noexcept override;
    RelConst add(RelConst, RelConst) const noexcept override;
    RelConst sub(RelConst, RelConst) const noexcept override;
    RelConst smul(RelConst, RelConst) const noexcept override;
    RelConst umul(RelConst, RelConst) const noexcept override;

    /* Byte and word extensions */
    RelConst extub(RelConst) const noexcept override;
    RelConst extsb(RelConst) const noexcept override;
    RelConst extuw(RelConst) const noexcept override;
    RelConst extsw(RelConst) const noexcept override;

    /* Logical operations */
    RelConst lnot(RelConst) const noexcept override;
    RelConst land(RelConst, RelConst) const noexcept override;
    RelConst lor(RelConst, RelConst) const noexcept override;
    RelConst lxor(RelConst, RelConst) const noexcept override;

    /* Comparisons: cmp() tells whether two values are comparable, then
       cmpu()/cmps() give the unsigned/signed ordering as a negative, zero
       or positive number. */
    bool cmp(RelConst, RelConst) const noexcept override;
    int cmpu(RelConst, RelConst) const noexcept override;
    int cmps(RelConst, RelConst) const noexcept override;
};
} /* namespace FxOS */
#endif /* FXOS_DOMAINS_H */

View File

@ -16,6 +16,8 @@ class CpuRegister
{
public:
enum CpuRegisterName {
/* Value 0 is reserved for special purposes such as "no reg" */
UNDEFINED = 0,
/* Caller-saved general-purpose registers */
R0, R1, R2, R3, R4, R5, R6, R7,
/* Banked general-purpose registers. fxos does not account for
@ -78,19 +80,14 @@ struct Argument
/* Addressing mode */
Kind kind;
/* Base register. Valid for all modes except Imm */
CpuRegister base;
/* Index register. Valid for ArrayDeref */
CpuRegister index;
/* Displacement in bytes. Valid for StructDeref, PcRel and PcJump */
int disp;
/* Operation size. Generally a multiplier for disp */
int opsize;
/* Immediate value. Valid for Imm */
int imm;
};

View File

@ -1,5 +1,9 @@
#ifndef LIBFXOS_OPERANDS_H
#define LIBFXOS_OPERANDS_H
//---
// fxos.semantics: Analyzed data types and locations (OS semantics)
//---
#ifndef LIBFXOS_SEMANTICS_H
#define LIBFXOS_SEMANTICS_H
#include <fxos/lang.h>
@ -58,46 +62,40 @@ private:
};
};
enum class OperandKind {
/* CPU-held registers accessed with instructions */
CpuRegister,
/* Standard randomly-addressable memory */
Memory,
/* Memory-mapped module registers with specific access */
MappedModule,
};
//---
// Location representation
//
// The abstract interpreter keeps track of data stored at the following
// locations (attribute types in parentheses):
// Registers .reg (CpuRegister)
// Memory .addr (uint32_t)
// MappedModule .addr (uint32_t)
//---
class Operand
struct Location
{
public:
/* Returns the operand kind (which is also the subclass identity) */
virtual OperandKind type() const noexcept = 0;
enum LocationType {
/* CPU-held registers accessed with instructions */
Register,
/* Standard randomly-addressable memory */
Memory,
/* Memory-mapped module registers with specific access */
MappedModule,
};
LocationType location;
union {
/* For registers: register identifier */
CpuRegister reg;
/* For memory and mapped modules: addresses */
uint32_t addr;
};
/* String representation */
virtual std::string str() const noexcept = 0;
};
class RegisterOperand: public Operand
{
public:
RegisterOperand(CpuRegister name): m_name(name) {}
OperandKind type() const noexcept override {
return OperandKind::CpuRegister;
}
CpuRegister name() const noexcept {
return m_name;
}
std::string str() const noexcept override {
return m_name.str();
}
private:
/* Register name for assembler listings */
CpuRegister m_name;
std::string str() const noexcept;
};
} /* namespace FxOS */
#endif /* LIBFXOS_OPERANDS_H */
#endif /* LIBFXOS_SEMANTICS_H */

230
lib/domains/relconst.cpp Normal file
View File

@ -0,0 +1,230 @@
#include <fxos/domains.h>
#include <stdexcept>
namespace FxOS {
//---
// Quick helpers
//---
/* Shorthands for the special markers stored in RelConst::spe */
auto const Bottom = RelConst::Bottom;
auto const Top = RelConst::Top;

/* Shared prelude for binary operations: returns top() from the enclosing
   function if either operand is Top, otherwise bottom() if either operand
   is any other special value. Falls through when both are regular. */
#define special(r1, r2) do { \
    if((r1).spe == Top || (r2).spe == Top) return top(); \
    if((r1).spe != 0 || (r2).spe != 0) return bottom(); \
} while(0)
/* Bottom element: every field zeroed except the special marker. */
RelConst RelConstDomain::bottom() const noexcept
{
    RelConst result {};
    result.spe = RelConst::Bottom;
    return result;
}
/* Top element: every field zeroed except the special marker. */
RelConst RelConstDomain::top() const noexcept
{
    RelConst result {};
    result.spe = RelConst::Top;
    return result;
}
/* Plain constant: no special marker, no base, only the 32-bit value. */
RelConst RelConstDomain::constant(uint32_t value) const noexcept
{
    RelConst plain {};
    plain.uval = value;
    return plain;
}
//---
// Basic arithmetic
//---
/* Arithmetic negation.
   Bottom and Top are returned unchanged; any value with a base yields Top;
   a plain constant is negated modulo 2^32. */
RelConst RelConstDomain::minus(RelConst r) const noexcept
{
    /* Propagate Bottom and Top */
    if(r.spe) return r;

    /* This domain does not support multiplicative coefficients for the
       base. If the base is non-zero, return Top. */
    if(r.arg || r.org || r.reg) return top();

    /* Negate through the unsigned view: unsigned arithmetic wraps, whereas
       negating the signed view would be undefined behaviour for the most
       negative 32-bit value. Results are identical for every other input. */
    r.uval = static_cast<uint32_t>(0u - r.uval);
    return r;
}
/* Addition. Special values are handled by the common prelude; two based
   values cannot be summed and give Top; otherwise the result keeps the
   only base present (if any) and the offsets are added modulo 2^32. */
RelConst RelConstDomain::add(RelConst r1, RelConst r2) const noexcept
{
    special(r1, r2);

    /* Cumulative bases are not representable in this domain */
    if(r1.base != 0 && r2.base != 0) return top();

    /* At most one operand has a base: start from that one (or from r2,
       which is then a plain constant) and install the summed offset. */
    RelConst sum = (r1.base != 0) ? r1 : r2;
    sum.uval = r1.uval + r2.uval;
    return sum;
}
/* Subtraction r1 - r2. Differences between bases are not supported in
   general; the result is representable only when r2 has no base (r1's
   base is kept) or when both operands share the exact same base (the base
   cancels out). Every other combination gives Top. */
RelConst RelConstDomain::sub(RelConst r1, RelConst r2) const noexcept
{
    special(r1, r2);

    bool const plain_rhs = (r2.base == 0);

    /* Distinct bases: the difference cannot be represented */
    if(!plain_rhs && r1.base != r2.base) return top();

    /* Identical non-zero bases cancel each other */
    if(!plain_rhs) r1.base = 0;

    r1.uval -= r2.uval;
    return r1;
}
/* Signed multiplication, keeping the low 32 bits of the product.
   No base can be multiplied except by 1, and such a constant would
   typically be optimized away, so any operand with a base gives Top. */
RelConst RelConstDomain::smul(RelConst r1, RelConst r2) const noexcept
{
    special(r1, r2);

    /* Give up if there is any base */
    if(r1.base || r2.base) return top();

    /* The low 32 bits of a two's-complement product do not depend on the
       signedness of the operands, so compute them in unsigned arithmetic.
       Multiplying the signed views directly would be undefined behaviour
       whenever the product overflows int32_t; for products that fit, the
       stored bits are the same as before. */
    r1.uval = static_cast<uint32_t>(
        static_cast<uint64_t>(r1.uval) * static_cast<uint64_t>(r2.uval));
    return r1;
}
/* Unsigned multiplication modulo 2^32. As with signed multiplication, any
   operand with a base gives Top. */
RelConst RelConstDomain::umul(RelConst r1, RelConst r2) const noexcept
{
    special(r1, r2);

    if((r1.base | r2.base) != 0) return top();

    RelConst product = r1;
    product.uval = r1.uval * r2.uval;
    return product;
}
//---
// Sign extensions
//---
/* Zero-extend the low byte. Extensions are not representable on based
   values, so those give Top; special values pass through unchanged. */
RelConst RelConstDomain::extub(RelConst r) const noexcept
{
    if(r.spe != 0) return r;
    if(r.base != 0) return top();

    /* Keep the low 8 bits, clear the rest */
    r.uval &= 0xffu;
    return r;
}
/* Sign-extend the low byte. Special values pass through unchanged and
   based values give Top. */
RelConst RelConstDomain::extsb(RelConst r) const noexcept
{
    if(r.spe != 0) return r;
    if(r.base != 0) return top();

    /* Narrow to 8 bits, then widen back with sign */
    int8_t const low_byte = static_cast<int8_t>(r.ival);
    r.ival = low_byte;
    return r;
}
/* Zero-extend the low 16-bit word. Special values pass through unchanged
   and based values give Top. */
RelConst RelConstDomain::extuw(RelConst r) const noexcept
{
    if(r.spe != 0) return r;
    if(r.base != 0) return top();

    /* Keep the low 16 bits, clear the rest */
    r.uval &= 0xffffu;
    return r;
}
/* Sign-extend the low 16-bit word. Special values pass through unchanged
   and based values give Top. */
RelConst RelConstDomain::extsw(RelConst r) const noexcept
{
    if(r.spe != 0) return r;
    if(r.base != 0) return top();

    /* Narrow to 16 bits, then widen back with sign */
    int16_t const low_word = static_cast<int16_t>(r.ival);
    r.ival = low_word;
    return r;
}
//---
// Logical operations
//---
/* Bitwise complement. No attempt is made to handle special cases on based
   values: they give Top. Special values pass through unchanged. */
RelConst RelConstDomain::lnot(RelConst r) const noexcept
{
    if(r.spe != 0) return r;
    if(r.base != 0) return top();

    /* Flip all 32 bits */
    r.uval ^= UINT32_MAX;
    return r;
}
/* Bitwise AND of two plain constants; any base gives Top. */
RelConst RelConstDomain::land(RelConst r1, RelConst r2) const noexcept
{
    special(r1, r2);

    if((r1.base | r2.base) != 0) return top();

    RelConst masked = r1;
    masked.uval = r1.uval & r2.uval;
    return masked;
}
/* Bitwise OR of two plain constants; any base gives Top. */
RelConst RelConstDomain::lor(RelConst r1, RelConst r2) const noexcept
{
    special(r1, r2);

    if((r1.base | r2.base) != 0) return top();

    RelConst merged = r1;
    merged.uval = r1.uval | r2.uval;
    return merged;
}
/* Bitwise XOR of two plain constants; any base gives Top. */
RelConst RelConstDomain::lxor(RelConst r1, RelConst r2) const noexcept
{
    special(r1, r2);

    if((r1.base | r2.base) != 0) return top();

    RelConst toggled = r1;
    toggled.uval = r1.uval ^ r2.uval;
    return toggled;
}
//---
// Comparisons
//---
/* TODO: RelConst comparison improvements using typing
Two values base+d1 and base+d2 (sharing the same base) can be proven to
compare as unsigned if the base has a known type and d1 and d2 are smaller
than the size of that type. This derives from the implicit assumption that a
full object cannot cross from P4 space to P0. */
/* Comparability test: two values can be compared only when neither has
   anything in its base word, i.e. both are plain constants (special values
   have a non-zero base word too). This is a coarse approximation. */
bool RelConstDomain::cmp(RelConst r1, RelConst r2) const noexcept
{
    return (r1.base | r2.base) == 0;
}
/* Unsigned three-way comparison: -1 if r1 < r2, 0 if equal, 1 if r1 > r2.
   The operands are compared rather than subtracted, because a subtraction
   can overflow and lose the ordering information. */
int RelConstDomain::cmpu(RelConst r1, RelConst r2) const noexcept
{
    if(r1.uval < r2.uval) return -1;
    if(r1.uval > r2.uval) return 1;
    return 0;
}
/* Signed three-way comparison: -1 if r1 < r2, 0 if equal, 1 if r1 > r2. */
int RelConstDomain::cmps(RelConst r1, RelConst r2) const noexcept
{
    if(r1.ival < r2.ival) return -1;
    if(r1.ival > r2.ival) return 1;
    return 0;
}
} /* namespace FxOS */