improve the semantic model and implement the pcrel pass

This commit is contained in:
Lephenixnoir 2020-02-12 16:33:08 +01:00
parent 4d9edecad9
commit 08e26aee2e
Signed by: Lephenixnoir
GPG Key ID: 1BBA026E13FC0495
10 changed files with 341 additions and 149 deletions

View File

@ -6,7 +6,7 @@ CC = gcc
CXX = g++
# Compiler flags
CFLAGS = -Wall -Wextra -I include -D_GNU_SOURCE -std=c++17 -O3 -flto \
CFLAGS = -Wall -Wextra -I include -D_GNU_SOURCE -std=c++17 -Og -g \
-fno-diagnostics-show-line-numbers
CXXFLAGS = $(CFLAGS)
# Linker flags
@ -78,7 +78,7 @@ bin/:
# Dependency generation
#
include $(wildcard build/*/*.d build/*/*/*.d)
include $(wildcard build/*/*.d build/*/*/*.d build/*/*/*/*.d)
.PHONY: all all-lib all-fxos clean clean-lib clean-fxos distclean

View File

@ -48,6 +48,10 @@ public:
} hide_movpc_address;
/* TODO: More print pass parameters */
private:
void pcrel(uint32_t pc, Argument const &a, Location const &l,
std::optional<DataValue> v);
};
} /* namespace FxOS */

View File

@ -35,9 +35,10 @@ struct ConcreteInstructionArg
/* Location in CPU or memory, if that can be determined */
Location location;
/* The pointed value, if it can be determined. Valid only if the
location is specified; under some conditions the location can't be
determined by the type can. */
/* Pointed value. If the exact value can't be determined, this object
evaluates to false. Sometimes, the type can be determined anyway,
and in this case its [type] attribute below is not null even though
the object evaluates to false. */
DataValue value;
//---

View File

@ -22,6 +22,9 @@ public:
/* Construct abstract value from integer constant */
virtual T constant(uint32_t value) const noexcept = 0;
/* Check if value is constant */
virtual bool is_constant(T) const noexcept = 0;
/* Basic arithmetic. Division and modulo are both non-trivial
instruction sequences usually isolated in easily-identifiable
subroutines, so we don't care about them. */
@ -126,6 +129,7 @@ public:
RelConst top() const noexcept override;
RelConst constant(uint32_t value) const noexcept override;
bool is_constant(RelConst) const noexcept override;
RelConst minus(RelConst) const noexcept override;
RelConst add(RelConst, RelConst) const noexcept override;

View File

@ -8,100 +8,161 @@
#include <fxos/lang.h>
#include <fxos/domains.h>
#include <memory>
#include <variant>
#include <algorithm>
namespace FxOS {
//---
// Data type representation
//
// The abstract interpreter supports the following data types when analyzing
// data movement and access:
// The abstract interpreter supports the following fixed-size data types when
// analyzing data movement and access:
// Integers i8 u8 i16 u16 i32 u32 (regs, mem)
// Bit fields over ints T { <fields } (regs, mem)
// Arrays of any type T[] T[n] (mem)
// Strings char[] char[n] (mem)
// Arrays (fixed-size) T[n] (mem)
// Strings (fixed-size) char[n] (mem)
// Structures struct { <fields> } (mem)
//---
struct DataType
class DataType;
/* Base type: common information for all types (mixin) */
struct BaseType
{
enum DataKind {
/* Base types */
Integral,
/* Bit fields over integers */
BitField,
/* Structures (can only reside in memory) */
Struct,
/* Arrays (can only reside in memory) */
Array,
};
/* Type kind */
DataKind kind;
/* Type size in bytes, as would be returned by sizeof(). Must be 1, 2
or 4 for integral types and bit fields. Might be 0 for arrays if the
size of the array is unknown */
uint16_t size;
or 4 for integral types and bit fields. Cannot be 0 because all
considered types are fixed-size and finite. */
size_t size;
/* Type alignment, can only be 1, 2 or 4 */
uint16_t align;
size_t align;
};
/* Type of fields in bit fields */
using Field = std::pair<int, std::string>;
/* Integer type; of byte, word or longword size. Plus signedness. This kind is
so small that it is enumerated. */
struct IntegerType: public BaseType
{
static DataType const *u8, *i8, *u16, *i16, *u32, *i32;
union {
/* For integer types of size 1, whether to display as char
(might be extended to more attributes later) */
bool ischar;
/* For array types, number of elements (0 if unknown or
variable-size NUL-terminated strings) */
int elements;
};
IntegerType(size_t _size, bool _issigned) {
size = align = _size;
issigned = _issigned;
}
/* The following members are not in the union because they have non-
trivial destructors/copy and I don't want to care. */
/* Whether the type is signed */
bool issigned;
};
/* For array */
std::shared_ptr<DataType> arraytype;
/* For bit field types */
/* Bit fields over bytes, words or longwords. This should satisfy the invariant
that the sum of the field sizes is equal to the type size. */
struct BitfieldType: public BaseType
{
/* Fields must have positive size; the name might be empty. */
using Field = std::pair<std::string, int>;
std::string name;
std::vector<Field> fields;
/* For struct types */
std::vector<DataType> attributes;
/* Get field by name (throws if not found) */
Field named_field(std::string name) const;
};
/* Homogeneous fixed-size arrays. The number of elements must be non-zero, and
the number of elements times the size of the object type should equal the
size of the array type. */
struct ArrayType: public BaseType
{
/* Type of every element of the array */
struct DataType *object_type;
/* Number of elements in the array */
int elements;
};
/* Fixed-length string. Size must be positive. */
struct StringType: public BaseType
{
/* NOTE(review): this member hides the [size] attribute inherited from
BaseType; accessing [size] through a StringType yields this int. Confirm
that both are meant to coexist. */
int size;
/* Whether string stops at first NUL, or must account for all
characters up to the size regardless of NULs */
bool nul_terminated;
};
/* Heterogeneous structure types: a named, ordered list of named fields. */
struct StructType: public BaseType
{
/* Fields can be of any type since all are fixed-size. Each field is a
(name, type) pair. */
using Field = std::pair<std::string, DataType>;
/* Name of the structure type */
std::string name;
/* Fields of the structure */
std::vector<Field> fields;
};
/* Sum-type-style union of all the type descriptions above: basically a
std::variant whose alternatives have names. */
class DataType
{
public:
/* Variant identifier (think of it as a named sum type). The values are
listed in the same order as the alternatives of the variant [v]
below. */
enum DataKind { Integer=0, Bitfield=1, Array=2, String=3, Struct=4 };
DataKind kind() const noexcept;
/* Common properties */
size_t size() const noexcept;
size_t align() const noexcept;
/* Access to type-specific data. Exactly one of these can be accessed,
depending on the type kind. */
IntegerType const &integer() const;
BitfieldType const &bitfield() const;
ArrayType const &array() const;
StringType const &string() const;
StructType const &structs() const;
/* Converting constructors from any of these types (intentionally not
explicit, so that a specific type can be used where a DataType is
expected) */
DataType(IntegerType t): v(t) {}
DataType(BitfieldType t): v(t) {}
DataType(ArrayType t): v(t) {}
DataType(StringType t): v(t) {}
DataType(StructType t): v(t) {}
private:
/* Storage for the type-specific description */
std::variant<IntegerType, BitfieldType, ArrayType, StringType,
StructType> v;
};
//---
// Data values
//
// These objects are instances of the types described by DataType.
// These objects are instances of the types described by DataType. All valid
// instances are expected to be fully determined with no uninitialized memory.
//---
struct DataValue
{
/* Default constructor, gives undetermined values */
DataValue();
/* Data type affected to the value */
/* Each byte in the array is stored on an int16_t so that uninitialized
bytes can be found and diagnosed. */
DataType const *type;
/* Whether the value can be determined. If this boolean is false, the
rest of the data must be ignored. */
bool determined;
std::vector<int16_t> mem;
union {
/* Unsigned integer (all sizes) and bit fields */
uint32_t uinteger;
/* Signed integer (all sizes) */
int32_t integer;
};
/* Create value with no memory and no type */
DataValue();
/* Create value with uninitialized memory for that data type */
DataValue(DataType const *type);
/* For arrays (homogeneous) and structures (heterogeneous) */
std::vector<DataValue> entries;
/* Check whether the value is fully defined and initialized */
bool defined() {
return std::find(mem.begin(), mem.end(), -1) == mem.end();
}
/* Perform a read operation at the specified offset from the start of
the object. */
uint32_t read(int offset, int size);
/* Checks that the access is correct and fits within the value. */
void access(size_t offset, size_t size) const;
/* Read data from the value. Access must be 1, 2 or 4 bytes (possibly
unaligned) and must be in bounds. */
uint32_t read(size_t offset, size_t size) const;
/* Write data. Access must be 1, 2 or 4 bytes and in bounds. */
void write(size_t offset, size_t size, uint32_t contents);
/* Perform a write operation at the specified offset from the start of
the object. */
void write(int offset, int size, uint32_t contents);
/* Byte-based string representation */
std::string str() const noexcept;
};
//---

View File

@ -27,17 +27,16 @@ void register_instruction(Instruction ins)
//---
ConcreteInstructionArg::ConcreteInstructionArg():
value {}, syscall_id {-1}
value(), syscall_id(-1), reg_address((uint32_t)-1)
{
location = RelConstDomain().bottom();
reg_address = -1;
}
ConcreteInstruction::ConcreteInstruction(Instruction const &inst):
inst {inst}, args {},
leader {false}, delayslot {false},
terminal {false}, jump {false}, condjump {false},
jmptarget {0xffffffff}
inst(inst), args(),
leader(false), delayslot(false),
terminal(false), jump(false), condjump(false),
jmptarget(0xffffffff)
{
}

View File

@ -39,6 +39,11 @@ RelConst RelConstDomain::constant(uint32_t value) const noexcept
return b;
}
/* A relative constant is a plain constant exactly when its base is zero. */
bool RelConstDomain::is_constant(RelConst r) const noexcept
{
	bool const has_base = (r.base != 0);
	return !has_base;
}
//---
// Basic arithmetic
//---

View File

@ -22,16 +22,34 @@ void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci)
if(a.kind == Argument::PcRel)
{
uint32_t target = (pc & ~(a.opsize - 1)) + 4 + a.disp;
ca.location = RelConstDomain().constant(target);
uint32_t addr = (pc & ~(a.opsize - 1)) + 4 + a.disp;
ca.location = RelConstDomain().constant(addr);
/* Also compute the value */
uint32_t value = m_disasm.target().
/* Also compute the value. This is sign-extended from
16-bit with mov.w. There is no mov.b for this
instruction. */
Target &t = m_disasm.target();
uint32_t v;
switch(i.opsize)
{
case 2:
v = t.read_i16(addr);
break;
case 4:
v = t.read_i32(addr);
break;
default:
throw std::runtime_error("Wrong pcrel opsize");
}
ca.value = DataValue(IntegerType::u32);
ca.value.write(0,4,v);
}
else if(a.kind == Argument::PcJump)
{
uint32_t target = pc + 4 + a.disp;
ca.location = RelConstDomain().constant(target);
uint32_t addr = pc + 4 + a.disp;
ca.location = RelConstDomain().constant(addr);
}
}

View File

@ -45,25 +45,57 @@ void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci)
{
auto &a = i.args[n];
Location &l = ci.args[n].location;
std::optional<DataValue> v = ci.args[n].value;
if(n) printf(", ");
if(a.kind == Argument::PcJump && l && hide_resolved_pcjump)
if(a.kind == Argument::PcJump)
{
printf("<%s>", l.str().c_str());
if(!l || !hide_resolved_pcjump)
printf("%s", a.str().c_str());
if(l)
printf("<%s>", l.str().c_str());
}
else if(a.kind == Argument::PcRel && l && hide_resolved_pcrel)
else if(a.kind == Argument::PcRel)
{
printf("<%s>", l.str().c_str());
pcrel(pc, a, l, v);
}
else
{
printf("%s", a.str().c_str());
if(l) printf(" <%s>", l.str().c_str());
}
}
printf("\n");
}
/* Print a PC-relative argument.
   @pc  Address of the instruction being printed
   @a   The argument as written in the instruction
   @l   Location computed for the argument (may be unresolved)
   @v   Value found at that location, if any

   The raw operand is printed unless the location is resolved and
   [hide_resolved_pcrel] is set. If the location is a known constant, it is
   followed by the address and/or the value, as selected by
   [hide_movpc_address]. */
void PrintPass::pcrel(uint32_t pc, Argument const &a, Location const &l,
	std::optional<DataValue> v)
{
	if(!l || !hide_resolved_pcrel)
	{
		printf("%s", a.str().c_str());
	}

	/* Without a constant address there is nothing else to show */
	if(!l || !RelConstDomain().is_constant(l)) return;

	auto reg_code = MemoryRegion::region_for(pc);
	auto reg_data = MemoryRegion::region_for(l.uval);

	/* Keep the setting in its own type. Storing it in a bool would collapse
	   every non-zero setting to 1 and make the comparisons against the
	   Hide_MovPC_* enumerators below unreliable. */
	auto const hma = hide_movpc_address;
	bool same_region = (reg_code && reg_code == reg_data);

	if(!v || hma == Hide_MovPC_Never ||
		(hma == Hide_MovPC_Region && !same_region))
	{
		/* Show the address, then the value in parentheses if known */
		printf("<%s>", l.str().c_str());
		if(v)
			printf("(%s)", v->str().c_str());
	}
	else
	{
		/* The value is known here (otherwise the first branch is taken)
		   and the address is hidden */
		printf("%s", v->str().c_str());
	}
}
} /* namespace FxOS */

View File

@ -1,93 +1,161 @@
#include <fxos/semantics.h>
#include <fxos/util.h>
#include <exception>
namespace FxOS {
//---
// Data types
//---
/* The six integer types are unique file-local objects; the rest of the
   program refers to them through the IntegerType::* pointers. */

/* Byte-sized integers */
static DataType _u8 { IntegerType(1, false) };
static DataType _i8 { IntegerType(1, true) };
DataType const *IntegerType::u8 = &_u8;
DataType const *IntegerType::i8 = &_i8;

/* Word-sized integers */
static DataType _u16 { IntegerType(2, false) };
static DataType _i16 { IntegerType(2, true) };
DataType const *IntegerType::u16 = &_u16;
DataType const *IntegerType::i16 = &_i16;

/* Longword-sized integers */
static DataType _u32 { IntegerType(4, false) };
static DataType _i32 { IntegerType(4, true) };
DataType const *IntegerType::u32 = &_u32;
DataType const *IntegerType::i32 = &_i32;
/* Look up a field of the bit field by name. Returns a copy of the first
   field whose name matches; throws std::domain_error if there is none. */
BitfieldType::Field BitfieldType::named_field(std::string name) const
{
	auto const match = std::find_if(fields.begin(), fields.end(),
		[&name](auto const &field) { return field.first == name; });

	if(match == fields.end())
		throw std::domain_error("No such field name in bit field");
	return *match;
}
/* The kind is the index of the alternative currently held by the variant;
   DataKind values are numbered to match. */
DataType::DataKind DataType::kind() const noexcept
{
	auto const alternative = v.index();
	return static_cast<DataKind>(alternative);
}
/* Size of the type, read from the [size] attribute of whichever specific
   type is currently held. Yields 0 if the kind is not one of the known
   values. */
size_t DataType::size() const noexcept
{
	size_t bytes = 0;

	switch(kind())
	{
	case Integer:
		bytes = integer().size;
		break;
	case Bitfield:
		bytes = bitfield().size;
		break;
	case Array:
		bytes = array().size;
		break;
	case String:
		bytes = string().size;
		break;
	case Struct:
		bytes = structs().size;
		break;
	}

	return bytes;
}
/* Typed accessors. Each one selects the variant alternative by its DataKind
   index and throws std::bad_variant_access if the type is of another kind. */

IntegerType const &DataType::integer() const
{
	return std::get<Integer>(v);
}

BitfieldType const &DataType::bitfield() const
{
	return std::get<Bitfield>(v);
}

ArrayType const &DataType::array() const
{
	return std::get<Array>(v);
}

StringType const &DataType::string() const
{
	return std::get<String>(v);
}

StructType const &DataType::structs() const
{
	return std::get<Struct>(v);
}
//---
// Data values
//---
DataValue::DataValue():
type(nullptr), determined(false)
type {nullptr}
{
}
uint32_t DataValue::read(int offset, int size)
DataValue::DataValue(DataType const *type):
type {type}, mem(type->size(), (int16_t)-1)
{
if(offset < 0 || size < 0 || (type->size && offset+size >= type->size))
throw std::logic_error("Invalid read into simulated data");
if(!size || size & (size - 1))
throw std::logic_error("Simulated read not a power of 2");
if(type->kind==DataType::Integral || type->kind==DataType::BitField)
{
int shift = (4 - size - offset) << 3;
uint32_t u = uinteger >> shift;
return (size == 4) ? u :
(size == 2) ? (uint16_t)u :
(uint8_t)u;
}
else if(type->kind == DataType::Array)
{
int elsize = type->arraytype->size;
int index = offset / elsize;
if(index >= (int)entries.size())
throw std::logic_error("Read out of array bounds");
/* Will throw if reading from two entries simultaneously */
return entries[index].read(offset % elsize, size);
}
else if(type->kind == DataType::Array)
{
int index = 0;
while(offset >= type->attributes[index].size)
{
index++;
offset -= type->attributes[index].size;
}
return entries[index].read(offset, size);
}
throw std::logic_error("Read into unknown DataValue kind");
}
void DataValue::write(int offset, int size, uint32_t contents)
void DataValue::access(size_t offset, size_t size) const
{
if(offset < 0 || size < 0 || (type->size && offset+size >= type->size))
throw std::logic_error("Invalid write into simulated data");
if(!size || size & (size - 1))
throw std::logic_error("Simulated write not a power of 2");
if(size != 1 && size != 2 && size != 4)
throw std::logic_error("Invalid simulated access size");
if(offset + size > mem.size())
throw std::logic_error("Access overflows from data");
}
if(type->kind==DataType::Integral || type->kind==DataType::BitField)
uint32_t DataValue::read(size_t offset, size_t size) const
{
access(offset, size);
uint32_t result = 0;
while(size--)
{
uint32_t mask = (size == 1) ? 0xff :
(size == 2) ? 0xffff :
0xffffffff;
int16_t byte = mem[offset++];
if(byte == -1)
throw std::logic_error("Read uninitialized value");
int shift = (4 - size - offset) << 3;
uinteger = (uinteger & ~(mask << shift)) | (contents << shift);
result = (result << 8) | byte;
}
else if(type->kind == DataType::Array)
return result;
}
void DataValue::write(size_t offset, size_t size, uint32_t contents)
{
access(offset, size);
offset += size;
while(size-- > 0)
{
int elsize = type->arraytype->size;
int index = offset / elsize;
if(index >= (int)entries.size())
throw std::logic_error("Write out of array bounds");
entries[index].write(offset % elsize, size, contents);
mem[--offset] = contents & 0xff;
contents >>= 8;
}
else if(type->kind == DataType::Array)
}
std::string DataValue::str() const noexcept
{
std::string result;
switch(type->kind())
{
int index = 0;
while(offset >= type->attributes[index].size)
/* Format all integers in hexadecimal */
case DataType::Integer:
return format("0x%0*x", 2*type->size(), read(0,type->size()));
/* TODO: Print data values of complex types */
case DataType::Bitfield:
case DataType::Array:
case DataType::String:
case DataType::Struct:
/* If the type is not supported, use hexadecimal notation */
default:
for(size_t i = 0; i < mem.size(); i++)
{
index++;
offset -= type->attributes[index].size;
int16_t byte = mem[i];
if(byte == -1) result += " UND";
else result += format(" %02x", byte);
}
entries[index].write(offset, size, contents);
result[0] = '{';
result += '}';
}
throw std::logic_error("Write into unknown DataValue kind");
return result;
}
} /* namespace FxOS */