fxos/include/fxos/semantics.h

220 lines
6.1 KiB
C++

// fxos/semantics: High-level data types and values, and location tracking
//
// TODO: This is a work in progress, not really tested/integrated yet.
//
// This header is intended to provide the tools needed to analyze disassembled
// code. The main idea is to assign to each *location* a *type* and possibly
// a *value*.
//
// The locations considered are defined by RelConst elements. The data types
// are 8-bit, 16-bit and 32-bit integers, arrays, strings, bit fields and
// structures.
//
// Notes:
// - Aliasing is a huge issue which might result in incorrect analysis. (!)
// - The current interface is inefficient and cumbersome. I don't like it.
//---
#ifndef FXOS_SEMANTICS_H
#define FXOS_SEMANTICS_H
#include <fxos/lang.h>
#include <fxos/ai/RelConst.h>
#include <algorithm>
#include <memory>
#include <utility>
#include <variant>
#include <vector>
namespace FxOS {
//---
// Data type representation
//
// The abstract interpreter supports the following fixed-size data types when
// analyzing data movement and access:
// Integers i8 u8 i16 u16 i32 u32 (regs, mem)
// Bit fields over ints T { <fields> } (regs, mem)
// Arrays (fixed-size) T[n] (mem)
// Strings (fixed-size) char[n] (mem)
// Structures struct { <fields> } (mem)
//---
/* Forward declaration: the per-kind structs below refer to DataType (through
   pointers and field lists) before the class itself is defined. */
class DataType;
/* Base type: size and alignment information common to all types (mixin). The
   per-kind structs in this header all derive from it. */
struct BaseType
{
/* Type size in bytes, as would be returned by sizeof(). Must be 1, 2
   or 4 for integral types and bit fields. Cannot be 0 because all
   considered types are fixed-size and finite. */
int size;
/* Type alignment, can only be 1, 2 or 4 */
int align;
/* NOTE(review): neither member has a default initializer, so a
   default-initialized BaseType holds indeterminate values until both are
   assigned. */
};
/* Integer type of byte, word or longword size, together with its signedness.
   This kind is so small that its instances are enumerated as static members. */
struct IntegerType: public BaseType
{
    /* The enumerated integer types. Only declared here; the definitions are
       not part of this header. */
    static DataType const *u8, *i8, *u16, *i16, *u32, *i32;

    /* Build an integer type that is _size bytes wide. The alignment is set to
       the same value as the size. */
    IntegerType(size_t _size, bool _issigned): issigned(_issigned)
    {
        /* BaseType stores plain ints, hence the explicit conversion */
        int const bytes = static_cast<int>(_size);
        size = bytes;
        align = bytes;
    }

    /* Whether the type is signed */
    bool issigned;
};
/* Bit fields over bytes, words or longwords. This should satisfy the invariant
   that the sum of the field sizes is equal to the type size.
   NOTE(review): field sizes are presumably counted in bits while
   BaseType::size is documented in bytes - confirm the unit in which the
   invariant is meant to hold. */
struct BitfieldType: public BaseType
{
/* A field is a (name, size) pair. Fields must have positive size; the name
   might be empty. */
using Field = std::pair<std::string, int>;
/* Name of the bit field type itself */
std::string name;
/* List of fields making up the type */
std::vector<Field> fields;
/* Get field by name (throws if not found). The field is returned by value.
   Note that the parameter shares its name with the `name` member above; the
   implementation is not in this header. */
Field named_field(std::string name) const;
};
/* Homogeneous fixed-size arrays. The number of elements cannot be zero, and
   the number of elements times the size of the object type should equal the
   size of the array type. */
struct ArrayType: public BaseType
{
/* Type of every element of the array. Stored as a raw pointer; this header
   does not say who owns the pointed-to DataType. */
class DataType *object_type;
/* Number of elements in the array */
int elements;
};
/* Fixed-length string. The length in bytes is the inherited BaseType::size,
   which must be positive. */
struct StringType: public BaseType
{
    /* No `size` member is declared here on purpose. Redeclaring it would
       shadow BaseType::size, so that code writing `s.size` on a StringType
       would update a different object than the one seen through a BaseType
       reference, leaving the common size field unset for strings. */

    /* Whether string stops at first NUL, or must account for all
       characters up to the size regardless of NULs */
    bool nul_terminated;
};
/* Heterogeneous structure types. */
struct StructType: public BaseType
{
/* A field is a (name, type) pair; the type is held by value. Fields can be
   of any type since all are fixed-size. DataType is only forward-declared
   at this point of the header. */
using Field = std::pair<std::string, DataType>;
/* Name of the structure type itself */
std::string name;
/* List of fields making up the structure */
std::vector<Field> fields;
};
/* Sum-type-style union: a std::variant over the per-kind type structs, wrapped
   in a class so that every alternative can be reached through a NAMED
   accessor. */
class DataType
{
public:
    /* Variant identifier (think of it as a named sum type). The enumerators
       are numbered in the same order as the alternatives of the variant `v`
       declared at the bottom of the class. */
    enum DataKind {
        Integer = 0,
        Bitfield = 1,
        Array = 2,
        String = 3,
        Struct = 4
    };
    /* Kind of the type currently held */
    DataKind kind() const noexcept;

    /* Common properties */
    size_t size() const noexcept;
    size_t align() const noexcept;

    /* Access to type-specific data. Exactly one of these can be accessed,
       depending on the type kind. These are not noexcept; what happens when
       the accessor does not match the kind is decided by the implementation,
       which is not part of this header. */
    IntegerType const &integer() const;
    BitfieldType const &bitfield() const;
    ArrayType const &array() const;
    StringType const &string() const;
    StructType const &structure() const;

    /* Converting constructors from any of these types (intentionally not
       explicit). Arguments are taken by value; the two kinds that own
       strings and vectors are moved into the variant so that they are not
       deep-copied a second time. The other kinds only hold scalars and are
       simply copied. */
    DataType(IntegerType t): v(t)
    {
    }
    DataType(BitfieldType t): v(std::move(t))
    {
    }
    DataType(ArrayType t): v(t)
    {
    }
    DataType(StringType t): v(t)
    {
    }
    DataType(StructType t): v(std::move(t))
    {
    }

private:
    /* Storage for the per-kind data */
    std::variant<IntegerType, BitfieldType, ArrayType, StringType, StructType>
        v;
};
//---
// Data values
//
// These objects are instances of the types described by DataType. All valid
// instances are expected to be fully determined with no uninitialized memory.
//---
struct DataValue
{
    /* Type that this value is an instance of */
    DataType const *type;
    /* Contents of the value. Each byte is stored on an int16_t so that
       uninitialized bytes can be found and diagnosed; defined() treats -1 as
       the marker for such a byte. */
    std::vector<int16_t> mem;

    /* Create value with no memory and no type */
    DataValue();
    /* Create value with uninitialized memory for that data type */
    DataValue(DataType const *type);

    /* Check whether the value is fully defined and initialized, i.e. whether
       no byte of `mem` is equal to -1. */
    bool defined() const
    {
        auto const is_undefined = [](int16_t byte) { return byte == -1; };
        return std::none_of(mem.begin(), mem.end(), is_undefined);
    }
    /* Implicit conversion to bool; same result as defined() */
    operator bool() const
    {
        return defined();
    }

    /* Checks that the access is correct and fits within the value. */
    void access(size_t offset, size_t size) const;
    /* Read data from the value. Access must be 1, 2 or 4 bytes (possibly
       unaligned) and must be in bounds. */
    uint32_t read(size_t offset, size_t size) const;
    /* Write data. Access must be 1, 2 or 4 bytes and in bounds. */
    void write(size_t offset, size_t size, uint32_t contents);
    /* Retrieve value as uint32_t - only valid for Integer types */
    uint32_t uinteger() const;
    /* Byte-based string representation */
    std::string str() const noexcept;
};
//---
// Location representation
//
// The abstract interpreter keeps track of data stored at registers, memory
// addresses and mapped modules as long as the exact location fits within the
// expressive power of a RelConst.
//---
/* A location is represented directly by a RelConst; no wrapper type is
   introduced. */
using Location = RelConst;
} /* namespace FxOS */
#endif /* FXOS_SEMANTICS_H */