fxos/include/fxos/semantics.h

181 lines
5.0 KiB
C++

//---
// fxos.semantics: Analyzed data types and locations (OS semantics)
//---
#ifndef LIBFXOS_SEMANTICS_H
#define LIBFXOS_SEMANTICS_H
#include <fxos/lang.h>
#include <fxos/domains.h>

#include <algorithm>
#include <memory>
#include <utility>
#include <variant>
namespace FxOS {
//---
// Data type representation
//
// The abstract interpreter supports the following fixed-size data types when
// analyzing data movement and access:
//   Integers                i8 u8 i16 u16 i32 u32    (regs, mem)
//   Bit fields over ints    T { <fields> }           (regs, mem)
//   Arrays (fixed-size)     T[n]                     (mem)
//   Strings (fixed-size)    char[n]                  (mem)
//   Structures              struct { <fields> }      (mem)
//---
/* Forward declaration: the type descriptions below refer to DataType (through
   pointers and structure fields) before the class itself is defined. */
class DataType;
/* Base type: information common to every data type. Used as a mixin; the
   concrete type descriptions inherit from it.

   Both members default to 0, which is not a valid value for either of them.
   A description that has not been filled in yet is therefore recognizable
   instead of holding indeterminate values. */
struct BaseType
{
    /* Type size in bytes, as would be returned by sizeof(). Must be 1, 2
       or 4 for integral types and bit fields. Cannot be 0 for a valid type
       because all considered types are fixed-size and finite. */
    size_t size = 0;

    /* Type alignment; can only be 1, 2 or 4 for a valid type. */
    size_t align = 0;
};
/* Integer type; of byte, word or longword size. Plus signedness. This kind is
so small that it is enumerated. */
struct IntegerType: public BaseType
{
static DataType const *u8, *i8, *u16, *i16, *u32, *i32;
IntegerType(size_t _size, bool _issigned) {
size = align = _size;
issigned = _issigned;
}
/* Whether the type is signed */
bool issigned;
};
/* Bit field laid over a byte, word or longword. Invariant to maintain: the
   sizes of all fields add up to the size of the type. */
struct BitfieldType: public BaseType
{
    /* A field is a (name, size) pair. The size must be positive; the name
       may be empty. */
    using Field = std::pair<std::string, int>;

    /* Name of the bit field type */
    std::string name;

    /* Fields of the bit field type */
    std::vector<Field> fields;

    /* Look up a field by its name; throws if there is no such field. Note
       that the parameter shadows the `name` member inside the function. */
    Field named_field(std::string name) const;
};
/* Homogeneous fixed-size arrays. The number of elements cannot be zero, and
   the number of elements times the size of the object type should equal the
   size of the array type. */
struct ArrayType: public BaseType
{
    /* Type of every element of the array. The elaborated specifier uses
       `class` to agree with the forward declaration of DataType; mixing
       `struct` and `class` for the same type triggers mismatched-tag
       warnings on some compilers. */
    class DataType *object_type;

    /* Number of elements in the array */
    int elements;
};
/* String of fixed length. The size must be positive. */
struct StringType: public BaseType
{
    /* Size of the string.
       NOTE(review): this member has the same name as BaseType::size and
       hides it inside StringType; the two are distinct objects of
       different types (int here, size_t in the base). Confirm which one
       users are expected to fill in and read. */
    int size;

    /* True if the string ends at the first NUL; false if every character
       up to the size is part of the string, NULs included. */
    bool nul_terminated;
};
/* Heterogeneous structure types. */
struct StructType: public BaseType
{
    /* A field is a (name, type) pair. Any type is allowed as a field type
       because every type has a fixed size. */
    using Field = std::pair<std::string, DataType>;

    /* Name of the structure type */
    std::string name;

    /* Fields of the structure type */
    std::vector<Field> fields;
};
/* Sum-type-style union of all the type descriptions. Basically a variant
   whose alternatives are accessed by name. */
class DataType
{
public:
    /* Variant identifier (think of it as a named sum type) */
    enum DataKind { Integer=0, Bitfield=1, Array=2, String=3, Struct=4 };

    /* Kind of type currently held */
    DataKind kind() const noexcept;

    /* Common properties */
    size_t size() const noexcept;
    size_t align() const noexcept;

    /* Access to type-specific data. Exactly one of these can be accessed,
       depending on the type kind. */
    IntegerType const &integer() const;
    BitfieldType const &bitfield() const;
    ArrayType const &array() const;
    StringType const &string() const;
    StructType const &structs() const;

    /* Converting constructors from any of these types. The argument is
       received by value and then moved into the variant, so that the
       strings and vectors held by some descriptions are not copied a
       second time. */
    DataType(IntegerType t): v(std::move(t)) {}
    DataType(BitfieldType t): v(std::move(t)) {}
    DataType(ArrayType t): v(std::move(t)) {}
    DataType(StringType t): v(std::move(t)) {}
    DataType(StructType t): v(std::move(t)) {}

private:
    /* Alternatives are listed in the same order as the DataKind values */
    std::variant<IntegerType, BitfieldType, ArrayType, StringType,
        StructType> v;
};
//---
// Data values
//
// These objects are instances of the types described by DataType. All valid
// instances are expected to be fully determined with no uninitialized memory.
//---
struct DataValue
{
    /* Type that this value is an instance of */
    DataType const *type;

    /* Contents of the value. Each byte is stored on an int16_t so that
       uninitialized bytes can be found and diagnosed; defined() looks for
       the marker -1. */
    std::vector<int16_t> mem;

    /* Create value with no memory and no type */
    DataValue();

    /* Create value with uninitialized memory for that data type */
    DataValue(DataType const *type);

    /* Check whether the value is fully defined and initialized, i.e. no
       element of mem equals -1. A value with no memory is vacuously
       defined. This is a read-only query, hence const like the other
       accessors, so it can be called on const values. */
    bool defined() const {
        return std::find(mem.begin(), mem.end(), -1) == mem.end();
    }

    /* Checks that the access is correct and fits within the value.
       NOTE(review): nothing is returned, so failure is presumably reported
       by throwing; confirm against the implementation. */
    void access(size_t offset, size_t size) const;

    /* Read data from the value. Access must be 1, 2 or 4 bytes (possibly
       unaligned) and must be in bounds. */
    uint32_t read(size_t offset, size_t size) const;

    /* Write data. Access must be 1, 2 or 4 bytes and in bounds. */
    void write(size_t offset, size_t size, uint32_t contents);

    /* Byte-based string representation */
    std::string str() const noexcept;
};
//---
// Location representation
//
// The abstract interpreter keeps track of data stored at registers, memory
// addresses and mapped modules as long as the exact location fits within the
// expressive power of a RelConst.
//---
/* A location is represented directly by a RelConst.
   NOTE(review): RelConst is not declared in this header; presumably it comes
   from <fxos/domains.h> -- confirm. */
using Location = RelConst;
} /* namespace FxOS */
#endif /* LIBFXOS_SEMANTICS_H */