//--- // fxos.semantics: Analyzed data types and locations (OS semantics) //--- #ifndef LIBFXOS_SEMANTICS_H #define LIBFXOS_SEMANTICS_H #include #include #include #include #include namespace FxOS { //--- // Data type representation // // The abstract interpreter supports the following fixed-size data types when // analyzing data movement and access: // Integers i8 u8 i16 u16 i32 u32 (regs, mem) // Bit fields over ints T { } (mem) //--- class DataType; /* Base type: common information for all types (mixin) */ struct BaseType { /* Type size in bytes, as would be returned by sizeof(). Must be 1, 2 or 4 for integral types and bit fields. Cannot be 0 because all considered types are fixed-size and finite. */ size_t size; /* Type alignment, can only be 1, 2 or 4 */ size_t align; }; /* Integer type; of byte, word or longword size. Plus signedness. This kind is so small that it is enumerated. */ struct IntegerType: public BaseType { static DataType const *u8, *i8, *u16, *i16, *u32, *i32; IntegerType(size_t _size, bool _issigned) { size = align = _size; issigned = _issigned; } /* Whether the type is signed */ bool issigned; }; /* Bit fields over bytes, words or longwords. This should satisfy the invariant that the sum of the field sizes is equal to the type size. */ struct BitfieldType: public BaseType { /* Fields must have positive size; the name might be empty. */ using Field = std::pair; std::string name; std::vector fields; /* Get field by name (throws if not found) */ Field named_field(std::string name) const; }; /* Homogeneous fixed-size arrays. The number of elements cannot be set to non-zero, and the number of elements times the size of the object type should equal the size of the array type. */ struct ArrayType: public BaseType { struct DataType *object_type; int elements; }; /* Fixed-length string. Size must be positive. */ struct StringType: public BaseType { int size; /* Whether string stops at first NUL, or must account for all characters up to the size regardless of NULs */ bool nul_terminated; }; /* Heterogenous structure types. */ struct StructType: public BaseType { /* Fields can be of any type since all are fixed-size. */ using Field = std::pair; std::string name; std::vector fields; }; /* Sum-type-style union. Basically a variant with NAMES. Thank you. */ class DataType { public: /* Variant identifier (think of it as a named sum type */ enum DataKind { Integer=0, Bitfield=1, Array=2, String=3, Struct=4 }; DataKind kind() const noexcept; /* Common properties */ size_t size() const noexcept; size_t align() const noexcept; /* Access to type-specific data. Exactly one of these can be accessed, depending on the type kind. */ IntegerType const &integer() const; BitfieldType const &bitfield() const; ArrayType const &array() const; StringType const &string() const; StructType const &structs() const; /* Converting constructors from any of these types */ DataType(IntegerType t): v(t) {} DataType(BitfieldType t): v(t) {} DataType(ArrayType t): v(t) {} DataType(StringType t): v(t) {} DataType(StructType t): v(t) {} private: std::variant v; }; //--- // Data values // // These objects are instances of the types described by DataType. All valid // instances are expected to be fully determined with no uninitialized memory. //--- struct DataValue { /* Each byte in the array is stored on an int16_t so that uninitialized bytes can be found and diagnosed. */ DataType const *type; std::vector mem; /* Create value with no memory and no tyê */ DataValue(); /* Create value with uninitialized memory for that data type */ DataValue(DataType const *type); /* Check whether the value is fully defined and initialized */ bool defined() { return std::find(mem.begin(), mem.end(), -1) == mem.end(); } /* Checks that the access is correct and fits witin the value. */ void access(size_t offset, size_t size) const; /* Read data from the value. Access must be 1, 2 or 4 bytes (possibly unaligned) and must be in bounds. */ uint32_t read(size_t offset, size_t size) const; /* Write data. Access must be 1, 2 or 4 bytes and in bounds. */ void write(size_t offset, size_t size, uint32_t contents); /* Byte-based string representation */ std::string str() const noexcept; }; //--- // Location representation // // The abstract interpreter keeps track of data stored at registers, memory // addresses and mapped modules as long as the exact location fits within the // expressive power of a RelConst. //--- using Location = RelConst; } /* namespace FxOS */ #endif /* LIBFXOS_SEMANTICS_H */