// fxos/semantics: High-level data types and values, and location tracking // // TODO: This is a work in progress, not really tested/integrated yet. // // This header is intended to provide the tools needed to analyze disassembled // code. The main idea is to assign to some *location* and *type* and possibly // a *value*. // // The locations considered are defined by RelConst elements. The data types // are 8-bit, 16-bit and 32-bit integers, arrays, strings, bit fields and // structures. // // Notes: // - Aliasing is a huge issue which might result in incorrect analysis. (!) // - The current interface is inefficient and cumbersome. I don't like it //--- #ifndef FXOS_SEMANTICS_H #define FXOS_SEMANTICS_H #include #include #include #include #include #include namespace FxOS { //--- // Data type representation // // The abstract interpreter supports the following fixed-size data types when // analyzing data movement and access: // Integers i8 u8 i16 u16 i32 u32 (regs, mem) // Bit fields over ints T { } (mem) //--- class DataType; /* Base type: common information for all types (mixin) */ struct BaseType { /* Type size in bytes, as would be returned by sizeof(). Must be 1, 2 or 4 for integral types and bit fields. Cannot be 0 because all considered types are fixed-size and finite. */ int size; /* Type alignment, can only be 1, 2 or 4 */ int align; }; /* Integer type; of byte, word or longword size. Plus signedness. This kind is so small that it is enumerated. */ struct IntegerType: public BaseType { static DataType const *u8, *i8, *u16, *i16, *u32, *i32; IntegerType(size_t _size, bool _issigned) { size = align = _size; issigned = _issigned; } /* Whether the type is signed */ bool issigned; }; /* Bit fields over bytes, words or longwords. This should satisfy the invariant that the sum of the field sizes is equal to the type size. */ struct BitfieldType: public BaseType { /* Fields must have positive size; the name might be empty. */ using Field = std::pair; std::string name; std::vector fields; /* Get field by name (throws if not found) */ Field named_field(std::string name) const; }; /* Homogeneous fixed-size arrays. The number of elements cannot be set to non-zero, and the number of elements times the size of the object type should equal the size of the array type. */ struct ArrayType: public BaseType { class DataType *object_type; int elements; }; /* Fixed-length string. Size must be positive. */ struct StringType: public BaseType { int size; /* Whether string stops at first NUL, or must account for all characters up to the size regardless of NULs */ bool nul_terminated; }; /* Heterogeneous structure types. */ struct StructType: public BaseType { /* Fields can be of any type since all are fixed-size. */ using Field = std::pair; std::string name; std::vector fields; }; /* Sum-type-style union. Basically a variant with NAMES. Thank you. */ class DataType { public: /* Variant identifier (think of it as a named sum type) */ enum DataKind { Integer = 0, Bitfield = 1, Array = 2, String = 3, Struct = 4 }; DataKind kind() const noexcept; /* Common properties */ size_t size() const noexcept; size_t align() const noexcept; /* Access to type-specific data. Exactly one of these can be accessed, depending on the type kind. */ IntegerType const &integer() const; BitfieldType const &bitfield() const; ArrayType const &array() const; StringType const &string() const; StructType const &structure() const; /* Converting constructors from any of these types */ DataType(IntegerType t): v(t) { } DataType(BitfieldType t): v(t) { } DataType(ArrayType t): v(t) { } DataType(StringType t): v(t) { } DataType(StructType t): v(t) { } private: std::variant v; }; //--- // Data values // // These objects are instances of the types described by DataType. All valid // instances are expected to be fully determined with no uninitialized memory. //--- struct DataValue { /* Each byte in the array is stored on an int16_t so that uninitialized bytes can be found and diagnosed. */ DataType const *type; std::vector mem; /* Create value with no memory and no type */ DataValue(); /* Create value with uninitialized memory for that data type */ DataValue(DataType const *type); /* Check whether the value is fully defined and initialized */ bool defined() const { return std::find(mem.begin(), mem.end(), -1) == mem.end(); } operator bool() const { return defined(); } /* Checks that the access is correct and fits within the value. */ void access(size_t offset, size_t size) const; /* Read data from the value. Access must be 1, 2 or 4 bytes (possibly unaligned) and must be in bounds. */ uint32_t read(size_t offset, size_t size) const; /* Write data. Access must be 1, 2 or 4 bytes and in bounds. */ void write(size_t offset, size_t size, uint32_t contents); /* Retrieve value as uin32_t - only valid for Integer types */ uint32_t uinteger() const; /* Byte-based string representation */ std::string str() const noexcept; }; //--- // Location representation // // The abstract interpreter keeps track of data stored at registers, memory // addresses and mapped modules as long as the exact location fits within the // expressive power of a RelConst. //--- using Location = RelConst; } /* namespace FxOS */ #endif /* FXOS_SEMANTICS_H */