2022-03-27 18:32:19 +02:00
|
|
|
// fxos/semantics: High-level data types and values, and location tracking
//
// TODO: This is a work in progress, not really tested/integrated yet.
//
// This header is intended to provide the tools needed to analyze disassembled
// code. The main idea is to assign to some *location* a *type* and possibly
// a *value*.
//
// The locations considered are defined by RelConst elements. The data types
// are 8-bit, 16-bit and 32-bit integers, arrays, strings, bit fields and
// structures.
//
// Notes:
// - Aliasing is a huge issue which might result in incorrect analysis. (!)
// - The current interface is inefficient and cumbersome. I don't like it.
//---
|
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
#ifndef FXOS_SEMANTICS_H
|
|
|
|
#define FXOS_SEMANTICS_H
|
2019-12-14 22:33:57 +01:00
|
|
|
|
2019-12-16 22:14:02 +01:00
|
|
|
#include <fxos/lang.h>
#include <fxos/ai/RelConst.h>

#include <algorithm>
#include <memory>
#include <utility>
#include <variant>
|
2019-12-16 22:14:02 +01:00
|
|
|
|
2019-12-14 22:33:57 +01:00
|
|
|
namespace FxOS {
|
|
|
|
|
2019-12-16 22:14:02 +01:00
|
|
|
//---
// Data type representation
//
// The abstract interpreter supports the following fixed-size data types when
// analyzing data movement and access:
//   Integers              i8 u8 i16 u16 i32 u32   (regs, mem)
//   Bit fields over ints  T { <fields> }          (regs, mem)
//   Arrays (fixed-size)   T[n]                    (mem)
//   Strings (fixed-size)  char[n]                 (mem)
//   Structures            struct { <fields> }     (mem)
//---
|
2019-12-14 22:33:57 +01:00
|
|
|
|
2020-02-12 16:33:08 +01:00
|
|
|
/* Forward declaration: the type descriptions that follow refer to DataType
   before the class itself is defined. */
class DataType;
|
|
|
|
|
|
|
|
/* Base type: common information for all types (mixin). Both members default
   to 0, which is never a valid size or alignment, so a type whose layout has
   not been filled in yet is recognizable instead of holding indeterminate
   values. */
struct BaseType
{
    /* Type size in bytes, as would be returned by sizeof(). Must be 1, 2
       or 4 for integral types and bit fields. Cannot be 0 for a valid type
       because all considered types are fixed-size and finite. */
    int size = 0;

    /* Type alignment; can only be 1, 2 or 4 for a valid type */
    int align = 0;
};
|
|
|
|
|
|
|
|
/* Integer type; of byte, word or longword size. Plus signedness. This kind is
|
|
|
|
so small that it is enumerated. */
|
|
|
|
struct IntegerType: public BaseType
|
|
|
|
{
|
2022-03-27 18:32:19 +02:00
|
|
|
static DataType const *u8, *i8, *u16, *i16, *u32, *i32;
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
IntegerType(size_t _size, bool _issigned) {
|
|
|
|
size = align = _size;
|
|
|
|
issigned = _issigned;
|
|
|
|
}
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
/* Whether the type is signed */
|
|
|
|
bool issigned;
|
2020-02-12 16:33:08 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Bit fields over bytes, words or longwords. This should satisfy the invariant
|
|
|
|
that the sum of the field sizes is equal to the type size. */
|
|
|
|
struct BitfieldType: public BaseType
|
|
|
|
{
|
2022-03-27 18:32:19 +02:00
|
|
|
/* Fields must have positive size; the name might be empty. */
|
|
|
|
using Field = std::pair<std::string, int>;
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
std::string name;
|
|
|
|
std::vector<Field> fields;
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
/* Get field by name (throws if not found) */
|
|
|
|
Field named_field(std::string name) const;
|
2020-02-12 16:33:08 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Homogeneous fixed-size arrays. The number of elements cannot be set to
|
|
|
|
non-zero, and the number of elements times the size of the object type
|
|
|
|
should equal the size of the array type. */
|
|
|
|
struct ArrayType: public BaseType
|
|
|
|
{
|
2022-03-27 18:32:19 +02:00
|
|
|
struct DataType *object_type;
|
|
|
|
int elements;
|
2020-02-12 16:33:08 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Fixed-length string. Size must be positive. */
|
|
|
|
struct StringType: public BaseType
|
|
|
|
{
|
2022-03-27 18:32:19 +02:00
|
|
|
int size;
|
|
|
|
/* Whether string stops at first NUL, or must account for all
|
|
|
|
characters up to the size regardless of NULs */
|
|
|
|
bool nul_terminated;
|
2020-02-12 16:33:08 +01:00
|
|
|
};
|
|
|
|
|
2020-02-15 18:42:14 +01:00
|
|
|
/* Heterogeneous structure types. */
|
2020-02-12 16:33:08 +01:00
|
|
|
struct StructType: public BaseType
|
|
|
|
{
|
2022-03-27 18:32:19 +02:00
|
|
|
/* Fields can be of any type since all are fixed-size. */
|
|
|
|
using Field = std::pair<std::string, DataType>;
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
std::string name;
|
|
|
|
std::vector<Field> fields;
|
2020-02-12 16:33:08 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Sum-type-style union. Basically a variant with NAMES. Thank you. */
|
|
|
|
class DataType
|
|
|
|
{
|
|
|
|
public:
|
2022-03-27 18:32:19 +02:00
|
|
|
/* Variant identifier (think of it as a named sum type) */
|
|
|
|
enum DataKind { Integer=0, Bitfield=1, Array=2, String=3, Struct=4 };
|
|
|
|
DataKind kind() const noexcept;
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
/* Common properties */
|
|
|
|
size_t size() const noexcept;
|
|
|
|
size_t align() const noexcept;
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
/* Access to type-specific data. Exactly one of these can be accessed,
|
|
|
|
depending on the type kind. */
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
IntegerType const &integer() const;
|
|
|
|
BitfieldType const &bitfield() const;
|
|
|
|
ArrayType const &array() const;
|
|
|
|
StringType const &string() const;
|
2022-03-28 21:59:30 +02:00
|
|
|
StructType const &structure() const;
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
/* Converting constructors from any of these types */
|
2020-02-12 16:33:08 +01:00
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
DataType(IntegerType t): v(t) {}
|
|
|
|
DataType(BitfieldType t): v(t) {}
|
|
|
|
DataType(ArrayType t): v(t) {}
|
|
|
|
DataType(StringType t): v(t) {}
|
|
|
|
DataType(StructType t): v(t) {}
|
2020-02-12 16:33:08 +01:00
|
|
|
|
|
|
|
private:
|
2022-03-27 18:32:19 +02:00
|
|
|
std::variant<IntegerType,BitfieldType,ArrayType,StringType,StructType> v;
|
2019-12-14 22:33:57 +01:00
|
|
|
};
|
|
|
|
|
2019-12-20 11:17:09 +01:00
|
|
|
//---
|
2020-02-12 07:53:00 +01:00
|
|
|
// Data values
|
2019-12-20 11:17:09 +01:00
|
|
|
//
|
2020-02-12 16:33:08 +01:00
|
|
|
// These objects are instances of the types described by DataType. All valid
|
|
|
|
// instances are expected to be fully determined with no uninitialized memory.
|
2019-12-20 11:17:09 +01:00
|
|
|
//---
|
2019-12-14 22:33:57 +01:00
|
|
|
|
2020-02-12 07:53:00 +01:00
|
|
|
struct DataValue
|
2019-12-14 22:33:57 +01:00
|
|
|
{
|
2022-03-27 18:32:19 +02:00
|
|
|
/* Each byte in the array is stored on an int16_t so that uninitialized
|
|
|
|
bytes can be found and diagnosed. */
|
|
|
|
DataType const *type;
|
|
|
|
std::vector<int16_t> mem;
|
|
|
|
|
|
|
|
/* Create value with no memory and no type */
|
|
|
|
DataValue();
|
|
|
|
/* Create value with uninitialized memory for that data type */
|
|
|
|
DataValue(DataType const *type);
|
|
|
|
|
|
|
|
/* Check whether the value is fully defined and initialized */
|
|
|
|
bool defined() const {
|
|
|
|
return std::find(mem.begin(), mem.end(), -1) == mem.end();
|
|
|
|
}
|
|
|
|
operator bool() const {
|
|
|
|
return defined();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Checks that the access is correct and fits within the value. */
|
|
|
|
void access(size_t offset, size_t size) const;
|
|
|
|
/* Read data from the value. Access must be 1, 2 or 4 bytes (possibly
|
|
|
|
unaligned) and must be in bounds. */
|
|
|
|
uint32_t read(size_t offset, size_t size) const;
|
|
|
|
/* Write data. Access must be 1, 2 or 4 bytes and in bounds. */
|
|
|
|
void write(size_t offset, size_t size, uint32_t contents);
|
|
|
|
|
|
|
|
/* Retrieve value as uin32_t - only valid for Integer types */
|
|
|
|
uint32_t uinteger() const;
|
|
|
|
|
|
|
|
/* Byte-based string representation */
|
|
|
|
std::string str() const noexcept;
|
2019-12-14 22:33:57 +01:00
|
|
|
};
|
|
|
|
|
2020-02-12 07:53:00 +01:00
|
|
|
//---
|
|
|
|
// Location representation
|
|
|
|
//
|
|
|
|
// The abstract interpreter keeps track of data stored at registers, memory
|
|
|
|
// addresses and mapped modules as long as the exact location fits within the
|
|
|
|
// expressive power of a RelConst.
|
|
|
|
//---
|
|
|
|
|
|
|
|
/* A location is represented directly by a RelConst. */
using Location = RelConst;
|
|
|
|
|
2019-12-14 22:33:57 +01:00
|
|
|
} /* namespace FxOS */
|
|
|
|
|
2022-03-27 18:32:19 +02:00
|
|
|
#endif /* FXOS_SEMANTICS_H */
|