diff --git a/Makefile b/Makefile index a5d4c4c..a18179d 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ CC = gcc CXX = g++ # Compiler flags -CFLAGS = -Wall -Wextra -I include -D_GNU_SOURCE -std=c++17 -O3 -flto \ +CFLAGS = -Wall -Wextra -I include -D_GNU_SOURCE -std=c++17 -Og -g \ -fno-diagnostics-show-line-numbers CXXFLAGS = $(CFLAGS) # Linker flags @@ -78,7 +78,7 @@ bin/: # Dependency generation # -include $(wildcard build/*/*.d build/*/*/*.d) +include $(wildcard build/*/*.d build/*/*/*.d build/*/*/*/*.d) .PHONY: all all-lib all-fxos clean clean-lib clean-fxos distclean diff --git a/include/fxos/disasm-passes/print.h b/include/fxos/disasm-passes/print.h index 7e3ef01..7b5f5ab 100644 --- a/include/fxos/disasm-passes/print.h +++ b/include/fxos/disasm-passes/print.h @@ -48,6 +48,10 @@ public: } hide_movpc_address; /* TODO: More print pass parameters */ + +private: + void pcrel(uint32_t pc, Argument const &a, Location const &l, + std::optional v); }; } /* namespace FxOS */ diff --git a/include/fxos/disassembly.h b/include/fxos/disassembly.h index 56d3fc3..6e78473 100644 --- a/include/fxos/disassembly.h +++ b/include/fxos/disassembly.h @@ -35,9 +35,10 @@ struct ConcreteInstructionArg /* Location in CPU or memory, if that can be determined */ Location location; - /* The pointed value, if it can be determined. Valid only if the - location is specified; under some conditions the location can't be - determined by the type can. */ + /* Pointed value. If the exact value can't be determined, this object + evaluates to false. Sometimes, the type can be determined anyway, + and in this case its [type] attribute below is not null even though + the object evaluates to false. 
*/ DataValue value; //--- diff --git a/include/fxos/domains.h b/include/fxos/domains.h index 822da6e..85cee51 100644 --- a/include/fxos/domains.h +++ b/include/fxos/domains.h @@ -22,6 +22,9 @@ public: /* Construct abstract value from integer constant */ virtual T constant(uint32_t value) const noexcept = 0; + /* Check if value is constant */ + virtual bool is_constant(T) const noexcept = 0; + /* Basic arithmetic. Division and modulo are both non-trivial instruction sequences usually isolated in easily-identifiable subroutines, so we don't care about them. */ @@ -126,6 +129,7 @@ public: RelConst top() const noexcept override; RelConst constant(uint32_t value) const noexcept override; + bool is_constant(RelConst) const noexcept override; RelConst minus(RelConst) const noexcept override; RelConst add(RelConst, RelConst) const noexcept override; diff --git a/include/fxos/semantics.h b/include/fxos/semantics.h index c4443ff..66aafc5 100644 --- a/include/fxos/semantics.h +++ b/include/fxos/semantics.h @@ -8,100 +8,161 @@ #include #include #include +#include +#include namespace FxOS { //--- // Data type representation // -// The abstract interpreter supports the following data types when analyzing -// data movement and access: +// The abstract interpreter supports the following fixed-size data types when +// analyzing data movement and access: // Integers i8 u8 i16 u16 i32 u32 (regs, mem) // Bit fields over ints T { } (mem) //--- -struct DataType +class DataType; + +/* Base type: common information for all types (mixin) */ +struct BaseType { - enum DataKind { - /* Base types */ - Integral, - /* Bit fields over integers */ - BitField, - /* Structures (can only reside in memory) */ - Struct, - /* Arrays (can only reside in memory) */ - Array, - }; - - /* Type kind */ - DataKind kind; /* Type size in bytes, as would be returned by sizeof(). Must be 1, 2 - or 4 for integral types and bit fields. 
Might be 0 for arrays if the - size of the array is unknown */ - uint16_t size; + or 4 for integral types and bit fields. Cannot be 0 because all + considered types are fixed-size and finite. */ + size_t size; /* Type alignment, can only be 1, 2 or 4 */ - uint16_t align; + size_t align; +}; - /* Type of fields in bit fields */ - using Field = std::pair; +/* Integer type; of byte, word or longword size. Plus signedness. This kind is + so small that it is enumerated. */ +struct IntegerType: public BaseType +{ + static DataType const *u8, *i8, *u16, *i16, *u32, *i32; - union { - /* For integer types of size 1, whether to display as char - (might be extended to more attributes later) */ - bool ischar; - /* For array types, number of elements (0 if unknown or - variable-size NUL-terminated strings) */ - int elements; - }; + IntegerType(size_t _size, bool _issigned) { + size = align = _size; + issigned = _issigned; + } - /* The following members are not in the union because they have non- - trivial destructors/copy and I don't want to care. */ + /* Whether the type is signed */ + bool issigned; +}; - /* For array */ - std::shared_ptr arraytype; - /* For bit field types */ +/* Bit fields over bytes, words or longwords. This should satisfy the invariant + that the sum of the field sizes is equal to the type size. */ +struct BitfieldType: public BaseType +{ + /* Fields must have positive size; the name might be empty. */ + using Field = std::pair; + + std::string name; std::vector fields; - /* For struct types */ - std::vector attributes; + + /* Get field by name (throws if not found) */ + Field named_field(std::string name) const; +}; + +/* Homogeneous fixed-size arrays. The number of elements cannot be set to + zero, and the number of elements times the size of the object type + should equal the size of the array type. */ +struct ArrayType: public BaseType +{ + struct DataType *object_type; + int elements; +}; + +/* Fixed-length string. Size must be positive. 
*/ +struct StringType: public BaseType +{ + int size; + /* Whether string stops at first NUL, or must account for all + characters up to the size regardless of NULs */ + bool nul_terminated; +}; + +/* Heterogeneous structure types. */ +struct StructType: public BaseType +{ + /* Fields can be of any type since all are fixed-size. */ + using Field = std::pair; + + std::string name; + std::vector fields; +}; + +/* Sum-type-style union. Basically a variant with NAMES. Thank you. */ +class DataType +{ +public: + /* Variant identifier (think of it as a named sum type) */ + enum DataKind { Integer=0, Bitfield=1, Array=2, String=3, Struct=4 }; + DataKind kind() const noexcept; + + /* Common properties */ + size_t size() const noexcept; + size_t align() const noexcept; + + /* Access to type-specific data. Exactly one of these can be accessed, + depending on the type kind. */ + + IntegerType const &integer() const; + BitfieldType const &bitfield() const; + ArrayType const &array() const; + StringType const &string() const; + StructType const &structs() const; + + /* Converting constructors from any of these types */ + + DataType(IntegerType t): v(t) {} + DataType(BitfieldType t): v(t) {} + DataType(ArrayType t): v(t) {} + DataType(StringType t): v(t) {} + DataType(StructType t): v(t) {} + +private: + std::variant v; }; //--- // Data values // -// These objects are instances of the types described by DataType. +// These objects are instances of the types described by DataType. All valid +// instances are expected to be fully determined with no uninitialized memory. //--- struct DataValue { - /* Default constructor, gives undetermined values */ - DataValue(); - - /* Data type affected to the value */ + /* Each byte in the array is stored on an int16_t so that uninitialized + bytes can be found and diagnosed. */ DataType const *type; - /* Whether the value can be determined. If this boolean is false, the - rest of the data must be ignored. 
*/ - bool determined; + std::vector mem; - union { - /* Unsigned integer (all sizes) and bit fields */ - uint32_t uinteger; - /* Signed integer (all sizes) */ - int32_t integer; - }; + /* Create value with no memory and no type */ + DataValue(); + /* Create value with uninitialized memory for that data type */ + DataValue(DataType const *type); - /* For arrays (homogeneous) and structures (heterogeneous) */ - std::vector entries; + /* Check whether the value is fully defined and initialized */ + bool defined() { + return std::find(mem.begin(), mem.end(), -1) == mem.end(); + } - /* Perform a read operation at the specified offset from the start of - the object. */ - uint32_t read(int offset, int size); + /* Checks that the access is correct and fits within the value. */ + void access(size_t offset, size_t size) const; + /* Read data from the value. Access must be 1, 2 or 4 bytes (possibly + unaligned) and must be in bounds. */ + uint32_t read(size_t offset, size_t size) const; + /* Write data. Access must be 1, 2 or 4 bytes and in bounds. */ + void write(size_t offset, size_t size, uint32_t contents); - /* Perform a write operation at the specified offset from the start of - the object. 
*/ - void write(int offset, int size, uint32_t contents); + /* Byte-based string representation */ + std::string str() const noexcept; }; //--- diff --git a/lib/disassembly.cpp b/lib/disassembly.cpp index d437a58..fff8964 100644 --- a/lib/disassembly.cpp +++ b/lib/disassembly.cpp @@ -27,17 +27,16 @@ void register_instruction(Instruction ins) //--- ConcreteInstructionArg::ConcreteInstructionArg(): - value {}, syscall_id {-1} + value(), syscall_id(-1), reg_address((uint32_t)-1) { location = RelConstDomain().bottom(); - reg_address = -1; } ConcreteInstruction::ConcreteInstruction(Instruction const &inst): - inst {inst}, args {}, - leader {false}, delayslot {false}, - terminal {false}, jump {false}, condjump {false}, - jmptarget {0xffffffff} + inst(inst), args(), + leader(false), delayslot(false), + terminal(false), jump(false), condjump(false), + jmptarget(0xffffffff) { } diff --git a/lib/domains/relconst.cpp b/lib/domains/relconst.cpp index d062043..f651397 100644 --- a/lib/domains/relconst.cpp +++ b/lib/domains/relconst.cpp @@ -39,6 +39,11 @@ RelConst RelConstDomain::constant(uint32_t value) const noexcept return b; } +bool RelConstDomain::is_constant(RelConst r) const noexcept +{ + return r.base == 0; +} + //--- // Basic arithmetic //--- diff --git a/lib/passes/pcrel.cpp b/lib/passes/pcrel.cpp index b91f153..18ae315 100644 --- a/lib/passes/pcrel.cpp +++ b/lib/passes/pcrel.cpp @@ -22,16 +22,34 @@ void PcrelPass::analyze(uint32_t pc, ConcreteInstruction &ci) if(a.kind == Argument::PcRel) { - uint32_t target = (pc & ~(a.opsize - 1)) + 4 + a.disp; - ca.location = RelConstDomain().constant(target); + uint32_t addr = (pc & ~(a.opsize - 1)) + 4 + a.disp; + ca.location = RelConstDomain().constant(addr); - /* Also compute the value */ - uint32_t value = m_disasm.target(). + /* Also compute the value. This is sign-extended from + 16-bit with mov.w. There is no mov.b for this + instruction. 
*/ + Target &t = m_disasm.target(); + uint32_t v; + + switch(i.opsize) + { + case 2: + v = t.read_i16(addr); + break; + case 4: + v = t.read_i32(addr); + break; + default: + throw std::runtime_error("Wrong pcrel opsize"); + } + + ca.value = DataValue(IntegerType::u32); + ca.value.write(0,4,v); } else if(a.kind == Argument::PcJump) { - uint32_t target = pc + 4 + a.disp; - ca.location = RelConstDomain().constant(target); + uint32_t addr = pc + 4 + a.disp; + ca.location = RelConstDomain().constant(addr); } } diff --git a/lib/passes/print.cpp b/lib/passes/print.cpp index 94b1288..2c60588 100644 --- a/lib/passes/print.cpp +++ b/lib/passes/print.cpp @@ -45,25 +45,57 @@ void PrintPass::analyze(uint32_t pc, ConcreteInstruction &ci) { auto &a = i.args[n]; Location &l = ci.args[n].location; + std::optional v = ci.args[n].value; if(n) printf(", "); - if(a.kind == Argument::PcJump && l && hide_resolved_pcjump) + if(a.kind == Argument::PcJump) { - printf("<%s>", l.str().c_str()); + if(!l || !hide_resolved_pcjump) + printf("%s", a.str().c_str()); + if(l) + printf("<%s>", l.str().c_str()); } - else if(a.kind == Argument::PcRel && l && hide_resolved_pcrel) + else if(a.kind == Argument::PcRel) { - printf("<%s>", l.str().c_str()); + pcrel(pc, a, l, v); } else { printf("%s", a.str().c_str()); - if(l) printf(" <%s>", l.str().c_str()); } } printf("\n"); } +void PrintPass::pcrel(uint32_t pc, Argument const &a, Location const &l, + std::optional v) +{ + if(!l || !hide_resolved_pcrel) + { + printf("%s", a.str().c_str()); + } + if(!l || !RelConstDomain().is_constant(l)) return; + + auto reg_code = MemoryRegion::region_for(pc); + auto reg_data = MemoryRegion::region_for(l.uval); + + bool hma = hide_movpc_address; + bool same_region = (reg_code && reg_code == reg_data); + + if(!v || hma == Hide_MovPC_Never || + (hma == Hide_MovPC_Region && !same_region)) + { + printf("<%s>", l.str().c_str()); + if(v) + printf("(%s)", v->str().c_str()); + } + else if(v) + { + printf("%s", v->str().c_str()); + 
} +} + + } /* namespace FxOS */ diff --git a/lib/semantics.cpp b/lib/semantics.cpp index 4696d97..60c10c5 100644 --- a/lib/semantics.cpp +++ b/lib/semantics.cpp @@ -1,93 +1,161 @@ #include +#include + +#include namespace FxOS { +//--- +// Data types +//--- + +static DataType _u8(IntegerType(1, false)); +static DataType _i8(IntegerType(1, true)); +static DataType _u16(IntegerType(2, false)); +static DataType _i16(IntegerType(2, true)); +static DataType _u32(IntegerType(4, false)); +static DataType _i32(IntegerType(4, true)); + +DataType const *IntegerType::u8 = &_u8; +DataType const *IntegerType::i8 = &_i8; +DataType const *IntegerType::u16 = &_u16; +DataType const *IntegerType::i16 = &_i16; +DataType const *IntegerType::u32 = &_u32; +DataType const *IntegerType::i32 = &_i32; + +BitfieldType::Field BitfieldType::named_field(std::string name) const +{ + for(auto &f: fields) + { + if(f.first == name) return f; + } + + throw std::domain_error("No such field name in bit field"); +} + +DataType::DataKind DataType::kind() const noexcept +{ + return (DataKind)v.index(); +} + +size_t DataType::size() const noexcept +{ + switch(kind()) + { + case Integer: return integer().size; + case Bitfield: return bitfield().size; + case Array: return array().size; + case String: return string().size; + case Struct: return structs().size; + } + + return 0; +}; + +IntegerType const &DataType::integer() const +{ + return std::get(v); +} +BitfieldType const &DataType::bitfield() const +{ + return std::get(v); +} +ArrayType const &DataType::array() const +{ + return std::get(v); +} +StringType const &DataType::string() const +{ + return std::get(v); +} +StructType const &DataType::structs() const +{ + return std::get(v); +} + +//--- +// Data values +//--- + DataValue::DataValue(): - type(nullptr), determined(false) + type {nullptr} { } -uint32_t DataValue::read(int offset, int size) +DataValue::DataValue(DataType const *type): + type {type}, mem(type->size(), (int16_t)-1) { - if(offset < 0 
|| size < 0 || (type->size && offset+size >= type->size)) - throw std::logic_error("Invalid read into simulated data"); - if(!size || size & (size - 1)) - throw std::logic_error("Simulated read not a power of 2"); - - if(type->kind==DataType::Integral || type->kind==DataType::BitField) - { - int shift = (4 - size - offset) << 3; - uint32_t u = uinteger >> shift; - return (size == 4) ? u : - (size == 2) ? (uint16_t)u : - (uint8_t)u; - } - else if(type->kind == DataType::Array) - { - int elsize = type->arraytype->size; - int index = offset / elsize; - - if(index >= (int)entries.size()) - throw std::logic_error("Read out of array bounds"); - - /* Will throw if reading from two entries simultaneously */ - return entries[index].read(offset % elsize, size); - } - else if(type->kind == DataType::Array) - { - int index = 0; - while(offset >= type->attributes[index].size) - { - index++; - offset -= type->attributes[index].size; - } - - return entries[index].read(offset, size); - } - - throw std::logic_error("Read into unknown DataValue kind"); } -void DataValue::write(int offset, int size, uint32_t contents) +void DataValue::access(size_t offset, size_t size) const { - if(offset < 0 || size < 0 || (type->size && offset+size >= type->size)) - throw std::logic_error("Invalid write into simulated data"); - if(!size || size & (size - 1)) - throw std::logic_error("Simulated write not a power of 2"); + if(size != 1 && size != 2 && size != 4) + throw std::logic_error("Invalid simulated access size"); + if(offset + size > mem.size()) + throw std::logic_error("Access overflows from data"); +} - if(type->kind==DataType::Integral || type->kind==DataType::BitField) +uint32_t DataValue::read(size_t offset, size_t size) const +{ + access(offset, size); + uint32_t result = 0; + + while(size--) { - uint32_t mask = (size == 1) ? 0xff : - (size == 2) ? 
0xffff : - 0xffffffff; + int16_t byte = mem[offset++]; + if(byte == -1) + throw std::logic_error("Read uninitialized value"); - int shift = (4 - size - offset) << 3; - - uinteger = (uinteger & ~(mask << shift)) | (contents << shift); + result = (result << 8) | byte; } - else if(type->kind == DataType::Array) + + return result; +} + +void DataValue::write(size_t offset, size_t size, uint32_t contents) +{ + access(offset, size); + + offset += size; + while(size-- > 0) { - int elsize = type->arraytype->size; - int index = offset / elsize; - - if(index >= (int)entries.size()) - throw std::logic_error("Write out of array bounds"); - - entries[index].write(offset % elsize, size, contents); + mem[--offset] = contents & 0xff; + contents >>= 8; } - else if(type->kind == DataType::Array) +} + +std::string DataValue::str() const noexcept +{ + std::string result; + + switch(type->kind()) { - int index = 0; - while(offset >= type->attributes[index].size) + /* Format all integers in hexadecimal */ + case DataType::Integer: + return format("0x%0*x", 2*type->size(), read(0,type->size())); + + /* TODO: Print data values of complex types */ + case DataType::Bitfield: + case DataType::Array: + case DataType::String: + case DataType::Struct: + + /* If the type is not supported, use hexadecimal notation */ + default: + for(size_t i = 0; i < mem.size(); i++) { - index++; - offset -= type->attributes[index].size; + int16_t byte = mem[i]; + + if(byte == -1) result += " UND"; + else result += format(" %02x", byte); } - entries[index].write(offset, size, contents); + result[0] = '{'; + result += '}'; } - throw std::logic_error("Write into unknown DataValue kind"); + + return result; } } /* namespace FxOS */