fxos: add a small BSON serialization library

(I tested it obviously)
2023-09-09 23:19:10 +02:00 · 2023-09-09 23:19:10 +02:00 · 56a4800bbd
parent a4cda4cb66
commit 56a4800bbd
4 changed files with 1136 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -4,3 +4,6 @@

 # Semantic exclude
 /exclude
+
+# IDE files
+/*.sublime-*
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -49,6 +49,7 @@ set(fxos_core_SOURCES
  lib/vspace.cpp

  lib/ai/RelConst.cpp
+  lib/util/bson.cpp
  lib/util/Buffer.cpp
  lib/util/log.cpp
  lib/util/Timer.cpp)
--- a/include/fxos/util/bson.h
+++ b/include/fxos/util/bson.h
@ -0,0 +1,400 @@
+//---------------------------------------------------------------------------//
+//  1100101 |_ mov #0, r4         __                                         //
+//     11   |_ <0xb380 %5c4>     / _|_ _____ ___                             //
+//     0110 |_ 3.50 -> 3.60     |  _\ \ / _ (_-<                             //
+//          |_ base# + offset   |_| /_\_\___/__/                             //
+//---------------------------------------------------------------------------//
+// fxos/util/bson: Binary serialization (BSON format)
+//
+// This header implements a subset of the BSON format [1], which fxos uses
+// to save project and configuration files. The few aspects of note of this
+// implementation are:
+//
+// - Values and fields are immutable with RAII semantics. The only type of
+//   copy is a deep copy with .clone(); there is no copy constructor.
+// - Compact storage: 16 bytes per value + 8 bytes overhead for field names
+//   in documents. Strings and binary sequences are stored externally, with
+//   short string optimizations for string values of 13 bytes or less and
+//   document field names of 7 bytes or less.
+// - Only supports generic, JSON-like types of data; I don't care about the
+//   legacy or Javascript/MongoDB-specific stuff.
+//
+// [1] https://bsonspec.org
+//---
+
+#ifndef FXOS_UTIL_BSON_H
+#define FXOS_UTIL_BSON_H
+
+#include <fxos/util/types.h>
+#include <endian.h>
+#include <cassert>
+#include <cstdio>
+#include <string>
+
+struct BSONField;
+
+/* A single BSON value: a scalar, a string, a binary blob, an array of values
+   or a document (array of named fields). Values cannot be copied implicitly;
+   they are moved, or deep-copied with .clone(). */
+struct BSON
+{
+    /* Type tags; the numeric values are the element type bytes of the BSON
+       format. */
+    enum Type : u8 {
+        // clang-format off
+        Double      = 0x01,
+        String      = 0x02,
+        Document    = 0x03,
+        Array       = 0x04,
+        Binary      = 0x05,
+        Bool        = 0x08,
+        Datetime    = 0x09,
+        Null        = 0x0a,
+        I32         = 0x10,
+        I64         = 0x12,
+        // clang-format on
+    };
+
+    /* Default constructor builds a Null value. */
+    BSON(): m_type {Type::Null}, m_value {._i64 = 0}
+    {
+    }
+
+    /* Copy is suppressed because deep copies are expensive and rarely needed.
+       Use the explicit .clone() method for a deep copy. */
+    BSON(BSON const &) = delete;
+    BSON &operator=(BSON const &) = delete;
+
+    /* Move constructor and move assignment will move the resources and make
+       the old value a Null value. */
+    BSON(BSON &&);
+    BSON &operator=(BSON &&);
+
+    ~BSON();
+
+    /* Deep clone a value. */
+    BSON clone() const;
+
+    /* Dump value recursively to stream (for debugging purposes). */
+    void dump(FILE *fp, int depth = 0, bool noindent = false) const;
+
+    /* Serialize to a file, which must be seekable and open in write mode. If
+       `name` is not NULL, specifies the name of the field where this value is.
+       `len` is the length of name, or -1 for NUL-terminated. Only documents
+       can be serialized without a field name. */
+    void serialize(FILE *fp, char const *name = NULL, int len = -1) const;
+
+    /* Parse a complete document from a file. Sets `error` (and returns a Null
+       value) if a parse error occurs. If `log` is set, also logs out a
+       recursive context for the error, which helps with analysis. */
+    static BSON parseDocumentFromFile(FILE *fp, bool *error, bool log);
+
+    /* Constructors */
+
+    static BSON mkDouble(double d)
+    {
+        BSON v;
+        v.m_type = Type::Double;
+        v.m_value._double = d;
+        return v;
+    }
+    /* Booleans keep their truth value in the subtype byte. */
+    static BSON mkBool(bool b)
+    {
+        BSON v;
+        v.m_type = Type::Bool;
+        v.m_subtype = b;
+        v.m_value._i64 = 0;
+        return v;
+    }
+    static BSON mkDatetime(i64 datetime)
+    {
+        BSON v;
+        v.m_type = Type::Datetime;
+        v.m_value._i64 = datetime;
+        return v;
+    }
+    static BSON mkNull()
+    {
+        BSON v;
+        v.m_type = Type::Null;
+        v.m_value._i64 = 0;
+        return v;
+    }
+    static BSON mkI32(i32 integer)
+    {
+        BSON v;
+        v.m_type = Type::I32;
+        v.m_value._i32 = integer;
+        return v;
+    }
+    static BSON mkI64(i64 integer)
+    {
+        BSON v;
+        v.m_type = Type::I64;
+        v.m_value._i64 = integer;
+        return v;
+    }
+
+    /* Construct a document from a literal list of pairs { name, value }. */
+    static BSON mkDocument(
+        std::initializer_list<std::pair<char const *, BSON &&>> pairs);
+    /* Construct a document by moving every field from the array without taking
+       ownership of the array. */
+    static BSON mkDocumentFromFields(BSONField *fields, size_t count);
+    /* Construct a document by taking ownership of a field array, which must be
+       in the heap. */
+    static BSON mkDocumentFromFieldArray(BSONField *fields, size_t count);
+
+    /* Construct an array by moving every value from the array without taking
+       ownership of the array. */
+    static BSON mkArrayFromValues(BSON *values, size_t count);
+    /* Construct an array by taking ownership of a value array, which must be
+       in the heap. */
+    static BSON mkArrayFromValueArray(BSON *values, size_t count);
+
+    /* Construct a binary object by copying the data region. */
+    static BSON mkBinaryCopy(int subtype, u8 const *data, size_t size);
+    /* Construct a binary object by taking ownership of the provided data
+       region, which must be in the heap. */
+    static BSON mkBinaryMove(int subtype, u8 *data, size_t size);
+
+    /* Construct a string by copying the input. */
+    static BSON mkStringCopy(char const *str, int len = -1);
+    static BSON mkStringCopy(std::string const &str);
+    /* Construct a string by taking ownership of the provided buffer, which
+       must be NUL-terminated and in the heap. */
+    static BSON mkStringMove(char *str);
+
+    /* Type checks */
+
+    bool isDouble() const
+    {
+        return m_type == Type::Double;
+    }
+    bool isString() const
+    {
+        return m_type == Type::String;
+    }
+    bool isDocument() const
+    {
+        return m_type == Type::Document;
+    }
+    bool isArray() const
+    {
+        return m_type == Type::Array;
+    }
+    bool isBinary() const
+    {
+        return m_type == Type::Binary;
+    }
+    bool isBool() const
+    {
+        return m_type == Type::Bool;
+    }
+    bool isDatetime() const
+    {
+        return m_type == Type::Datetime;
+    }
+    bool isNull() const
+    {
+        return m_type == Type::Null;
+    }
+    bool isI32() const
+    {
+        return m_type == Type::I32;
+    }
+    bool isI64() const
+    {
+        return m_type == Type::I64;
+    }
+
+    /* Accessors; each one asserts that the value has the matching type. */
+
+    double getDouble() const
+    {
+        assert(isDouble() && "wrong BSON accessor: getDouble");
+        return m_value._double;
+    }
+    /* Truth value of a boolean, as stored by mkBool() in the subtype byte. */
+    bool getBool() const
+    {
+        assert(isBool() && "wrong BSON accessor: getBool");
+        return m_subtype != 0;
+    }
+    BSONField *getDocumentFields()
+    {
+        assert(isDocument() && "wrong BSON accessor: getDocumentFields");
+        return m_value.fields;
+    }
+    BSONField const *getDocumentFields() const
+    {
+        assert(isDocument() && "wrong BSON accessor: getDocumentFields");
+        return m_value.fields;
+    }
+    BSON *getArrayElements()
+    {
+        assert(isArray() && "wrong BSON accessor: getArrayElements");
+        return m_value.values;
+    }
+    BSON const *getArrayElements() const
+    {
+        assert(isArray() && "wrong BSON accessor: getArrayElements");
+        return m_value.values;
+    }
+    /* Returns the data pointer; `size` and `subtype` are optional outputs
+       and may be NULL. */
+    u8 *getBinary(size_t *size, int *subtype) const
+    {
+        assert(isBinary() && "wrong BSON accessor: getBinary");
+        if(size)
+            *size = m_size;
+        if(subtype)
+            *subtype = m_subtype;
+        return m_value.binary;
+    }
+    i64 getDatetime() const
+    {
+        assert(isDatetime() && "wrong BSON accessor: getDatetime");
+        return m_value._i64;
+    }
+    i32 getI32() const
+    {
+        assert(isI32() && "wrong BSON accessor: getI32");
+        return m_value._i32;
+    }
+    i64 getI64() const
+    {
+        assert(isI64() && "wrong BSON accessor: getI64");
+        return m_value._i64;
+    }
+
+    /* Get pointer to NUL-terminated string, read-only */
+    char const *getStringReadOnly() const;
+    /* Get a copy of the NUL-terminated string, malloc() allocated */
+    char *getStringCopy() const;
+
+    /* Document/array/binary size (number of fields, values or bytes) */
+    uint size() const
+    {
+        assert((isDocument() || isArray() || isBinary())
+               && "BSON::size: bad type");
+        return m_size;
+    }
+
+    /* Get binary subtype */
+    int binarySubtype() const
+    {
+        assert(isBinary() && "BSON::binarySubtype: not a Binary");
+        return m_subtype;
+    }
+
+    /* Get n-th element of array; must be in-bounds (or assertion failure) */
+    BSON &operator[](int i);
+    BSON const &operator[](int i) const;
+
+    /* Check whether a document has a particular field */
+    bool hasField(char const *str) const;
+    bool hasField(std::string str) const
+    {
+        return hasField(str.c_str());
+    }
+
+    /* Access document element by name; must exist (or assertion failure) */
+    BSON &operator[](char const *str);
+    BSON const &operator[](char const *str) const;
+    BSON &operator[](std::string str)
+    {
+        return (*this)[str.c_str()];
+    }
+    BSON const &operator[](std::string str) const
+    {
+        return (*this)[str.c_str()];
+    }
+
+public:
+    /* The members are public so that constructor functions can do their job.
+       Don't access this directly. The definition is also a bit misleading
+       because some types (strings) store stuff across multiple fields. */
+
+    Type m_type;
+    /* Subtype or memory layout for the object: binary subtype for Binary,
+       truth value for Bool, and for String a nonzero value when the text is
+       stored in place (in the bytes following this member) rather than
+       behind m_value.str. */
+    u8 m_subtype = 0;
+    u16 m_zero = 0;
+    /* Extra metadata, generally a size. */
+    u32 m_size = 0;
+
+    union
+    {
+        double _double;    /* Double */
+        char *str;         /* String (unless short string optimization) */
+        BSONField *fields; /* Document */
+        BSON *values;      /* Array */
+        u8 *binary;        /* Binary */
+        i32 _i32;          /* I32 */
+        i64 _i64;          /* Datetime, I64 */
+    } m_value;
+
+private:
+    /* Parsing helpers; each returns false on a parse error and takes a
+       `log` flag that enables error messages. */
+    static bool parseElement(FILE *, BSON *, BSONField *, bool);
+    static bool parseDocument(FILE *, BSON &, std::string const &, bool);
+    static bool parseArray(FILE *, BSON &, std::string const &, bool);
+};
+
+/* A key/value pair used in documents. */
+/* A key/value pair used in documents: a field name (stored in place when it
+   is short enough, otherwise behind a pointer) together with the BSON value
+   it names. */
+struct BSONField
+{
+    /* Make a field out of a name and a value to be moved into the field. If
+       `len` is specified then it's the length of the name, otherwise the name
+       is assumed to be NUL-terminated. */
+    BSONField(char const *name, BSON &&value, int len = -1);
+
+    /* Same with an std::string. */
+    BSONField(std::string name, BSON &&value):
+        BSONField(name.c_str(), std::move(value))
+    {
+    }
+
+    /* Implicit copy is disabled, use .clone(). */
+    BSONField(BSONField const &) = delete;
+    BSONField &operator=(BSONField const &) = delete;
+
+    /* Moving will move both the name and value, leaving a field with a
+       placeholder name "@" and a Null value. */
+    BSONField(BSONField &&);
+    BSONField &operator=(BSONField &&);
+
+    ~BSONField();
+
+    /* Deep clone a field and its value */
+    BSONField clone() const;
+
+    /* Compare against the name (which might be stored unconventionally).
+       `str` is NUL-terminated. */
+    bool compareName(char const *str) const;
+
+    /* Get read-only access to the name; its length is stored in `*len`. It
+       is not guaranteed to be NUL-terminated due to storage optimizations. */
+    char const *getNameReadOnly(size_t *len) const;
+
+    /* Get a NUL-terminated heap copy of the name. */
+    char *getNameCopy() const;
+
+    /* Dump field recursively to stream (for debugging purposes). */
+    void dump(FILE *fp, int depth = 0) const;
+
+    /* Get value */
+    BSON &value()
+    {
+        return m_value;
+    }
+    BSON const &value() const
+    {
+        return m_value;
+    }
+
+private:
+    /* Another short string optimization; if the full name fits on 7 bytes
+       (without a NUL terminator), store it directly in the field; otherwise
+       use an 8-byte pointer.
+
+       This optimization relies on pointers not using their top byte, which
+       is the case in practice and asserted at runtime for future-proofness.
+       m_layout overlaps that top byte: it holds the name length when the
+       name is stored in m_literal, and 0 when m_name is a pointer. */
+    union
+    {
+        // clang-format off
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        struct { char m_literal[sizeof(char *) - 1]; u8 m_layout; };
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        struct { u8 m_layout; char m_literal[sizeof(char *) - 1]; };
+#else
+#error "bson.h: unknown endianness for compact field storage?!"
+#endif
+        // clang-format on
+        char *m_name;
+    };
+    BSON m_value;
+};
+
+#endif /* FXOS_UTIL_BSON_H */
--- a/lib/util/bson.cpp
+++ b/lib/util/bson.cpp
@ -0,0 +1,732 @@
+//---------------------------------------------------------------------------//
+//  1100101 |_ mov #0, r4         __                                         //
+//     11   |_ <0xb380 %5c4>     / _|_ _____ ___                             //
+//     0110 |_ 3.50 -> 3.60     |  _\ \ / _ (_-<                             //
+//          |_ base# + offset   |_| /_\_\___/__/                             //
+//---------------------------------------------------------------------------//
+// Reference: https://bsonspec.org/spec.html
+//
+// For the subset at hand:
+//   document ::=
+//     | int32 element* "\x00"   int32 is the total number of bytes
+//   element ::=
+//     | "\x01" e_name double    64-bit binary floating point
+//     | "\x02" e_name string    UTF-8 string
+//     | "\x03" e_name document  Embedded document
+//     | "\x04" e_name document  Array
+//     | "\x05" e_name binary    Binary data
+//     | "\x08" e_name "\x00"    Boolean "false"
+//     | "\x08" e_name "\x01"    Boolean "true"
+//     | "\x09" e_name int64     UTC datetime
+//     | "\x0A" e_name           Null value
+//     | "\x10" e_name int32     32-bit integer
+//     | "\x12" e_name int64     64-bit integer
+//   string ::=
+//     | int32 (byte*) "\x00"    int32 is the number of bytes, NUL included
+//   binary ::=
+//     | int32 subtype (byte*)   int32 is the number of bytes
+//---
+
+#include <fxos/util/bson.h>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <new>
+#include <utility>
+#include <vector>
+
+/* Number of bytes available in a value after the type/subtype attributes.
+   Short strings are stored in place in these bytes, i.e. starting 2 bytes
+   into the BSON object, and must fit there with their NUL terminator. */
+#define SSO_MAXLEN (sizeof(BSON) - 2)
+
+/* Move constructor: takes over the contents of `source` through the move
+   assignment operator, which leaves `source` as a Null value. */
+BSON::BSON(BSON &&source)
+{
+    this->operator=(std::move(source));
+}
+
+/* Move assignment: takes over the type, metadata and payload of `other` and
+   resets `other` to a Null value so that its destructor releases nothing.
+
+   NOTE(review): whatever *this held before is overwritten without being
+   released. This is relied upon by the move constructor and by code that
+   assigns into raw malloc() storage, where the previous contents are
+   garbage; assigning over a value that owns memory therefore leaks it. */
+BSON &BSON::operator=(BSON &&other)
+{
+    /* A self-move must keep the value; going through the steps below would
+       reset it to Null and lose the only pointer to its storage. */
+    if(this == &other)
+        return *this;
+
+    m_type = other.m_type;
+    m_subtype = other.m_subtype;
+    m_zero = other.m_zero;
+    m_size = other.m_size;
+    m_value = other.m_value;
+
+    other.m_type = Type::Null;
+    other.m_subtype = 0;
+    other.m_zero = 0;
+    other.m_size = 0;
+    other.m_value._i64 = 0;
+
+    return *this;
+}
+
+/* Release whatever the value owns: out-of-line string storage, the field or
+   value array of a document/array (after destroying each entry), or the data
+   of a binary object. Everything is released with free(). */
+BSON::~BSON()
+{
+    switch(m_type) {
+    case Type::String:
+        /* A nonzero subtype marks a string stored in place: nothing owned */
+        if(m_subtype == 0)
+            free(m_value.str);
+        break;
+
+    case Type::Document: {
+        BSONField *field = m_value.fields;
+        for(uint left = m_size; left > 0; left--, field++)
+            field->~BSONField();
+        free(m_value.fields);
+        break;
+    }
+
+    case Type::Array: {
+        BSON *value = m_value.values;
+        for(uint left = m_size; left > 0; left--, value++)
+            value->~BSON();
+        free(m_value.values);
+        break;
+    }
+
+    case Type::Binary:
+        free(m_value.binary);
+        break;
+
+    default:
+        /* Scalar types own no memory */
+        break;
+    }
+}
+
+/* Deep copy of a value. Scalars and in-place strings are copied bitwise;
+   out-of-line strings, binary data, arrays and documents get storage of
+   their own, recursively. Throws std::bad_alloc if memory runs out. */
+BSON BSON::clone() const
+{
+    switch(m_type) {
+    /* All subtypes with no referenced subvalues */
+    case Type::Double:
+    case Type::Bool:
+    case Type::Datetime:
+    case Type::Null:
+    case Type::I32:
+    case Type::I64: {
+        BSON v;
+        v.m_type = m_type;
+        v.m_subtype = m_subtype;
+        v.m_zero = m_zero;
+        v.m_size = m_size;
+        v.m_value = m_value;
+        return v;
+    }
+
+    /* Strings need to be copied only if the SSO is not used */
+    case Type::String: {
+        BSON v;
+        v.m_type = m_type;
+        v.m_subtype = m_subtype;
+        v.m_zero = m_zero;
+        v.m_size = m_size;
+        v.m_value = m_value;
+        if(!m_subtype && m_value.str) {
+            /* Own a private copy; sharing the pointer would make both
+               values free() it */
+            v.m_value.str = strdup(m_value.str);
+            if(!v.m_value.str)
+                throw std::bad_alloc {};
+        }
+        return v;
+    }
+
+    /* Arrays and objects need to have their entries cloned */
+    case Type::Document: {
+        BSONField *fields
+            = static_cast<BSONField *>(malloc(m_size * sizeof *fields));
+        /* malloc(0) may legitimately return NULL */
+        if(m_size && !fields)
+            throw std::bad_alloc {};
+
+        /* The result owns the array from the start and its size grows as
+           entries are built, so an exception thrown midway releases exactly
+           what was constructed */
+        BSON v = mkDocumentFromFieldArray(fields, 0);
+        for(uint i = 0; i < m_size; i++) {
+            /* The storage is raw: construct in place rather than assign */
+            new(&fields[i]) BSONField(m_value.fields[i].clone());
+            v.m_size = i + 1;
+        }
+        return v;
+    }
+    case Type::Array: {
+        BSON *values = static_cast<BSON *>(malloc(m_size * sizeof *values));
+        if(m_size && !values)
+            throw std::bad_alloc {};
+
+        BSON v = mkArrayFromValueArray(values, 0);
+        for(uint i = 0; i < m_size; i++) {
+            new(&values[i]) BSON(m_value.values[i].clone());
+            v.m_size = i + 1;
+        }
+        return v;
+    }
+
+    case Type::Binary:
+        return mkBinaryCopy(m_subtype, m_value.binary, m_size);
+    }
+
+    assert(false && "BSON::clone: unsupported type");
+    /* Only reached for unknown types when assertions are compiled out */
+    return mkNull();
+}
+
+/* Print a human-readable description of the value to `fp`, one line per
+   value, recursing into documents and arrays with `depth + 1`. The line is
+   indented by 2 * depth spaces unless `noindent` is set (used when the
+   caller already printed a prefix, such as a field name). */
+void BSON::dump(FILE *fp, int depth, bool noindent) const
+{
+    if(!noindent)
+        fprintf(fp, "%*s", 2 * depth, "");
+
+    switch(m_type) {
+    case Type::String:
+        if(m_subtype)
+            /* In-place string; the subtype counts the NUL terminator */
+            fprintf(fp, "string(%d) \"%s\"\n", m_subtype - 1, (char *)this + 2);
+        else
+            fprintf(fp, "string \"%s\"\n", m_value.str ? m_value.str : "");
+        break;
+    case Type::Document:
+        fprintf(fp, "document\n");
+        for(uint i = 0; i < m_size; i++)
+            m_value.fields[i].dump(fp, depth + 1);
+        break;
+    case Type::Array:
+        fprintf(fp, "array\n");
+        for(uint i = 0; i < m_size; i++)
+            m_value.values[i].dump(fp, depth + 1);
+        break;
+    case Type::Binary:
+        fprintf(fp, "binary(subtype %d) %u bytes\n", m_subtype,
+            (unsigned int)m_size);
+        break;
+    case Type::Double:
+        fprintf(fp, "double %f\n", m_value._double);
+        break;
+    case Type::Bool:
+        fprintf(fp, m_subtype ? "true\n" : "false\n");
+        break;
+    case Type::Datetime:
+        /* Cast so that the format matches whatever i64 is defined as */
+        fprintf(fp, "datetime %lld\n", (long long)m_value._i64);
+        break;
+    case Type::Null:
+        fprintf(fp, "null\n");
+        break;
+    case Type::I32:
+        fprintf(fp, "i32 %d\n", (int)m_value._i32);
+        break;
+    case Type::I64:
+        fprintf(fp, "i64 %lld\n", (long long)m_value._i64);
+        break;
+    default:
+        fprintf(fp, "UNKNOWN(%d/%d)\n", m_type, m_subtype);
+    }
+}
+
+/* Write the value to `fp` in BSON format. When `name` is given, the value is
+   written as a document element: type byte, name (`len` bytes, or up to the
+   NUL if `len` is negative), NUL, then the payload. Without a name only the
+   payload is written, which is only valid for documents. Documents and
+   arrays first write a placeholder for their size and patch it afterwards,
+   which is why `fp` must be seekable. */
+void BSON::serialize(FILE *fp, char const *name, int len) const
+{
+    static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__,
+        "BSON::serialize currently assumes little-endian");
+
+    assert((name || isDocument()) && "BSON::serialize: missing name");
+    if(name) {
+        fputc((int)m_type, fp);
+        if(len >= 0)
+            fwrite(name, len, 1, fp);
+        else
+            fputs(name, fp);
+        fputc('\x00', fp);
+    }
+
+    switch(m_type) {
+    case Type::Double:
+        fwrite(&m_value._double, 8, 1, fp);
+        return;
+
+    case Type::String: {
+        char const *str = getStringReadOnly();
+        if(!str)
+            str = "";
+        /* The stored size counts the NUL terminator */
+        i32 size = strlen(str) + 1;
+        fwrite(&size, 4, 1, fp);
+        fputs(str, fp);
+        fputc('\x00', fp);
+        return;
+    }
+
+    case Type::Document:
+    case Type::Array: {
+        /* Placeholder for the total size, patched once the end is known */
+        long start_o = ftell(fp);
+        fputs("####", fp);
+
+        if(m_type == Type::Document) {
+            for(uint i = 0; i < m_size; i++) {
+                BSONField const &f = m_value.fields[i];
+                size_t flen;
+                char const *fname = f.getNameReadOnly(&flen);
+                f.value().serialize(fp, fname, (int)flen);
+            }
+        }
+        else {
+            /* Array entries are named after their decimal index */
+            for(uint i = 0; i < m_size; i++) {
+                char str[16];
+                snprintf(str, sizeof str, "%u", i);
+                m_value.values[i].serialize(fp, str, -1);
+            }
+        }
+
+        fputc('\x00', fp);
+        long end_o = ftell(fp);
+        fseek(fp, start_o, SEEK_SET);
+        /* Covers the size field itself and the final NUL */
+        i32 size = end_o - start_o;
+        fwrite(&size, 4, 1, fp);
+        fseek(fp, end_o, SEEK_SET);
+        return;
+    }
+
+    case Type::Binary:
+        /* binary ::= int32 subtype (byte*) -- there is no terminator; an
+           extra NUL here would be read back as the end of the enclosing
+           document */
+        fwrite(&m_size, 4, 1, fp);
+        fputc(m_subtype, fp);
+        if(m_size)
+            fwrite(m_value.binary, m_size, 1, fp);
+        return;
+
+    case Type::Bool:
+        fputc(m_subtype != 0, fp);
+        return;
+
+    case Type::Datetime:
+    case Type::I64:
+        fwrite(&m_value._i64, 8, 1, fp);
+        return;
+
+    case Type::Null:
+        return;
+
+    case Type::I32:
+        fwrite(&m_value._i32, 4, 1, fp);
+        return;
+    }
+
+    assert(false && "BSON::serialize: unsupported object type");
+}
+
+/* Error reporting helper for the parse functions: prints the formatted
+   message on stderr when the `log` variable of the enclosing function is
+   set, and always evaluates to false so that it can be used directly as
+   `return LOG(...)`. Written as a statement expression (GNU extension). */
+#define LOG(FMT, ...) \
+    ({ \
+        if(log) \
+            fprintf(stderr, "[bson::parse] " FMT "\n", ##__VA_ARGS__); \
+        false; \
+    })
+
+/* Parse an element. If f is non-NULL, record the name and fill *f. Otherwise,
+   b should be non-NULL and the value is stored in b.
+
+   Returns true on success. On failure returns false (after logging if `log`
+   is set) and leaves the destination as a Null value, so that it can always
+   be destroyed safely. Throws std::bad_alloc if a buffer cannot be
+   allocated. All buffers come from malloc() because the BSON destructor
+   releases them with free(). */
+bool BSON::parseElement(FILE *fp, BSON *b, BSONField *f, bool log)
+{
+    int type = fgetc(fp);
+    if(type == EOF)
+        return LOG("error: EOF where field was expected");
+
+    std::string name;
+    int c;
+    while((c = fgetc(fp)) != 0) {
+        if(c == EOF)
+            return LOG("error: EOF within field name");
+        name.push_back(c);
+    }
+
+    if(f)
+        new(f) BSONField(name, mkNull());
+
+    /* The destination stays Null until a complete value has been read */
+    BSON &v = f ? f->value() : *b;
+    v = mkNull();
+
+    switch(type) {
+    case Type::Double: {
+        double d;
+        if(fread(&d, 8, 1, fp) != 1)
+            return LOG("error: EOF within double (`%s')", name.c_str());
+        v = mkDouble(d);
+        return true;
+    }
+
+    case Type::String: {
+        /* Size in bytes, NUL terminator included, hence at least 1 */
+        i32 len = 0;
+        if(fread(&len, 4, 1, fp) != 1)
+            return LOG("error: EOF within string size (`%s')", name.c_str());
+        if(len < 1)
+            return LOG("error: invalid string size %d (`%s')", (int)len,
+                name.c_str());
+
+        if(len <= (i32)SSO_MAXLEN) {
+            /* Short string: stored in place, with the (nonzero) size as the
+               layout marker */
+            char inplace[SSO_MAXLEN] = {0};
+            if(fread(inplace, len, 1, fp) != 1)
+                return LOG("error: failed to read str (`%s')", name.c_str());
+            if(inplace[len - 1] != '\0')
+                return LOG("error: unterminated str (`%s')", name.c_str());
+            v.m_type = Type::String;
+            v.m_subtype = len;
+            memcpy((char *)&v + 2, inplace, SSO_MAXLEN);
+        }
+        else {
+            char *str = static_cast<char *>(malloc(len));
+            if(!str)
+                throw std::bad_alloc {};
+            if(fread(str, len, 1, fp) != 1) {
+                free(str);
+                return LOG("error: failed to read str (`%s')", name.c_str());
+            }
+            if(str[len - 1] != '\0') {
+                free(str);
+                return LOG("error: unterminated str (`%s')", name.c_str());
+            }
+            v.m_type = Type::String;
+            v.m_subtype = 0;
+            v.m_value.str = str;
+        }
+        return true;
+    }
+
+    case Type::Document:
+        return parseDocument(fp, v, name, log);
+    case Type::Array:
+        return parseArray(fp, v, name, log);
+
+    case Type::Binary: {
+        i32 size = 0;
+        if(fread(&size, 4, 1, fp) != 1)
+            return LOG("error: EOF within binary size (`%s')", name.c_str());
+        if(size < 0)
+            return LOG("error: invalid binary size %d (`%s')", (int)size,
+                name.c_str());
+        int subtype = fgetc(fp);
+        if(subtype == EOF)
+            return LOG("error: EOF at binary subtype (`%s')", name.c_str());
+
+        /* Never request 0 bytes so that NULL always means failure */
+        u8 *data = static_cast<u8 *>(malloc(size ? size : 1));
+        if(!data)
+            throw std::bad_alloc {};
+        /* An empty payload is valid and involves no read at all */
+        if(size && fread(data, size, 1, fp) != 1) {
+            free(data);
+            return LOG("error: failed to read binary (`%s')", name.c_str());
+        }
+        v = mkBinaryMove(subtype, data, size);
+        return true;
+    }
+
+    case Type::Bool: {
+        int byte = fgetc(fp);
+        if(byte == EOF)
+            return LOG("error: EOF within boolean (`%s')", name.c_str());
+        v = mkBool(byte != 0);
+        return true;
+    }
+
+    case Type::Datetime: {
+        i64 datetime;
+        if(fread(&datetime, 8, 1, fp) != 1)
+            return LOG("error: EOF within datetime (`%s')", name.c_str());
+        v = mkDatetime(datetime);
+        return true;
+    }
+
+    case Type::Null:
+        return true;
+
+    case Type::I32: {
+        i32 integer;
+        if(fread(&integer, 4, 1, fp) != 1)
+            return LOG("error: EOF within i32 (`%s')", name.c_str());
+        v = mkI32(integer);
+        return true;
+    }
+
+    case Type::I64: {
+        i64 integer;
+        if(fread(&integer, 8, 1, fp) != 1)
+            return LOG("error: EOF within i64 (`%s')", name.c_str());
+        v = mkI64(integer);
+        return true;
+    }
+    }
+
+    return LOG("error: unknown value type: 0x%02x (`%s')", type, name.c_str());
+}
+
+/* Parse a document body (size, elements, final NUL) from `fp` into `v`.
+   `name` is only used in log messages. Returns false on a parse error, in
+   which case `v` is not assigned. */
+bool BSON::parseDocument(FILE *fp, BSON &v, std::string const &name, bool log)
+{
+    /* The declared size is read to advance the stream but is not used */
+    i32 declared_size;
+    fread(&declared_size, 4, 1, fp);
+    if(feof(fp))
+        return LOG("error: EOF within document size (`%s')", name.c_str());
+
+    std::vector<BSONField> parsed;
+    for(;;) {
+        /* Peek at the next byte: NUL ends the document, anything else is
+           the type byte of an element and is handed back to the stream */
+        int next = fgetc(fp);
+        if(feof(fp))
+            return LOG("error: EOF within document (`%s')", name.c_str());
+        if(next == '\x00')
+            break;
+        ungetc(next, fp);
+
+        BSONField field("@", mkNull());
+        bool parsed_ok = parseElement(fp, NULL, &field, log);
+        if(!parsed_ok)
+            return LOG("within document `%s'", name.c_str());
+        parsed.push_back(std::move(field));
+    }
+
+    v = mkDocumentFromFields(parsed.data(), parsed.size());
+    return true;
+}
+
+/* Parse an array body (size, elements, final NUL) from `fp` into `v`. The
+   element names are read but not kept. `name` is only used in log messages.
+   Returns false on a parse error, in which case `v` is not assigned. */
+bool BSON::parseArray(FILE *fp, BSON &v, std::string const &name, bool log)
+{
+    /* The declared size is read to advance the stream but is not used */
+    i32 declared_size;
+    fread(&declared_size, 4, 1, fp);
+    if(feof(fp))
+        return LOG("error: EOF within array size (`%s')", name.c_str());
+
+    std::vector<BSON> parsed;
+    for(;;) {
+        /* Peek at the next byte: NUL ends the array, anything else is the
+           type byte of an element and is handed back to the stream */
+        int next = fgetc(fp);
+        if(feof(fp))
+            return LOG("error: EOF within array (`%s')", name.c_str());
+        if(next == '\x00')
+            break;
+        ungetc(next, fp);
+
+        BSON element;
+        bool parsed_ok = parseElement(fp, &element, NULL, log);
+        if(!parsed_ok)
+            return LOG("within array `%s'", name.c_str());
+        parsed.push_back(std::move(element));
+    }
+
+    v = mkArrayFromValues(parsed.data(), parsed.size());
+    return true;
+}
+
+BSON BSON::parseDocumentFromFile(FILE *fp, bool *error, bool log)
+{
+    BSON v;
+    bool rc = parseDocument(fp, v, "<file>", log);
+    if(error)
+        *error = rc;
+    if(!rc)
+        v = mkNull();
+    return v;
+}
+
+#undef LOG
+
+/* Build a document from a literal list of { name, value } pairs; every value
+   is moved into the document. Throws std::bad_alloc if the field array
+   cannot be allocated. */
+BSON BSON::mkDocument(
+    std::initializer_list<std::pair<char const *, BSON &&>> pairs)
+{
+    size_t count = pairs.size();
+    BSONField *fields
+        = static_cast<BSONField *>(malloc(count * sizeof *fields));
+    /* malloc(0) may legitimately return NULL for an empty document */
+    if(count && !fields)
+        throw std::bad_alloc {};
+
+    /* The document owns the array from the start and its size grows as
+       fields are built, so an exception thrown midway releases exactly what
+       was constructed */
+    BSON doc = mkDocumentFromFieldArray(fields, 0);
+    for(auto const &pair: pairs) {
+        new(&fields[doc.m_size]) BSONField(pair.first, std::move(pair.second));
+        doc.m_size++;
+    }
+
+    return doc;
+}
+
+/* Build a document by moving `count` fields out of `fields_ro` into a newly
+   allocated array. The caller keeps ownership of `fields_ro` itself, whose
+   entries are left in their moved-from state. Throws std::bad_alloc if the
+   array cannot be allocated. */
+BSON BSON::mkDocumentFromFields(BSONField *fields_ro, size_t count)
+{
+    BSONField *fields
+        = static_cast<BSONField *>(malloc(count * sizeof *fields));
+    /* malloc(0) may legitimately return NULL for an empty document */
+    if(count && !fields)
+        throw std::bad_alloc {};
+
+    /* The storage is raw, so entries are move-constructed in place rather
+       than assigned to */
+    for(size_t i = 0; i < count; i++)
+        new(&fields[i]) BSONField(std::move(fields_ro[i]));
+
+    return mkDocumentFromFieldArray(fields, count);
+}
+
+/* Wrap an existing field array into a document value. The array pointer is
+   stored as-is and later released by the BSON destructor. */
+BSON BSON::mkDocumentFromFieldArray(BSONField *fields, size_t count)
+{
+    BSON doc;
+    doc.m_value.fields = fields;
+    doc.m_size = count;
+    doc.m_type = Type::Document;
+    return doc;
+}
+
+/* Build an array by moving `count` values out of `values_ro` into a newly
+   allocated array. The caller keeps ownership of `values_ro` itself, whose
+   entries become Null values. Throws std::bad_alloc if the array cannot be
+   allocated. */
+BSON BSON::mkArrayFromValues(BSON *values_ro, size_t count)
+{
+    BSON *values = static_cast<BSON *>(malloc(count * sizeof *values));
+    /* malloc(0) may legitimately return NULL for an empty array */
+    if(count && !values)
+        throw std::bad_alloc {};
+
+    /* The storage is raw, so entries are move-constructed in place rather
+       than assigned to */
+    for(size_t i = 0; i < count; i++)
+        new(&values[i]) BSON(std::move(values_ro[i]));
+
+    return mkArrayFromValueArray(values, count);
+}
+
+/* Wrap an existing value array into an array value. The array pointer is
+   stored as-is and later released by the BSON destructor. */
+BSON BSON::mkArrayFromValueArray(BSON *values, size_t count)
+{
+    BSON array;
+    array.m_value.values = values;
+    array.m_size = count;
+    array.m_type = Type::Array;
+    return array;
+}
+
+/* Build a binary value holding a private copy of `size` bytes from
+   `data_ro`. Throws std::bad_alloc if the copy cannot be allocated. */
+BSON BSON::mkBinaryCopy(int subtype, u8 const *data_ro, size_t size)
+{
+    /* The copy must come from malloc() since the BSON destructor releases
+       binary data with free(). Never request 0 bytes so that NULL always
+       means failure. */
+    u8 *data = static_cast<u8 *>(malloc(size ? size : 1));
+    if(!data)
+        throw std::bad_alloc {};
+    if(size)
+        memcpy(data, data_ro, size);
+    return mkBinaryMove(subtype, data, size);
+}
+
+/* Wrap an existing data region into a binary value. The pointer is stored
+   as-is and later released by the BSON destructor. */
+BSON BSON::mkBinaryMove(int subtype, u8 *data, size_t size)
+{
+    BSON blob;
+    blob.m_value.binary = data;
+    blob.m_size = size;
+    blob.m_subtype = subtype;
+    blob.m_type = Type::Binary;
+    return blob;
+}
+
+/* Build a string value from a copy of `str`. `len` is the number of bytes
+   to copy, or negative if `str` is NUL-terminated. Strings that fit in the
+   value with their terminator are stored in place; longer ones get a
+   malloc() copy. Throws std::bad_alloc if that copy cannot be allocated. */
+BSON BSON::mkStringCopy(char const *str, int len)
+{
+    BSON v;
+    v.m_type = Type::String;
+
+    size_t n = (len < 0) ? strlen(str) : (size_t)len;
+
+    if(n < SSO_MAXLEN) {
+        /* In-place layout: the subtype holds the size including the NUL, so
+           that it is nonzero even for the empty string (zero means that
+           m_value.str points to the text) */
+        v.m_subtype = n + 1;
+        memset((char *)&v + 2, 0, SSO_MAXLEN);
+        memcpy((char *)&v + 2, str, n);
+    }
+    else {
+        /* malloc() rather than new[]: the destructor releases with free() */
+        char *copy = static_cast<char *>(malloc(n + 1));
+        if(!copy)
+            throw std::bad_alloc {};
+        memcpy(copy, str, n);
+        copy[n] = 0;
+        v.m_value.str = copy;
+    }
+
+    return v;
+}
+
+/* Build a string value from a copy of `str`, up to its first NUL if it has
+   one. Strings that fit in the value with their terminator are stored in
+   place; longer ones get a malloc() copy. Throws std::bad_alloc if that copy
+   cannot be allocated. */
+BSON BSON::mkStringCopy(std::string const &str)
+{
+    BSON v;
+    v.m_type = Type::String;
+
+    /* Accessors treat the text as NUL-terminated, so measure it that way */
+    size_t n = strlen(str.c_str());
+
+    if(n < SSO_MAXLEN) {
+        /* In-place layout: the subtype holds the size including the NUL, so
+           that it is nonzero even for the empty string (zero means that
+           m_value.str points to the text) */
+        v.m_subtype = n + 1;
+        memset((char *)&v + 2, 0, SSO_MAXLEN);
+        memcpy((char *)&v + 2, str.c_str(), n);
+    }
+    else {
+        /* malloc() rather than new[]: the destructor releases with free() */
+        char *copy = static_cast<char *>(malloc(n + 1));
+        if(!copy)
+            throw std::bad_alloc {};
+        memcpy(copy, str.c_str(), n + 1);
+        v.m_value.str = copy;
+    }
+
+    return v;
+}
+
+/* Build a string value by taking ownership of `str`, which must be
+   NUL-terminated and allocated with malloc(). Short strings are copied into
+   the value and the buffer is released right away; longer ones keep using
+   the buffer. */
+BSON BSON::mkStringMove(char *str)
+{
+    BSON v;
+    v.m_type = Type::String;
+
+    size_t len = strlen(str);
+    if(len < SSO_MAXLEN) {
+        /* In-place layout: the subtype holds the size including the NUL, so
+           that it is nonzero even for the empty string (zero means that
+           m_value.str points to the text) */
+        v.m_subtype = len + 1;
+        memset((char *)&v + 2, 0, SSO_MAXLEN);
+        memcpy((char *)&v + 2, str, len);
+        free(str);
+    }
+    else {
+        v.m_value.str = str;
+    }
+
+    return v;
+}
+
+/* Pointer to the text of a string value: inside the value itself when the
+   subtype is nonzero (in-place storage), otherwise the external buffer. */
+char const *BSON::getStringReadOnly() const
+{
+    assert(isString() && "wrong BSON accessor: getStringReadOnly");
+    return m_subtype ? (char *)this + 2 : m_value.str;
+}
+
+/* strdup() copy of the text of a string value, to be released by the caller
+   with free(). */
+char *BSON::getStringCopy() const
+{
+    char const *text = getStringReadOnly();
+    return strdup(text);
+}
+
+/* i-th element of an array; asserts that the value is an array and that the
+   index is in bounds. */
+BSON &BSON::operator[](int i)
+{
+    assert(isArray() && i >= 0 && (uint)i < m_size
+           && "BSON::operator[]: out-of-bounds");
+    BSON *element = m_value.values + i;
+    return *element;
+}
+
+/* i-th element of an array (read-only); asserts that the value is an array
+   and that the index is in bounds. */
+BSON const &BSON::operator[](int i) const
+{
+    assert(isArray() && i >= 0 && (uint)i < m_size
+           && "BSON::operator[]: out-of-bounds");
+    BSON const *element = m_value.values + i;
+    return *element;
+}
+
+/* Linear search among the first `n` entries of `fields` for one whose name
+   equals `str`; returns the first match, or NULL if there is none. */
+static BSONField *getFieldWithName(BSONField *fields, char const *str, int n)
+{
+    BSONField *candidate = fields;
+    for(int remaining = n; remaining > 0; remaining--, candidate++) {
+        if(candidate->compareName(str))
+            return candidate;
+    }
+    return NULL;
+}
+
+/* Whether the document has a field named `str`. */
+bool BSON::hasField(char const *str) const
+{
+    assert(isDocument() && "BSON::hasField: not a document");
+    BSONField *match = getFieldWithName(m_value.fields, str, m_size);
+    return match != NULL;
+}
+
+/* Value of the document field named `str`; asserts that the value is a
+   document and that the field exists. */
+BSON &BSON::operator[](char const *str)
+{
+    assert(isDocument() && "BSON::operator[]: not a document");
+    BSONField *match = getFieldWithName(m_value.fields, str, m_size);
+    assert(match && "BSON::operator[]: key missing");
+    BSON &found = match->value();
+    return found;
+}
+
+/* Value of the document field named `str` (read-only); asserts that the
+   value is a document and that the field exists. */
+BSON const &BSON::operator[](char const *str) const
+{
+    assert(isDocument() && "BSON::operator[]: not a document");
+    BSONField *match = getFieldWithName(m_value.fields, str, m_size);
+    assert(match && "BSON::operator[]: key missing");
+    BSON const &found = match->value();
+    return found;
+}
+
+/* Build a field from a name and a value that is moved into the field. `len`
+   is the length of the name, or negative if it is NUL-terminated. Names of
+   1 to sizeof m_literal bytes are stored in place; all others, including
+   the empty name, get a heap copy. Throws std::bad_alloc if that copy
+   cannot be allocated. */
+BSONField::BSONField(char const *name, BSON &&value, int len)
+{
+    /* Only "does it fit" matters here, so don't scan past the limit */
+    size_t n = (len >= 0) ? (size_t)len : strnlen(name, sizeof m_literal + 1);
+
+    /* m_layout doubles as the in-place marker and must be nonzero for
+       in-place names, so the empty name cannot use that layout (it would
+       read back as a NULL m_name) */
+    if(n >= 1 && n <= sizeof m_literal) {
+        m_layout = n;
+        memset(m_literal, 0, sizeof m_literal);
+        memcpy(m_literal, name, n);
+    }
+    else {
+        m_name = (len >= 0) ? strndup(name, len) : strdup(name);
+        if(!m_name)
+            throw std::bad_alloc {};
+        /* Check that the top byte is unused */
+        assert((uintptr_t)m_name >> (8 * sizeof m_name - 8) == 0);
+    }
+
+    m_value = std::move(value);
+}
+
+/* Move constructor: takes over the name and value of `source` through the
+   move assignment operator. */
+BSONField::BSONField(BSONField &&source)
+{
+    this->operator=(std::move(source));
+}
+
+/* Move assignment: takes over the name and value of `other`, which is left
+   with the placeholder name "@" and a Null value.
+
+   NOTE(review): the previous name of *this is overwritten without being
+   released. This is relied upon by the move constructor and by code that
+   assigns into raw malloc() storage, where the previous contents are
+   garbage; assigning over a field that owns a heap name leaks it. */
+BSONField &BSONField::operator=(BSONField &&other)
+{
+    /* A self-move must keep the field; going through the steps below would
+       replace the name with the placeholder and lose the original */
+    if(this == &other)
+        return *this;
+
+    /* Copying the pointer copies the whole union, in-place names included */
+    m_name = other.m_name;
+    other.m_name = nullptr;
+    other.m_layout = 1;
+    other.m_literal[0] = '@';
+    m_value = std::move(other.m_value);
+    return *this;
+}
+
+/* Release the name if it lives on the heap; a nonzero m_layout means that it
+   is stored in place and there is nothing to free. */
+BSONField::~BSONField()
+{
+    if(m_layout)
+        return;
+    free(m_name);
+}
+
+/* Deep copy of the field: same name, cloned value. */
+BSONField BSONField::clone() const
+{
+    /* Heap names are NUL-terminated; in-place ones need their length */
+    if(!m_layout)
+        return BSONField(m_name, m_value.clone());
+    return BSONField(m_literal, m_value.clone(), m_layout);
+}
+
+/* Whether the field's name equals the NUL-terminated string `str`. */
+bool BSONField::compareName(char const *str) const
+{
+    if(!m_layout)
+        return strcmp(str, m_name) == 0;
+
+    /* In-place names are zero-padded but have no terminator when they fill
+       the whole literal, so also make sure that `str` is not longer */
+    bool same_prefix = (strncmp(m_literal, str, sizeof m_literal) == 0);
+    bool fits = (strnlen(str, sizeof m_literal + 1) <= sizeof m_literal);
+    return same_prefix && fits;
+}
+
+/* Pointer to the name, with its length stored in `*len`. In-place names are
+   not necessarily NUL-terminated. */
+char const *BSONField::getNameReadOnly(size_t *len) const
+{
+    if(!m_layout) {
+        *len = strlen(m_name);
+        return m_name;
+    }
+
+    /* In-place layout: m_layout is the length of the name */
+    *len = m_layout;
+    return m_literal;
+}
+
+/* NUL-terminated copy of the name made with strdup()/strndup(), to be
+   released by the caller with free(). */
+char *BSONField::getNameCopy() const
+{
+    if(!m_layout)
+        return strdup(m_name);
+    return strndup(m_literal, m_layout);
+}
+
+/* Print the field to `fp`: indentation of 2 * depth spaces, the quoted name
+   (followed by its length when stored in place), then the value on the same
+   line. */
+void BSONField::dump(FILE *fp, int depth) const
+{
+    fprintf(fp, "%*s", 2 * depth, "");
+
+    if(!m_layout)
+        fprintf(fp, "'%s': ", m_name);
+    else
+        fprintf(fp, "'%.*s'(%d): ", m_layout, m_literal, m_layout);
+
+    /* The value continues the current line, hence no indentation */
+    m_value.dump(fp, depth, true);
+}