diff --git a/.gitignore b/.gitignore index 92f6ab5..17839b7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ # Semantic exclude /exclude + +# IDE files +/*.sublime-* diff --git a/CMakeLists.txt b/CMakeLists.txt index 858d32a..eb79eeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ set(fxos_core_SOURCES lib/vspace.cpp lib/ai/RelConst.cpp + lib/util/bson.cpp lib/util/Buffer.cpp lib/util/log.cpp lib/util/Timer.cpp) diff --git a/include/fxos/util/bson.h b/include/fxos/util/bson.h new file mode 100644 index 0000000..59a0275 --- /dev/null +++ b/include/fxos/util/bson.h @@ -0,0 +1,400 @@ +//---------------------------------------------------------------------------// +// 1100101 |_ mov #0, r4 __ // +// 11 |_ <0xb380 %5c4> / _|_ _____ ___ // +// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // +// |_ base# + offset |_| /_\_\___/__/ // +//---------------------------------------------------------------------------// +// fxos/util/bson: Binary serialization (BSON format) +// +// This header implements a subset of the BSON format [1], which fxos uses +// to save project and configuration files. The few aspects of note of this +// implementation are: +// +// - Values and fields are immutable with RAII semantics. The only type of +// copy is a deep copy with .clone(); there is no copy constructor. +// - Compact storage: 16 bytes per value + 8 bytes overhead for field names +// in documents. Strings and binary sequences are stored externally, with +// short string optimizations for string values of 13 bytes or less and +// document field names of 7 bytes or less. +// - Only supports generic, JSON-like types of data; I don't care about the +// legacy or Javascript/MongoDB-specific stuff. 
+//
+// [1] https://bsonspec.org
+//---
+
+#ifndef FXOS_UTIL_BSON_H
+#define FXOS_UTIL_BSON_H
+
+/* NOTE(review): the include targets were missing from this copy of the patch.
+   The list below is reconstructed from the names this header uses (u8, i32,
+   uint..., assert, FILE, std::string, std::initializer_list, std::pair,
+   std::move). Confirm against the tree, in particular the project header that
+   provides the integer aliases. */
+#include <fxos/util/types.h>
+#include <cassert>
+#include <cstdio>
+#include <initializer_list>
+#include <string>
+#include <utility>
+
+struct BSONField;
+
+/* A single BSON value of any supported type. Values own the storage they
+   reference (string, fields, elements, binary data) and release it in the
+   destructor; they can be moved but not implicitly copied. */
+struct BSON
+{
+    /* Type tag of a value. The numeric values are the element type bytes that
+       serialize() writes in front of named elements. */
+    enum Type : u8 {
+        // clang-format off
+        Double   = 0x01,
+        String   = 0x02,
+        Document = 0x03,
+        Array    = 0x04,
+        Binary   = 0x05,
+        Bool     = 0x08,
+        Datetime = 0x09,
+        Null     = 0x0a,
+        I32      = 0x10,
+        I64      = 0x12,
+        // clang-format on
+    };
+
+    /* Default constructor builds a Null value. */
+    BSON(): m_type {Type::Null}, m_value {._i64 = 0}
+    {
+    }
+
+    /* Copy is suppressed because deep copies are expensive and rarely needed.
+       Use the explicit .clone() method for a deep copy. */
+    BSON(BSON const &) = delete;
+    BSON &operator=(BSON const &) = delete;
+
+    /* Move constructor and move assignment will move the resources and make
+       the old value a Null value. */
+    BSON(BSON &&);
+    BSON &operator=(BSON &&);
+
+    ~BSON();
+
+    /* Deep clone a value. */
+    BSON clone() const;
+
+    /* Dump value recursively to stream (for debugging purposes). `depth` is
+       the indentation level; `noindent` skips the indentation of the first
+       line (used when the value follows a field name on the same line). */
+    void dump(FILE *fp, int depth = 0, bool noindent = false) const;
+
+    /* Serialize to a file, which must be seekable and open in write mode. If
+       `name` is not NULL, specifies the name of the field where this value is.
+       `len` is the length of name, or -1 for NUL-terminated. Only documents
+       can be serialized without a field name. */
+    void serialize(FILE *fp, char const *name = NULL, int len = -1) const;
+
+    /* Parse a complete document from a file. Sets `error` (and returns a Null
+       value) if a parse error occurs. If `log` is set, also logs out a
+       recursive context for the error, which helps with analysis. */
+    static BSON parseDocumentFromFile(FILE *fp, bool *error, bool log);
+
+    /* Constructors */
+
+    /* Build a Double value. */
+    static BSON mkDouble(double d)
+    {
+        BSON v;
+        v.m_type = Type::Double;
+        v.m_value._double = d;
+        return v;
+    }
+    /* Build a Bool value; the boolean itself is kept in m_subtype. */
+    static BSON mkBool(bool b)
+    {
+        BSON v;
+        v.m_type = Type::Bool;
+        v.m_subtype = b;
+        v.m_value._i64 = 0;
+        return v;
+    }
+    /* Build a Datetime value from a 64-bit integer timestamp. */
+    static BSON mkDatetime(i64 datetime)
+    {
+        BSON v;
+        v.m_type = Type::Datetime;
+        v.m_value._i64 = datetime;
+        return v;
+    }
+    /* Build a Null value (same as the default constructor). */
+    static BSON mkNull()
+    {
+        BSON v;
+        v.m_type = Type::Null;
+        v.m_value._i64 = 0;
+        return v;
+    }
+    /* Build a 32-bit integer value. */
+    static BSON mkI32(i32 integer)
+    {
+        BSON v;
+        v.m_type = Type::I32;
+        v.m_value._i32 = integer;
+        return v;
+    }
+    /* Build a 64-bit integer value. */
+    static BSON mkI64(i64 integer)
+    {
+        BSON v;
+        v.m_type = Type::I64;
+        v.m_value._i64 = integer;
+        return v;
+    }
+
+    /* Construct a document from a literal list of pairs { name, value }.
+       NOTE(review): the element type of the list was garbled in this copy of
+       the patch. It is restored as pair<std::string, BSON &&> because the
+       implementation moves out of `pair.second` through a const reference to
+       the pair; confirm against the tree. */
+    static BSON mkDocument(
+        std::initializer_list<std::pair<std::string, BSON &&>> pairs);
+    /* Construct a document by moving every field from the array without taking
+       ownership of the array. */
+    static BSON mkDocumentFromFields(BSONField *fields, size_t count);
+    /* Construct a document by taking ownership of a field array, which must be
+       in the heap. */
+    static BSON mkDocumentFromFieldArray(BSONField *fields, size_t count);
+
+    /* Construct an array by moving every value from the array without taking
+       ownership of the array. */
+    static BSON mkArrayFromValues(BSON *values, size_t count);
+    /* Construct an array by taking ownership of a value array, which must be
+       in the heap. */
+    static BSON mkArrayFromValueArray(BSON *values, size_t count);
+
+    /* Construct a binary object by copying the data region. */
+    static BSON mkBinaryCopy(int subtype, u8 const *data, size_t size);
+    /* Construct a binary object by taking ownership of the provided data
+       region, which must be in the heap. */
+    static BSON mkBinaryMove(int subtype, u8 *data, size_t size);
+
+    /* Construct a string by copying the input. `len` is the length of the
+       string, or -1 for NUL-terminated. */
+    static BSON mkStringCopy(char const *str, int len = -1);
+    static BSON mkStringCopy(std::string const &str);
+    /* Construct a string by taking ownership of the provided buffer, which
+       must be NUL-terminated and in the heap. */
+    static BSON mkStringMove(char *str);
+
+    /* Type checks */
+
+    bool isDouble() const
+    {
+        return m_type == Type::Double;
+    }
+    bool isString() const
+    {
+        return m_type == Type::String;
+    }
+    bool isDocument() const
+    {
+        return m_type == Type::Document;
+    }
+    bool isArray() const
+    {
+        return m_type == Type::Array;
+    }
+    bool isBinary() const
+    {
+        return m_type == Type::Binary;
+    }
+    bool isBool() const
+    {
+        return m_type == Type::Bool;
+    }
+    bool isDatetime() const
+    {
+        return m_type == Type::Datetime;
+    }
+    bool isNull() const
+    {
+        return m_type == Type::Null;
+    }
+    bool isI32() const
+    {
+        return m_type == Type::I32;
+    }
+    bool isI64() const
+    {
+        return m_type == Type::I64;
+    }
+
+    /* Accessors. Each one asserts that the value has the matching type. */
+
+    double getDouble() const
+    {
+        assert(isDouble() && "wrong BSON accessor: getDouble");
+        return m_value._double;
+    }
+    /* Booleans live in m_subtype (see mkBool). */
+    bool getBool() const
+    {
+        assert(isBool() && "wrong BSON accessor: getBool");
+        return m_subtype != 0;
+    }
+    /* Field array of a document; it holds size() entries. */
+    BSONField *getDocumentFields()
+    {
+        assert(isDocument() && "wrong BSON accessor: getDocumentFields");
+        return m_value.fields;
+    }
+    BSONField const *getDocumentFields() const
+    {
+        assert(isDocument() && "wrong BSON accessor: getDocumentFields");
+        return m_value.fields;
+    }
+    /* Element array of an array; it holds size() entries. */
+    BSON *getArrayElements()
+    {
+        assert(isArray() && "wrong BSON accessor: getArrayElements");
+        return m_value.values;
+    }
+    BSON const *getArrayElements() const
+    {
+        assert(isArray() && "wrong BSON accessor: getArrayElements");
+        return m_value.values;
+    }
+    /* Data pointer of a binary object. If non-NULL, `*size` receives the
+       number of bytes and `*subtype` the binary subtype. */
+    u8 *getBinary(size_t *size, int *subtype) const
+    {
+        assert(isBinary() && "wrong BSON accessor: getBinary");
+        if(size)
+            *size = m_size;
+        if(subtype)
+            *subtype = m_subtype;
+        return m_value.binary;
+    }
+    i64 getDatetime() const
+    {
+        assert(isDatetime() && "wrong BSON accessor: getDatetime");
+        return m_value._i64;
+    }
+    i32 getI32() const
+    {
+        assert(isI32() && "wrong BSON accessor: getI32");
+        return m_value._i32;
+    }
+    i64 getI64() const
+    {
+        assert(isI64() && "wrong BSON accessor: getI64");
+        return m_value._i64;
+    }
+
+    /* Get pointer to NUL-terminated string, read-only */
+    char const *getStringReadOnly() const;
+    /* Get a copy of the NUL-terminated string, malloc() allocated */
+    char *getStringCopy() const;
+
+    /* Document/array size (number of entries), or byte count of a binary */
+    uint size() const
+    {
+        assert((isDocument() || isArray() || isBinary())
+               && "BSON::size: bad type");
+        return m_size;
+    }
+
+    /* Get binary subtype */
+    int binarySubtype() const
+    {
+        assert(isBinary() && "BSON::binarySubtype: not a Binary");
+        return m_subtype;
+    }
+
+    /* Get n-th element of array; must be in-bounds (or assertion failure) */
+    BSON &operator[](int i);
+    BSON const &operator[](int i) const;
+
+    /* Check whether a document has a particular field */
+    bool hasField(char const *str) const;
+    bool hasField(std::string const &str) const
+    {
+        return hasField(str.c_str());
+    }
+
+    /* Access document element by name; must exist (or assertion failure) */
+    BSON &operator[](char const *str);
+    BSON const &operator[](char const *str) const;
+    BSON &operator[](std::string const &str)
+    {
+        return (*this)[str.c_str()];
+    }
+    BSON const &operator[](std::string const &str) const
+    {
+        return (*this)[str.c_str()];
+    }
+
+public:
+    /* The members are public so that constructor functions can do their job.
+       Don't access this directly. The definition is also a bit misleading
+       because some types (strings) store stuff across multiple fields. */
+
+    Type m_type;
+    /* Subtype or memory layout for the object: the boolean for Bool values,
+       the binary subtype for Binary values, and the storage layout selector
+       for String values (see lib/util/bson.cpp). */
+    u8 m_subtype = 0;
+    u16 m_zero = 0;
+    /* Extra metadata, generally a size. */
+    u32 m_size = 0;
+
+    union
+    {
+        double _double;    /* Double */
+        char *str;         /* String (unless short string optimization) */
+        BSONField *fields; /* Document */
+        BSON *values;      /* Array */
+        u8 *binary;        /* Binary */
+        i32 _i32;          /* I32 */
+        i64 _i64;          /* Datetime, I64 */
+    } m_value;
+
+private:
+    /* Parsing helpers; each returns false on error. The bool parameter is the
+       `log` flag of parseDocumentFromFile(). */
+    static bool parseElement(FILE *, BSON *, BSONField *, bool);
+    static bool parseDocument(FILE *, BSON &, std::string const &, bool);
+    static bool parseArray(FILE *, BSON &, std::string const &, bool);
+};
+
+/* A key/value pair used in documents. */
+struct BSONField
+{
+    /* Make a field out of a name and a value to be moved into the field. If
+       `len` is specified then it's the length of the name, otherwise the name
+       is assumed to be NUL-terminated. */
+    BSONField(char const *name, BSON &&value, int len = -1);
+
+    /* Same with an std::string. */
+    BSONField(std::string const &name, BSON &&value):
+        BSONField(name.c_str(), std::move(value))
+    {
+    }
+
+    /* Implicit copy is disabled, use .clone(). */
+    BSONField(BSONField const &) = delete;
+    BSONField &operator=(BSONField const &) = delete;
+
+    /* Moving will move both the name and value, leaving a field with a
+       placeholder name "@" and a Null value. */
+    BSONField(BSONField &&);
+    BSONField &operator=(BSONField &&);
+
+    ~BSONField();
+
+    /* Deep clone a field and its value */
+    BSONField clone() const;
+
+    /* Compare against the name (which might be stored unconventionally). */
+    bool compareName(char const *str) const;
+
+    /* Get read-only access to the name; its length is stored in `*len`,
+       which must not be NULL. It is not guaranteed to be NUL-terminated due
+       to storage optimizations. */
+    char const *getNameReadOnly(size_t *len) const;
+
+    /* Get a NUL-terminated heap copy of the name. */
+    char *getNameCopy() const;
+
+    /* Dump field recursively to stream (for debugging purposes). */
+    void dump(FILE *fp, int depth = 0) const;
+
+    /* Get value */
+    BSON &value()
+    {
+        return m_value;
+    }
+    BSON const &value() const
+    {
+        return m_value;
+    }
+
+private:
+    /* Another short string optimization; if the full name fits on 7 bytes
+       (without a NUL terminator), store it directly in the field; otherwise
+       use an 8-byte pointer.
+
+       This optimization relies on pointers not using their top byte, which is
+       the case in practice and asserted at runtime for future-proofness.
+       m_layout overlaps that top byte: it is zero when m_name is a pointer
+       and holds the name length when the name is stored in m_literal. */
+    union
+    {
+        // clang-format off
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        struct { char m_literal[sizeof(char *) - 1]; u8 m_layout; };
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        struct { u8 m_layout; char m_literal[sizeof(char *) - 1]; };
+#else
+#error "bson.h: unknown endianness for compact field storage?!"
+#endif
+        // clang-format on
+        char *m_name;
+    };
+    BSON m_value;
+};
+
+#endif /* FXOS_UTIL_BSON_H */
diff --git a/lib/util/bson.cpp b/lib/util/bson.cpp
new file mode 100644
index 0000000..e668349
--- /dev/null
+++ b/lib/util/bson.cpp
@@ -0,0 +1,732 @@
+//---------------------------------------------------------------------------//
+// 1100101 |_ mov #0, r4 __ //
+// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
+// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
+// |_ base# + offset |_| /_\_\___/__/ //
+//---------------------------------------------------------------------------//
+// Reference: https://bsonspec.org/spec.html
+//
+// For the subset at hand:
+// document ::=
+// | int32 element* "\x00" int32 is the total number of bytes
+// element ::=
+// | "\x01" e_name double 64-bit binary floating point
+// | "\x02" e_name string UTF-8 string
+// | "\x03" e_name document Embedded document
+// | "\x04" e_name document Array
+// | "\x05" e_name binary Binary data
+// | "\x08" e_name "\x00" Boolean "false"
+// | "\x08" e_name "\x01" Boolean "true"
+// | "\x09" e_name int64 UTC datetime
+// | "\x0A" e_name Null value
+// | "\x10" e_name int32 32-bit integer
+// | "\x12" 
e_name int64 64-bit integer
+// string ::=
+// | int32 (byte*) "\x00" int32 is the number of bytes, NUL included
+// binary ::=
+// | int32 subtype (byte*) int32 is the number of bytes
+//---
+
+/* NOTE(review): the include targets were missing from this copy of the patch.
+   The list below is reconstructed from the names this file uses; confirm
+   against the tree. */
+#include <fxos/util/bson.h>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <new>
+#include <string>
+#include <vector>
+
+/* Number of bytes available in a value after the type/subtype attributes */
+#define SSO_MAXLEN (sizeof(BSON) - 2)
+
+/* Storage conventions used throughout this file:
+   - Every owned buffer (strings, field/value arrays, binary data) comes from
+     malloc() or a malloc()-based function (strdup...), because ~BSON() and
+     ~BSONField() release them with free().
+   - Short strings are stored inline, starting 2 bytes into the BSON object;
+     m_subtype is then the string length *plus one* (NUL included), so that
+     it is never zero. m_subtype == 0 means m_value.str is a heap pointer. */
+
+/* Allocate `count` objects of `size` bytes with malloc(). Throws
+   std::bad_alloc on failure; never returns NULL, even for count == 0. */
+static void *allocArray(size_t count, size_t size)
+{
+    if(size && count > static_cast<size_t>(-1) / size)
+        throw std::bad_alloc {};
+
+    /* malloc(0) may legitimately return NULL; always request a byte */
+    size_t bytes = count * size;
+    void *ptr = malloc(bytes ? bytes : 1);
+    if(!ptr)
+        throw std::bad_alloc {};
+    return ptr;
+}
+
+/* Read exactly `size` bytes from fp. Returns false on EOF or read error. */
+static bool readExact(FILE *fp, void *dst, size_t size)
+{
+    return size == 0 || fread(dst, 1, size, fp) == size;
+}
+
+BSON::BSON(BSON &&other)
+{
+    *this = std::move(other);
+}
+
+/* Take over the contents of `other` and leave it Null.
+   NOTE(review): this does not release what *this owned before. It has always
+   behaved that way and code that assigns into raw malloc() storage may rely
+   on it, so the semantics are kept; assigning over a live value that owns
+   memory leaks that memory. */
+BSON &BSON::operator=(BSON &&other)
+{
+    /* Self-move would otherwise reset the value to Null and leak it */
+    if(this == &other)
+        return *this;
+
+    m_type = other.m_type;
+    m_subtype = other.m_subtype;
+    m_zero = other.m_zero;
+    m_size = other.m_size;
+    m_value = other.m_value;
+
+    other.m_type = Type::Null;
+    other.m_subtype = 0;
+    other.m_zero = 0;
+    other.m_size = 0;
+    other.m_value._i64 = 0;
+
+    return *this;
+}
+
+BSON::~BSON()
+{
+    if(m_type == Type::String && !m_subtype)
+        free(m_value.str);
+    else if(m_type == Type::Document) {
+        for(uint i = 0; i < m_size; i++)
+            m_value.fields[i].~BSONField();
+        free(m_value.fields);
+    }
+    else if(m_type == Type::Array) {
+        for(uint i = 0; i < m_size; i++)
+            m_value.values[i].~BSON();
+        free(m_value.values);
+    }
+    else if(m_type == Type::Binary)
+        free(m_value.binary);
+}
+
+BSON BSON::clone() const
+{
+    switch(m_type) {
+    /* Types with no referenced subvalues are copied bit for bit; this also
+       covers inline (short) strings, which live inside the object. */
+    case Type::Double:
+    case Type::Bool:
+    case Type::Datetime:
+    case Type::Null:
+    case Type::I32:
+    case Type::I64:
+    case Type::String: {
+        BSON v;
+        v.m_type = m_type;
+        v.m_subtype = m_subtype;
+        v.m_zero = m_zero;
+        v.m_size = m_size;
+        v.m_value = m_value;
+
+        /* Heap strings must be duplicated, otherwise both values would free
+           the same buffer */
+        if(m_type == Type::String && !m_subtype) {
+            v.m_value.str = m_value.str ? strdup(m_value.str) : NULL;
+            if(m_value.str && !v.m_value.str)
+                throw std::bad_alloc {};
+        }
+        return v;
+    }
+
+    /* Arrays and objects need to have their entries cloned. The result owns
+       the array from the start and its size grows as entries are built, so
+       that everything is released if a nested clone() throws. */
+    case Type::Document: {
+        BSONField *fields
+            = static_cast<BSONField *>(allocArray(m_size, sizeof *fields));
+        BSON doc = mkDocumentFromFieldArray(fields, 0);
+        for(uint i = 0; i < m_size; i++) {
+            new(&fields[i]) BSONField(m_value.fields[i].clone());
+            doc.m_size = i + 1;
+        }
+        return doc;
+    }
+    case Type::Array: {
+        BSON *values = static_cast<BSON *>(allocArray(m_size, sizeof *values));
+        BSON array = mkArrayFromValueArray(values, 0);
+        for(uint i = 0; i < m_size; i++) {
+            new(&values[i]) BSON(m_value.values[i].clone());
+            array.m_size = i + 1;
+        }
+        return array;
+    }
+
+    case Type::Binary:
+        return mkBinaryCopy(m_subtype, m_value.binary, m_size);
+    }
+
+    assert(false && "BSON::clone: unsupported type");
+    /* Keep the function well-defined when assertions are compiled out */
+    return BSON();
+}
+
+void BSON::dump(FILE *fp, int depth, bool noindent) const
+{
+    if(!noindent)
+        fprintf(fp, "%*s", 2 * depth, "");
+
+    switch(m_type) {
+    case Type::String:
+        /* For inline strings m_subtype is the length plus one */
+        if(m_subtype)
+            fprintf(fp, "string(%d) \"%s\"\n", m_subtype - 1, (char *)this + 2);
+        else
+            fprintf(fp, "string \"%s\"\n", m_value.str);
+        break;
+    case Type::Document:
+        fprintf(fp, "document\n");
+        for(uint i = 0; i < m_size; i++)
+            m_value.fields[i].dump(fp, depth + 1);
+        break;
+    case Type::Array:
+        fprintf(fp, "array\n");
+        for(uint i = 0; i < m_size; i++)
+            m_value.values[i].dump(fp, depth + 1);
+        break;
+    case Type::Binary:
+        fprintf(fp, "binary(subtype %d, %u bytes)\n", m_subtype,
+            (unsigned int)m_size);
+        break;
+    case Type::Double:
+        fprintf(fp, "double %f\n", m_value._double);
+        break;
+    case Type::Bool:
+        fputs(m_subtype ? "true\n" : "false\n", fp);
+        break;
+    case Type::Datetime:
+        /* Cast so that the format is right whatever i64 is an alias of */
+        fprintf(fp, "datetime %lld\n", (long long)m_value._i64);
+        break;
+    case Type::Null:
+        fprintf(fp, "null\n");
+        break;
+    case Type::I32:
+        fprintf(fp, "i32 %d\n", (int)m_value._i32);
+        break;
+    case Type::I64:
+        fprintf(fp, "i64 %lld\n", (long long)m_value._i64);
+        break;
+    default:
+        fprintf(fp, "UNKNOWN(%d/%d)\n", m_type, m_subtype);
+    }
+}
+
+void BSON::serialize(FILE *fp, char const *name, int len) const
+{
+    static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__,
+        "BSON::serialize currently assumes little-endian");
+
+    /* Named elements start with the type byte and the NUL-terminated name */
+    assert((name || isDocument()) && "BSON::serialize: missing name");
+    if(name) {
+        fputc((int)m_type, fp);
+        if(len >= 0)
+            fwrite(name, len, 1, fp);
+        else
+            fputs(name, fp);
+        fputc('\x00', fp);
+    }
+
+    switch(m_type) {
+    case Type::Double:
+        fwrite(&m_value._double, 8, 1, fp);
+        return;
+
+    case Type::String: {
+        /* The size prefix counts the NUL terminator */
+        char const *str = getStringReadOnly();
+        i32 size = strlen(str) + 1;
+        fwrite(&size, 4, 1, fp);
+        fwrite(str, size, 1, fp);
+        return;
+    }
+
+    case Type::Document:
+    case Type::Array: {
+        /* The total size is only known at the end: write a placeholder, then
+           seek back and patch it (hence the seekable file requirement) */
+        long start_o = ftell(fp);
+        fputs("####", fp);
+
+        if(m_type == Type::Document) {
+            for(uint i = 0; i < m_size; i++) {
+                BSONField const &f = m_value.fields[i];
+                size_t flen;
+                char const *fname = f.getNameReadOnly(&flen);
+                f.value().serialize(fp, fname, flen);
+            }
+        }
+        else {
+            /* Array elements are named after their decimal index */
+            for(uint i = 0; i < m_size; i++) {
+                char str[16];
+                snprintf(str, sizeof str, "%u", i);
+                m_value.values[i].serialize(fp, str, -1);
+            }
+        }
+
+        fputc('\x00', fp);
+        long end_o = ftell(fp);
+        fseek(fp, start_o, SEEK_SET);
+        i32 size = end_o - start_o;
+        fwrite(&size, 4, 1, fp);
+        fseek(fp, end_o, SEEK_SET);
+        return;
+    }
+
+    case Type::Binary:
+        /* binary ::= int32 subtype (byte*), with no terminator. A trailing
+           NUL here would be read back as the end of the enclosing document. */
+        fwrite(&m_size, 4, 1, fp);
+        fputc(m_subtype, fp);
+        if(m_size)
+            fwrite(m_value.binary, m_size, 1, fp);
+        return;
+
+    case Type::Bool:
+        fputc(m_subtype != 0, fp);
+        return;
+
+    case Type::Datetime:
+    case Type::I64:
+        fwrite(&m_value._i64, 8, 1, fp);
+        return;
+
+    case Type::Null:
+        return;
+
+    case Type::I32:
+        fwrite(&m_value._i32, 4, 1, fp);
+        return;
+    }
+
+    assert(false && "BSON::serialize: unsupported object type");
+}
+
+/* Print a message on stderr if the local `log` flag is set; evaluates to
+   false so that parsers can `return LOG(...)`. */
+#define LOG(FMT, ...) \
+    ({ \
+        if(log) \
+            fprintf(stderr, "[bson::parse] " FMT "\n", ##__VA_ARGS__); \
+        false; \
+    })
+
+/* Parse an element. If f is non-NULL, record the name and fill *f. Otherwise,
+   b should be non-NULL and the value is stored in b. On failure the value is
+   left Null so that it can be destroyed safely. */
+bool BSON::parseElement(FILE *fp, BSON *b, BSONField *f, bool log)
+{
+    int type = fgetc(fp);
+    if(type == EOF)
+        return LOG("error: EOF where field was expected");
+
+    std::string name;
+    int c;
+    while((c = fgetc(fp)) != 0) {
+        if(c == EOF)
+            return LOG("error: EOF within field name");
+        name.push_back(c);
+    }
+
+    if(f)
+        *f = BSONField(name, mkNull());
+
+    /* The value only gets its final type once its payload is fully read */
+    BSON &v = f ? f->value() : *b;
+    v = mkNull();
+
+    switch(type) {
+    case Type::Double: {
+        double d;
+        if(!readExact(fp, &d, 8))
+            return LOG("error: EOF within double (`%s')", name.c_str());
+        v = mkDouble(d);
+        return true;
+    }
+
+    case Type::String: {
+        i32 len = 0;
+        if(!readExact(fp, &len, 4))
+            return LOG("error: EOF within string size (`%s')", name.c_str());
+        /* The size includes the NUL terminator, so it is at least 1. Without
+           this check a corrupt size would overflow the buffers below. */
+        if(len < 1)
+            return LOG("error: invalid string size %d (`%s')", (int)len,
+                name.c_str());
+
+        if(len <= (int)SSO_MAXLEN) {
+            char buf[SSO_MAXLEN];
+            if(!readExact(fp, buf, len))
+                return LOG("error: failed to read str (`%s')", name.c_str());
+            /* Drop the terminator byte; mkStringCopy() adds its own */
+            v = mkStringCopy(buf, len - 1);
+        }
+        else {
+            char *str = static_cast<char *>(allocArray(len, 1));
+            if(!readExact(fp, str, len)) {
+                free(str);
+                return LOG("error: failed to read str (`%s')", name.c_str());
+            }
+            /* Don't trust the file to have terminated the string */
+            str[len - 1] = 0;
+            v = mkStringMove(str);
+        }
+        return true;
+    }
+
+    case Type::Document:
+        return parseDocument(fp, v, name, log);
+    case Type::Array:
+        return parseArray(fp, v, name, log);
+
+    case Type::Binary: {
+        i32 size = 0;
+        if(!readExact(fp, &size, 4))
+            return LOG("error: EOF within binary size (`%s')", name.c_str());
+        int subtype = fgetc(fp);
+        if(subtype == EOF)
+            return LOG("error: EOF at binary subtype (`%s')", name.c_str());
+        if(size < 0)
+            return LOG("error: invalid binary size %d (`%s')", (int)size,
+                name.c_str());
+
+        u8 *data = static_cast<u8 *>(allocArray(size, 1));
+        if(!readExact(fp, data, size)) {
+            free(data);
+            return LOG("error: failed to read binary (`%s')", name.c_str());
+        }
+        v = mkBinaryMove(subtype, data, size);
+        return true;
+    }
+
+    case Type::Bool: {
+        int value = fgetc(fp);
+        if(value == EOF)
+            return LOG("error: EOF within boolean (`%s')", name.c_str());
+        v = mkBool(value != 0);
+        return true;
+    }
+
+    case Type::Datetime: {
+        i64 datetime;
+        if(!readExact(fp, &datetime, 8))
+            return LOG("error: EOF within datetime (`%s')", name.c_str());
+        v = mkDatetime(datetime);
+        return true;
+    }
+
+    case Type::Null:
+        return true;
+
+    case Type::I32: {
+        i32 integer;
+        if(!readExact(fp, &integer, 4))
+            return LOG("error: EOF within i32 (`%s')", name.c_str());
+        v = mkI32(integer);
+        return true;
+    }
+
+    case Type::I64: {
+        i64 integer;
+        if(!readExact(fp, &integer, 8))
+            return LOG("error: EOF within i64 (`%s')", name.c_str());
+        v = mkI64(integer);
+        return true;
+    }
+    }
+
+    return LOG("error: unknown value type: 0x%02x (`%s')", type, name.c_str());
+}
+
+/* Parse a document body (size prefix, elements, terminator) into v. `name` is
+   only used for error messages. The size prefix is read but not checked. */
+bool BSON::parseDocument(FILE *fp, BSON &v, std::string const &name, bool log)
+{
+    i32 len;
+    if(!readExact(fp, &len, 4))
+        return LOG("error: EOF within document size (`%s')", name.c_str());
+
+    std::vector<BSONField> fields;
+    while(true) {
+        /* Peek at the next byte: NUL ends the document, anything else is
+           the type byte of the next element */
+        int t = fgetc(fp);
+        if(t == EOF)
+            return LOG("error: EOF within document (`%s')", name.c_str());
+        if(t == '\x00')
+            break;
+        ungetc(t, fp);
+
+        BSONField f("@", mkNull());
+        if(!parseElement(fp, NULL, &f, log))
+            return LOG("within document `%s'", name.c_str());
+        fields.push_back(std::move(f));
+    }
+
+    v = mkDocumentFromFields(fields.data(), fields.size());
+    return true;
+}
+
+/* Parse an array body into v; same layout as a document, but the element
+   names (indices) are discarded. `name` is only used for error messages. */
+bool BSON::parseArray(FILE *fp, BSON &v, std::string const &name, bool log)
+{
+    i32 len;
+    if(!readExact(fp, &len, 4))
+        return LOG("error: EOF within array size (`%s')", name.c_str());
+
+    std::vector<BSON> values;
+    while(true) {
+        int t = fgetc(fp);
+        if(t == EOF)
+            return LOG("error: EOF within array (`%s')", name.c_str());
+        if(t == '\x00')
+            break;
+        ungetc(t, fp);
+
+        BSON element;
+        if(!parseElement(fp, &element, NULL, log))
+            return LOG("within array `%s'", name.c_str());
+        values.push_back(std::move(element));
+    }
+
+    v = mkArrayFromValues(values.data(), values.size());
+    return true;
+}
+
+BSON BSON::parseDocumentFromFile(FILE *fp, bool *error, bool log)
+{
+    BSON v;
+    bool ok = parseDocument(fp, v, "", log);
+    /* `error` reports failure, which is the opposite of the parsers' return
+       value */
+    if(error)
+        *error = !ok;
+    if(!ok)
+        v = mkNull();
+    return v;
+}
+
+#undef LOG
+
+/* NOTE(review): the element type of the list was garbled in this copy of the
+   patch; restored as pair<std::string, BSON &&>, which is what allows moving
+   out of `pair.second` below. Confirm against the header in the tree. */
+BSON BSON::mkDocument(
+    std::initializer_list<std::pair<std::string, BSON &&>> pairs)
+{
+    uint count = pairs.size();
+    BSONField *fields
+        = static_cast<BSONField *>(allocArray(count, sizeof *fields));
+
+    uint i = 0;
+    for(auto const &pair: pairs) {
+        new(&fields[i]) BSONField(pair.first, std::move(pair.second));
+        i++;
+    }
+
+    return mkDocumentFromFieldArray(fields, count);
+}
+
+BSON BSON::mkDocumentFromFields(BSONField *fields_ro, size_t count)
+{
+    BSONField *fields
+        = static_cast<BSONField *>(allocArray(count, sizeof *fields));
+    /* The new array is raw storage: construct the fields in place */
+    for(size_t i = 0; i < count; i++)
+        new(&fields[i]) BSONField(std::move(fields_ro[i]));
+
+    return mkDocumentFromFieldArray(fields, count);
+}
+
+BSON BSON::mkDocumentFromFieldArray(BSONField *fields, size_t count)
+{
+    BSON v;
+    v.m_type = Type::Document;
+    v.m_size = count;
+    v.m_value.fields = fields;
+    return v;
+}
+
+BSON BSON::mkArrayFromValues(BSON *values_ro, size_t count)
+{
+    BSON *values = static_cast<BSON *>(allocArray(count, sizeof *values));
+    /* The new array is raw storage: construct the values in place */
+    for(size_t i = 0; i < count; i++)
+        new(&values[i]) BSON(std::move(values_ro[i]));
+
+    return mkArrayFromValueArray(values, count);
+}
+
+BSON BSON::mkArrayFromValueArray(BSON *values, size_t count)
+{
+    BSON v;
+    v.m_type = Type::Array;
+    v.m_size = count;
+    v.m_value.values = values;
+    return v;
+}
+
+BSON BSON::mkBinaryCopy(int subtype, u8 const *data_ro, size_t size)
+{
+    /* malloc()-based, since the destructor releases the data with free() */
+    u8 *data = static_cast<u8 *>(allocArray(size, 1));
+    if(size)
+        memcpy(data, data_ro, size);
+    return mkBinaryMove(subtype, data, size);
+}
+
+BSON BSON::mkBinaryMove(int subtype, u8 *data, size_t size)
+{
+    BSON v;
+    v.m_type = Type::Binary;
+    v.m_subtype = subtype;
+    v.m_size = size;
+    v.m_value.binary = data;
+    return v;
+}
+
+BSON BSON::mkStringCopy(char const *str, int len)
+{
+    BSON v;
+    v.m_type = Type::String;
+
+    if(len < 0)
+        len = strlen(str);
+
+    if(len < (int)SSO_MAXLEN) {
+        /* Inline layout: length plus one, so that the empty string doesn't
+           look like a heap string */
+        v.m_subtype = len + 1;
+        memset((char *)&v + 2, 0, SSO_MAXLEN);
+        memcpy((char *)&v + 2, str, len);
+    }
+    else {
+        v.m_value.str = static_cast<char *>(allocArray(len + 1, 1));
+        memcpy(v.m_value.str, str, len);
+        v.m_value.str[len] = 0;
+    }
+
+    return v;
+}
+
+BSON BSON::mkStringCopy(std::string const &str)
+{
+    return mkStringCopy(str.c_str(), str.size());
+}
+
+BSON BSON::mkStringMove(char *str)
+{
+    BSON v;
+    v.m_type = Type::String;
+
+    int len = strlen(str);
+    if(len < (int)SSO_MAXLEN) {
+        /* Short enough to be stored inline; the buffer is not needed */
+        v.m_subtype = len + 1;
+        memset((char *)&v + 2, 0, SSO_MAXLEN);
+        memcpy((char *)&v + 2, str, len);
+        free(str);
+    }
+    else {
+        v.m_value.str = str;
+    }
+
+    return v;
+}
+
+char const *BSON::getStringReadOnly() const
+{
+    assert(isString() && "wrong BSON accessor: getStringReadOnly");
+    if(m_subtype)
+        return (char *)this + 2;
+    else
+        return m_value.str;
+}
+
+char *BSON::getStringCopy() const
+{
+    return strdup(getStringReadOnly());
+}
+
+BSON &BSON::operator[](int i)
+{
+    assert(isArray() && i >= 0 && (uint)i < m_size
+           && "BSON::operator[]: out-of-bounds");
+    return m_value.values[i];
+}
+
+BSON const &BSON::operator[](int i) const
+{
+    assert(isArray() && i >= 0 && (uint)i < m_size
+           && "BSON::operator[]: out-of-bounds");
+    return m_value.values[i];
+}
+
+/* Linear search for the first of the n fields named str; NULL if none. */
+static BSONField *getFieldWithName(BSONField *fields, char const *str, int n)
+{
+    for(int i = 0; i < n; i++) {
+        if(fields[i].compareName(str))
+            return &fields[i];
+    }
+    return NULL;
+}
+
+bool BSON::hasField(char const *str) const
+{
+    assert(isDocument() && "BSON::hasField: not a document");
+    return getFieldWithName(m_value.fields, str, m_size) != NULL;
+}
+
+BSON &BSON::operator[](char const *str)
+{
+    assert(isDocument() && "BSON::operator[]: not a document");
+    BSONField *f = getFieldWithName(m_value.fields, str, m_size);
+    assert(f && "BSON::operator[]: key missing");
+    return f->value();
+}
+
+BSON const &BSON::operator[](char const *str) const
+{
+    assert(isDocument() && "BSON::operator[]: not a document");
+    BSONField *f = getFieldWithName(m_value.fields, str, m_size);
+    assert(f && "BSON::operator[]: key missing");
+    return f->value();
+}
+
+BSONField::BSONField(char const *name, BSON &&value, int len)
+{
+    size_t n = (len >= 0) ? len : strnlen(name, sizeof m_literal + 1);
+
+    /* Empty names go to the heap: m_layout == 0 is what identifies the
+       pointer layout, so an inline name must have at least one byte */
+    if(n >= 1 && n <= sizeof m_literal) {
+        m_layout = n;
+        memset(m_literal, 0, sizeof m_literal);
+        memcpy(m_literal, name, n);
+    }
+    else {
+        m_name = (len >= 0) ? strndup(name, len) : strdup(name);
+        if(!m_name)
+            throw std::bad_alloc {};
+        /* Check that the top byte is unused */
+        assert((uintptr_t)m_name >> (8 * sizeof m_name - 8) == 0);
+    }
+
+    m_value = std::move(value);
+}
+
+BSONField::BSONField(BSONField &&other)
+{
+    *this = std::move(other);
+}
+
+/* Take over the name and value of `other`, leaving it as "@" / Null.
+   NOTE(review): like BSON::operator=, this does not release a heap name
+   previously owned by *this; assigning over such a field leaks the name. */
+BSONField &BSONField::operator=(BSONField &&other)
+{
+    /* Self-move would otherwise replace the name with the placeholder */
+    if(this == &other)
+        return *this;
+
+    /* m_name overlaps m_literal/m_layout, so this copies either layout */
+    m_name = other.m_name;
+    other.m_name = nullptr;
+    other.m_layout = 1;
+    other.m_literal[0] = '@';
+    m_value = std::move(other.m_value);
+    return *this;
+}
+
+BSONField::~BSONField()
+{
+    if(!m_layout)
+        free(m_name);
+}
+
+BSONField BSONField::clone() const
+{
+    if(m_layout)
+        return BSONField(m_literal, m_value.clone(), m_layout);
+    else
+        return BSONField(m_name, m_value.clone());
+}
+
+bool BSONField::compareName(char const *str) const
+{
+    if(m_layout) {
+        /* m_literal is zero-padded but not NUL-terminated when full, hence
+           the extra check that str is not longer than the literal */
+        return !strncmp(m_literal, str, sizeof m_literal)
+               && strnlen(str, sizeof m_literal + 1) <= sizeof m_literal;
+    }
+    else {
+        return !strcmp(str, m_name);
+    }
+}
+
+char const *BSONField::getNameReadOnly(size_t *len) const
+{
+    if(m_layout) {
+        *len = m_layout;
+        return m_literal;
+    }
+    else {
+        *len = strlen(m_name);
+        return m_name;
+    }
+}
+
+char *BSONField::getNameCopy() const
+{
+    if(m_layout)
+        return strndup(m_literal, m_layout);
+    else
+        return strdup(m_name);
+}
+
+void BSONField::dump(FILE *fp, int depth) const
+{
+    fprintf(fp, "%*s", 2 * depth, "");
+
+    if(m_layout)
+        fprintf(fp, "'%.*s'(%d): ", m_layout, m_literal, m_layout);
+    else
+        fprintf(fp, "'%s': ", m_name);
+
+    /* The value continues the line started by the name */
+    m_value.dump(fp, depth, true);
+}