427 lines
13 KiB
C++
427 lines
13 KiB
C++
//---------------------------------------------------------------------------//
|
|
// 1100101 |_ mov #0, r4 __ //
|
|
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
|
|
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
|
|
// |_ base# + offset |_| /_\_\___/__/ //
|
|
//---------------------------------------------------------------------------//
|
|
// fxos/util/bson: Binary serialization (BSON format)
|
|
//
|
|
// This header implements a subset of the BSON format [1], which fxos uses to
|
|
// save project and configuration files. The few aspects of note of this
|
|
// implementation are:
|
|
//
|
|
// - Values and fields are immutable with RAII semantics. The only type of
|
|
// copy is a deep copy with .clone(); there is no copy constructor.
|
|
// - Compact storage: 16 bytes per value + 8 bytes overhead for field names
|
|
// in documents. Strings and binary sequences are stored externally, with
|
|
// short string optimizations for string values of 13 bytes or less and
|
|
// document field names of 7 bytes or less.
|
|
// - Only supports generic, JSON-like types of data; I don't care about the
|
|
// legacy or Javascript/MongoDB-specific stuff.
|
|
//
|
|
// [1] https://bsonspec.org
|
|
//---
|
|
|
|
#ifndef FXOS_UTIL_BSON_H
|
|
#define FXOS_UTIL_BSON_H
|
|
|
|
#include <fxos/util/types.h>
|
|
#include <endian.h>
|
|
#include <cassert>
|
|
#include <cstdio>
|
|
#include <string>
|
|
|
|
struct BSONField;
|
|
|
|
struct BSON
|
|
{
|
|
enum Type : u8 {
|
|
// clang-format off
|
|
Double = 0x01,
|
|
String = 0x02,
|
|
Document = 0x03,
|
|
Array = 0x04,
|
|
Binary = 0x05,
|
|
Bool = 0x08,
|
|
Datetime = 0x09,
|
|
Null = 0x0a,
|
|
I32 = 0x10,
|
|
I64 = 0x12,
|
|
// clang-format on
|
|
};
|
|
|
|
/* Default constructor builds a Null value. */
|
|
BSON(): m_type {Type::Null}, m_value {._i64 = 0}
|
|
{
|
|
}
|
|
|
|
/* Copy is suppressed because deep copies are expensive and rarely needed.
|
|
Use the explicit .clone() method for a deep copy. */
|
|
BSON(BSON const &) = delete;
|
|
BSON &operator=(BSON const &) = delete;
|
|
|
|
/* Move constructor and move assignment will move the resources and make
|
|
the old value a Null value. */
|
|
BSON(BSON &&);
|
|
BSON &operator=(BSON &&);
|
|
|
|
~BSON();
|
|
|
|
/* Deep clone a value. */
|
|
BSON clone() const;
|
|
|
|
/* Dump value recursively to stream (for debugging purposes). */
|
|
void dump(FILE *fp, int depth = 0, bool noindent = false) const;
|
|
|
|
/* Serialize to a file, which must be seekable and open in write mode. If
|
|
`name` is not NULL, specifies the name of the field where this value is.
|
|
`len` is the length of name, or -1 for NUL-terminated. Only documents
|
|
can be serialized without a field name. */
|
|
void serialize(FILE *fp, char const *name = NULL, int len = -1) const;
|
|
|
|
/* Parse a complete document from a file. Sets `error` (and returns a Null
|
|
value) if a parse error occurs. If `log` is set, also logs out a
|
|
recursive context for the error, which helps with analysis. */
|
|
static BSON parseDocumentFromFile(FILE *fp, bool *error, bool log);
|
|
|
|
/* Load a file. The file should be a document. If a `expectedType` is
|
|
provided, the document should have a field "*" with that value. Return
|
|
a Null value in case of error. */
|
|
static BSON loadDocumentFromFile(std::string path, bool log, bool mustExist,
|
|
char const *expectedType = nullptr);
|
|
|
|
/* Constructors */
|
|
|
|
static BSON mkDouble(double d)
|
|
{
|
|
BSON v;
|
|
v.m_type = Type::Double;
|
|
v.m_value._double = d;
|
|
return v;
|
|
}
|
|
static BSON mkBool(bool b)
|
|
{
|
|
BSON v;
|
|
v.m_type = Type::Bool;
|
|
v.m_subtype = b;
|
|
v.m_value._i64 = 0;
|
|
return v;
|
|
}
|
|
static BSON mkDatetime(i64 datetime)
|
|
{
|
|
BSON v;
|
|
v.m_type = Type::Datetime;
|
|
v.m_value._i64 = datetime;
|
|
return v;
|
|
}
|
|
static BSON mkNull()
|
|
{
|
|
BSON v;
|
|
v.m_type = Type::Null;
|
|
v.m_value._i64 = 0;
|
|
return v;
|
|
}
|
|
static BSON mkI32(i32 integer)
|
|
{
|
|
BSON v;
|
|
v.m_type = Type::I32;
|
|
v.m_value._i32 = integer;
|
|
return v;
|
|
}
|
|
static BSON mkI64(i64 integer)
|
|
{
|
|
BSON v;
|
|
v.m_type = Type::I64;
|
|
v.m_value._i64 = integer;
|
|
return v;
|
|
}
|
|
|
|
/* Construct a document from a literal list of pairs { name, value }. */
|
|
static BSON mkDocument(
|
|
std::initializer_list<std::pair<char const *, BSON &&>> pairs);
|
|
/* Construct a document by moving every field from the array without taking
|
|
ownership of the array. */
|
|
static BSON mkDocumentFromFields(BSONField *fields, size_t count);
|
|
/* Construct a document by taking ownership of a field array, which must be
|
|
in the heap. */
|
|
static BSON mkDocumentFromFieldArray(BSONField *fields, size_t count);
|
|
|
|
/* Construct an array with count null values (which can be assigned). */
|
|
static BSON mkArray(size_t count);
|
|
/* Construct an array by moving every value from the array without taking
|
|
ownership of the array. */
|
|
static BSON mkArrayFromValues(BSON *values, size_t count);
|
|
/* Construct an array by taking ownership of a value array, which must be
|
|
in the heap. */
|
|
static BSON mkArrayFromValueArray(BSON *values, size_t count);
|
|
|
|
/* Construct a binary object by copying the data region. */
|
|
static BSON mkBinaryCopy(int subtype, u8 const *data, size_t size);
|
|
/* Construct a binary object by taking ownership of the provided data
|
|
region, which must be in the heap. */
|
|
static BSON mkBinaryMove(int subtype, u8 *data, size_t size);
|
|
|
|
/* Construct a string by copying the input. */
|
|
static BSON mkString(std::string const &str);
|
|
/* Construct a string by copying the input. */
|
|
static BSON mkStringCopy(char const *str, int len = -1);
|
|
/* Construct a string by taking ownership of the provided buffer, which
|
|
must be NUL-terminated and in the heap. */
|
|
static BSON mkStringMove(char *str);
|
|
|
|
/* Type checks */
|
|
|
|
bool isDouble() const
|
|
{
|
|
return m_type == Type::Double;
|
|
}
|
|
bool isString() const
|
|
{
|
|
return m_type == Type::String;
|
|
}
|
|
bool isDocument() const
|
|
{
|
|
return m_type == Type::Document;
|
|
};
|
|
bool isArray() const
|
|
{
|
|
return m_type == Type::Array;
|
|
}
|
|
bool isBinary() const
|
|
{
|
|
return m_type == Type::Binary;
|
|
}
|
|
bool isDatetime() const
|
|
{
|
|
return m_type == Type::Datetime;
|
|
}
|
|
bool isNull() const
|
|
{
|
|
return m_type == Type::Null;
|
|
}
|
|
bool isI32() const
|
|
{
|
|
return m_type == Type::I32;
|
|
}
|
|
bool isI64() const
|
|
{
|
|
return m_type == Type::I64;
|
|
}
|
|
|
|
/* Accessors */
|
|
|
|
double getDouble() const
|
|
{
|
|
assert(isDouble() && "wrong BSON accessor: getDouble");
|
|
return m_value._double;
|
|
}
|
|
BSONField *getDocumentFields()
|
|
{
|
|
assert(isDocument() && "wrong BSON accessor: getDocumentFields");
|
|
return m_value.fields;
|
|
}
|
|
BSONField const *getDocumentFields() const
|
|
{
|
|
assert(isDocument() && "wrong BSON accessor: getDocumentFields");
|
|
return m_value.fields;
|
|
}
|
|
BSON *getArrayElements()
|
|
{
|
|
assert(isArray() && "wrong BSON accessor: getArrayElements");
|
|
return m_value.values;
|
|
}
|
|
BSON const *getArrayElements() const
|
|
{
|
|
assert(isArray() && "wrong BSON accessor: getArrayElements");
|
|
return m_value.values;
|
|
}
|
|
u8 const *getBinary(size_t *size, int *subtype) const
|
|
{
|
|
assert(isBinary() && "wrong BSON accessor: getBinary");
|
|
if(size)
|
|
*size = m_size;
|
|
if(subtype)
|
|
*subtype = m_subtype;
|
|
return m_value.binary;
|
|
}
|
|
i64 getDatetime() const
|
|
{
|
|
assert(isDatetime() && "wrong BSON accessor: getDatetime");
|
|
return m_value._i64;
|
|
}
|
|
i32 getI32() const
|
|
{
|
|
assert(isI32() && "wrong BSON accessor: getI32");
|
|
return m_value._i32;
|
|
}
|
|
i64 getI64() const
|
|
{
|
|
assert(isI64() && "wrong BSON accessor: getI64");
|
|
return m_value._i64;
|
|
}
|
|
|
|
/* Get pointer to NUL-terminated string, read-only */
|
|
char const *getStringReadOnly() const;
|
|
/* Get a copy of the NUL-terminated string, malloc() allocated */
|
|
char *getStringCopy() const;
|
|
/* Get a copy of the NUL-terminated string */
|
|
std::string getString() const
|
|
{
|
|
return std::string(getStringReadOnly());
|
|
}
|
|
|
|
/* Document/array size */
|
|
uint size() const
|
|
{
|
|
assert((isDocument() || isArray() || isBinary())
|
|
&& "BSON::size: bad type");
|
|
return m_size;
|
|
}
|
|
|
|
/* Get binary subtype */
|
|
int binarySubtype() const
|
|
{
|
|
assert(isBinary() && "BSON::binarySubtype: not a Binary");
|
|
return m_subtype;
|
|
}
|
|
/* Move binary data out of a value */
|
|
u8 *moveBinary(size_t *size, int *subtype)
|
|
{
|
|
getBinary(size, subtype);
|
|
u8 *data = m_value.binary;
|
|
m_type = Type::Null;
|
|
m_value._i64 = 0;
|
|
return data;
|
|
}
|
|
|
|
/* Get n-th element of array; must be in-bounds (or assertion failure) */
|
|
BSON &operator[](int i);
|
|
BSON const &operator[](int i) const;
|
|
|
|
/* Check whether a document has a particular field */
|
|
bool hasField(char const *str) const;
|
|
bool hasField(std::string str) const
|
|
{
|
|
return hasField(str.c_str());
|
|
}
|
|
|
|
/* Access document element by name; must exist (or assertion failure) */
|
|
BSON &operator[](char const *str);
|
|
BSON const &operator[](char const *str) const;
|
|
BSON &operator[](std::string str)
|
|
{
|
|
return (*this)[str.c_str()];
|
|
}
|
|
BSON const &operator[](std::string str) const
|
|
{
|
|
return (*this)[str.c_str()];
|
|
}
|
|
|
|
public:
|
|
/* The members are public so that constructor functions can do their job.
|
|
Don't access this directly. The definition is also a bit misleading
|
|
because some types (strings) store stuff across multiple fields. */
|
|
|
|
Type m_type;
|
|
/* Subtype or memory layout for the object. */
|
|
u8 m_subtype = 0;
|
|
u16 m_zero = 0;
|
|
/* Extra metadata, generally a size. */
|
|
u32 m_size = 0;
|
|
|
|
union
|
|
{
|
|
double _double; /* Double */
|
|
char *str; /* String (unless short string optimization) */
|
|
BSONField *fields; /* Document */
|
|
BSON *values; /* Array */
|
|
u8 *binary; /* Binary */
|
|
i32 _i32; /* I32 */
|
|
i64 _i64; /* Datetime, I64 */
|
|
} m_value;
|
|
|
|
private:
|
|
static bool parseElement(FILE *, BSON *, BSONField *, bool);
|
|
static bool parseDocument(FILE *, BSON &, std::string const &, bool);
|
|
static bool parseArray(FILE *, BSON &, std::string const &, bool);
|
|
};
|
|
|
|
/* A key/value pair used in documents. */
|
|
struct BSONField
|
|
{
|
|
/* Make a field out of a name and a value to be moved into the field. If
|
|
`len` is specified then it's the length of the name, otherwise the name
|
|
is assumed to be NUL-terminated. */
|
|
BSONField(char const *name, BSON &&value, int len = -1);
|
|
|
|
/* Same with an std::string. */
|
|
BSONField(std::string name, BSON &&value):
|
|
BSONField(name.c_str(), std::move(value))
|
|
{
|
|
}
|
|
|
|
/* Implicit copy is disabled, use .clone(). */
|
|
BSONField(BSONField const &) = delete;
|
|
BSONField &operator=(BSONField const &) = delete;
|
|
|
|
/* Moving will move both the name and value, leaving a field with a
|
|
placholder name "@" and a Null value. */
|
|
BSONField(BSONField &&);
|
|
BSONField &operator=(BSONField &&);
|
|
|
|
~BSONField();
|
|
|
|
/* Deep clone a field and its value */
|
|
BSONField clone() const;
|
|
|
|
/* Compare against the name (which might be stored unconventionally). */
|
|
bool compareName(char const *str) const;
|
|
|
|
/* Get read-only access to the name. It is not guaranteed to be NUL-
|
|
terminated due to storage optimizations. */
|
|
char const *getNameReadOnly(size_t *len) const;
|
|
|
|
/* Get a NUL-terminated heap copy of the name. */
|
|
char *getNameCopy() const;
|
|
|
|
/* Get a copy of the name. */
|
|
std::string getName() const;
|
|
|
|
/* Dump field recursively to stream (for debugging purposes). */
|
|
void dump(FILE *fp, int depth = 0) const;
|
|
|
|
/* Get value */
|
|
BSON &value()
|
|
{
|
|
return m_value;
|
|
}
|
|
BSON const &value() const
|
|
{
|
|
return m_value;
|
|
}
|
|
|
|
private:
|
|
/* Another short string optimization; if the full name fits on 7 bytes
|
|
(without a NUL terminator), store it directly in the field; otherwise
|
|
use an 8-byte pointer.
|
|
|
|
This optimization relies on pointer not using their top byte, which is
|
|
the case in practice and asserted at runtime for future-proofness. */
|
|
union
|
|
{
|
|
// clang-format off
|
|
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
|
struct { char m_literal[sizeof(char *) - 1]; u8 m_layout; };
|
|
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
struct { u8 m_layout; char m_literal[sizeof(char *) - 1]; };
|
|
#else
|
|
#error "bson.h: unknown endianness for compact field storage?!"
|
|
#endif
|
|
// clang-format on
|
|
char *m_name;
|
|
};
|
|
BSON m_value;
|
|
};
|
|
|
|
#endif /* FXOS_UTIL_BSON_H */
|