fxos: add a small BSON serialization library
(I tested it obviously)
This commit is contained in:
parent
a4cda4cb66
commit
56a4800bbd
|
@ -4,3 +4,6 @@
|
|||
|
||||
# Semantic exclude
|
||||
/exclude
|
||||
|
||||
# IDE files
|
||||
/*.sublime-*
|
||||
|
|
|
@ -49,6 +49,7 @@ set(fxos_core_SOURCES
|
|||
lib/vspace.cpp
|
||||
|
||||
lib/ai/RelConst.cpp
|
||||
lib/util/bson.cpp
|
||||
lib/util/Buffer.cpp
|
||||
lib/util/log.cpp
|
||||
lib/util/Timer.cpp)
|
||||
|
|
|
@ -0,0 +1,400 @@
|
|||
//---------------------------------------------------------------------------//
|
||||
// 1100101 |_ mov #0, r4 __ //
|
||||
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
|
||||
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
|
||||
// |_ base# + offset |_| /_\_\___/__/ //
|
||||
//---------------------------------------------------------------------------//
|
||||
// fxos/util/bson: Binary serialization (BSON format)
|
||||
//
|
||||
// This header implements a subset of the BSON format [1], which fxos uses
|
||||
// to save project and configuration files. The few aspects of note of this
|
||||
// implementation are:
|
||||
//
|
||||
// - Values and fields are immutable with RAII semantics. The only type of
|
||||
// copy is a deep copy with .clone(); there is no copy constructor.
|
||||
// - Compact storage: 16 bytes per value + 8 bytes overhead for field names
|
||||
// in documents. Strings and binary sequences are stored externally, with
|
||||
// short string optimizations for string values of 13 bytes or less and
|
||||
// document field names of 7 bytes or less.
|
||||
// - Only supports generic, JSON-like types of data; I don't care about the
|
||||
// legacy or Javascript/MongoDB-specific stuff.
|
||||
//
|
||||
// [1] https://bsonspec.org
|
||||
//---
|
||||
|
||||
#ifndef FXOS_UTIL_BSON_H
|
||||
#define FXOS_UTIL_BSON_H
|
||||
|
||||
#include <fxos/util/types.h>
|
||||
#include <endian.h>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
struct BSONField;
|
||||
|
||||
struct BSON
|
||||
{
|
||||
enum Type : u8 {
|
||||
// clang-format off
|
||||
Double = 0x01,
|
||||
String = 0x02,
|
||||
Document = 0x03,
|
||||
Array = 0x04,
|
||||
Binary = 0x05,
|
||||
Bool = 0x08,
|
||||
Datetime = 0x09,
|
||||
Null = 0x0a,
|
||||
I32 = 0x10,
|
||||
I64 = 0x12,
|
||||
// clang-format on
|
||||
};
|
||||
|
||||
/* Default constructor builds a Null value. */
|
||||
BSON(): m_type {Type::Null}, m_value {._i64 = 0}
|
||||
{
|
||||
}
|
||||
|
||||
/* Copy is suppressed because deep copies are expensive and rarely needed.
|
||||
Use the explicit .clone() method for a deep copy. */
|
||||
BSON(BSON const &) = delete;
|
||||
BSON &operator=(BSON const &) = delete;
|
||||
|
||||
/* Move constructor and move assignment will move the resources and make
|
||||
the old value a Null value. */
|
||||
BSON(BSON &&);
|
||||
BSON &operator=(BSON &&);
|
||||
|
||||
~BSON();
|
||||
|
||||
/* Deep clone a value. */
|
||||
BSON clone() const;
|
||||
|
||||
/* Dump value recursively to stream (for debugging purposes). */
|
||||
void dump(FILE *fp, int depth = 0, bool noindent = false) const;
|
||||
|
||||
/* Serialize to a file, which must be seekable and open in write mode. If
|
||||
`name` is not NULL, specifies the name of the field where this value is.
|
||||
`len` is the length of name, or -1 for NUL-terminated. Only documents
|
||||
can be serialized without a field name. */
|
||||
void serialize(FILE *fp, char const *name = NULL, int len = -1) const;
|
||||
|
||||
/* Parse a complete document from a file. Sets `error` (and returns a Null
|
||||
value) if a parse error occurs. If `log` is set, also logs out a
|
||||
recursive context for the error, which helps with analysis. */
|
||||
static BSON parseDocumentFromFile(FILE *fp, bool *error, bool log);
|
||||
|
||||
/* Constructors */
|
||||
|
||||
static BSON mkDouble(double d)
|
||||
{
|
||||
BSON v;
|
||||
v.m_type = Type::Double;
|
||||
v.m_value._double = d;
|
||||
return v;
|
||||
}
|
||||
static BSON mkBool(bool b)
|
||||
{
|
||||
BSON v;
|
||||
v.m_type = Type::Bool;
|
||||
v.m_subtype = b;
|
||||
v.m_value._i64 = 0;
|
||||
return v;
|
||||
}
|
||||
static BSON mkDatetime(i64 datetime)
|
||||
{
|
||||
BSON v;
|
||||
v.m_type = Type::Datetime;
|
||||
v.m_value._i64 = datetime;
|
||||
return v;
|
||||
}
|
||||
static BSON mkNull()
|
||||
{
|
||||
BSON v;
|
||||
v.m_type = Type::Null;
|
||||
v.m_value._i64 = 0;
|
||||
return v;
|
||||
}
|
||||
static BSON mkI32(i32 integer)
|
||||
{
|
||||
BSON v;
|
||||
v.m_type = Type::I32;
|
||||
v.m_value._i32 = integer;
|
||||
return v;
|
||||
}
|
||||
static BSON mkI64(i64 integer)
|
||||
{
|
||||
BSON v;
|
||||
v.m_type = Type::I64;
|
||||
v.m_value._i64 = integer;
|
||||
return v;
|
||||
}
|
||||
|
||||
/* Construct a document from a literal list of pairs { name, value }. */
|
||||
static BSON mkDocument(
|
||||
std::initializer_list<std::pair<char const *, BSON &&>> pairs);
|
||||
/* Construct a document by moving every field from the array without taking
|
||||
ownership of the array. */
|
||||
static BSON mkDocumentFromFields(BSONField *fields, size_t count);
|
||||
/* Construct a document by taking ownership of a field array, which must be
|
||||
in the heap. */
|
||||
static BSON mkDocumentFromFieldArray(BSONField *fields, size_t count);
|
||||
|
||||
/* Construct an array by moving every value from the array without taking
|
||||
ownership of the array. */
|
||||
static BSON mkArrayFromValues(BSON *values, size_t count);
|
||||
/* Construct an array by taking ownership of a value array, which must be
|
||||
in the heap. */
|
||||
static BSON mkArrayFromValueArray(BSON *values, size_t count);
|
||||
|
||||
/* Construct a binary object by copying the data region. */
|
||||
static BSON mkBinaryCopy(int subtype, u8 const *data, size_t size);
|
||||
/* Construct a binary object by taking ownership of the provided data
|
||||
region, which must be in the heap. */
|
||||
static BSON mkBinaryMove(int subtype, u8 *data, size_t size);
|
||||
|
||||
/* Construct a string by copying the input. */
|
||||
static BSON mkStringCopy(char const *str, int len = -1);
|
||||
static BSON mkStringCopy(std::string const &str);
|
||||
/* Construct a string by taking ownership of the provided buffer, which
|
||||
must be NUL-terminated and in the heap. */
|
||||
static BSON mkStringMove(char *str);
|
||||
|
||||
/* Type checks */
|
||||
|
||||
bool isDouble() const
|
||||
{
|
||||
return m_type == Type::Double;
|
||||
}
|
||||
bool isString() const
|
||||
{
|
||||
return m_type == Type::String;
|
||||
}
|
||||
bool isDocument() const
|
||||
{
|
||||
return m_type == Type::Document;
|
||||
};
|
||||
bool isArray() const
|
||||
{
|
||||
return m_type == Type::Array;
|
||||
}
|
||||
bool isBinary() const
|
||||
{
|
||||
return m_type == Type::Binary;
|
||||
}
|
||||
bool isDatetime() const
|
||||
{
|
||||
return m_type == Type::Datetime;
|
||||
}
|
||||
bool isNull() const
|
||||
{
|
||||
return m_type == Type::Null;
|
||||
}
|
||||
bool isI32() const
|
||||
{
|
||||
return m_type == Type::I32;
|
||||
}
|
||||
bool isI64() const
|
||||
{
|
||||
return m_type == Type::I64;
|
||||
}
|
||||
|
||||
/* Accessors */
|
||||
|
||||
double getDouble() const
|
||||
{
|
||||
assert(isDouble() && "wrong BSON accessor: getDouble");
|
||||
return m_value._double;
|
||||
}
|
||||
BSONField *getDocumentFields()
|
||||
{
|
||||
assert(isDocument() && "wrong BSON accessor: getDocumentFields");
|
||||
return m_value.fields;
|
||||
}
|
||||
BSONField const *getDocumentFields() const
|
||||
{
|
||||
assert(isDocument() && "wrong BSON accessor: getDocumentFields");
|
||||
return m_value.fields;
|
||||
}
|
||||
BSON *getArrayElements()
|
||||
{
|
||||
assert(isArray() && "wrong BSON accessor: getArrayElements");
|
||||
return m_value.values;
|
||||
}
|
||||
BSON const *getArrayElements() const
|
||||
{
|
||||
assert(isArray() && "wrong BSON accessor: getArrayElements");
|
||||
return m_value.values;
|
||||
}
|
||||
u8 *getBinary(size_t *size, int *subtype) const
|
||||
{
|
||||
assert(isBinary() && "wrong BSON accessor: getBinary");
|
||||
if(size)
|
||||
*size = m_size;
|
||||
if(subtype)
|
||||
*subtype = m_subtype;
|
||||
return m_value.binary;
|
||||
}
|
||||
i64 getDatetime() const
|
||||
{
|
||||
assert(isDatetime() && "wrong BSON accessor: getDatetime");
|
||||
return m_value._i64;
|
||||
}
|
||||
i32 getI32() const
|
||||
{
|
||||
assert(isI32() && "wrong BSON accessor: getI32");
|
||||
return m_value._i32;
|
||||
}
|
||||
i64 getI64() const
|
||||
{
|
||||
assert(isI64() && "wrong BSON accessor: getI64");
|
||||
return m_value._i64;
|
||||
}
|
||||
|
||||
/* Get pointer to NUL-terminated string, read-only */
|
||||
char const *getStringReadOnly() const;
|
||||
/* Get a copy of the NUL-terminated string, malloc() allocated */
|
||||
char *getStringCopy() const;
|
||||
|
||||
/* Document/array size */
|
||||
uint size() const
|
||||
{
|
||||
assert((isDocument() || isArray() || isBinary())
|
||||
&& "BSON::size: bad type");
|
||||
return m_size;
|
||||
}
|
||||
|
||||
/* Get binary subtype */
|
||||
int binarySubtype() const
|
||||
{
|
||||
assert(isBinary() && "BSON::binarySubtype: not a Binary");
|
||||
return m_subtype;
|
||||
}
|
||||
|
||||
/* Get n-th element of array; must be in-bounds (or assertion failure) */
|
||||
BSON &operator[](int i);
|
||||
BSON const &operator[](int i) const;
|
||||
|
||||
/* Check whether a document has a particular field */
|
||||
bool hasField(char const *str) const;
|
||||
bool hasField(std::string str) const
|
||||
{
|
||||
return hasField(str.c_str());
|
||||
}
|
||||
|
||||
/* Access document element by name; must exist (or assertion failure) */
|
||||
BSON &operator[](char const *str);
|
||||
BSON const &operator[](char const *str) const;
|
||||
BSON &operator[](std::string str)
|
||||
{
|
||||
return (*this)[str.c_str()];
|
||||
}
|
||||
BSON const &operator[](std::string str) const
|
||||
{
|
||||
return (*this)[str.c_str()];
|
||||
}
|
||||
|
||||
public:
|
||||
/* The members are public so that constructor functions can do their job.
|
||||
Don't access this directly. The definition is also a bit misleading
|
||||
because some types (strings) store stuff across multiple fields. */
|
||||
|
||||
Type m_type;
|
||||
/* Subtype or memory layout for the object. */
|
||||
u8 m_subtype = 0;
|
||||
u16 m_zero = 0;
|
||||
/* Extra metadata, generally a size. */
|
||||
u32 m_size = 0;
|
||||
|
||||
union
|
||||
{
|
||||
double _double; /* Double */
|
||||
char *str; /* String (unless short string optimization) */
|
||||
BSONField *fields; /* Document */
|
||||
BSON *values; /* Array */
|
||||
u8 *binary; /* Binary */
|
||||
i32 _i32; /* I32 */
|
||||
i64 _i64; /* Datetime, I64 */
|
||||
} m_value;
|
||||
|
||||
private:
|
||||
static bool parseElement(FILE *, BSON *, BSONField *, bool);
|
||||
static bool parseDocument(FILE *, BSON &, std::string const &, bool);
|
||||
static bool parseArray(FILE *, BSON &, std::string const &, bool);
|
||||
};
|
||||
|
||||
/* A key/value pair used in documents. */
|
||||
struct BSONField
|
||||
{
|
||||
/* Make a field out of a name and a value to be moved into the field. If
|
||||
`len` is specified then it's the length of the name, otherwise the name
|
||||
is assumed to be NUL-terminated. */
|
||||
BSONField(char const *name, BSON &&value, int len = -1);
|
||||
|
||||
/* Same with an std::string. */
|
||||
BSONField(std::string name, BSON &&value):
|
||||
BSONField(name.c_str(), std::move(value))
|
||||
{
|
||||
}
|
||||
|
||||
/* Implicit copy is disabled, use .clone(). */
|
||||
BSONField(BSONField const &) = delete;
|
||||
BSONField &operator=(BSONField const &) = delete;
|
||||
|
||||
/* Moving will move both the name and value, leaving a field with a
|
||||
placholder name "@" and a Null value. */
|
||||
BSONField(BSONField &&);
|
||||
BSONField &operator=(BSONField &&);
|
||||
|
||||
~BSONField();
|
||||
|
||||
/* Deep clone a field and its value */
|
||||
BSONField clone() const;
|
||||
|
||||
/* Compare against the name (which might be stored unconventionally). */
|
||||
bool compareName(char const *str) const;
|
||||
|
||||
/* Get read-only access to the name. It is not guaranteed to be NUL-
|
||||
terminated due to storage optimizations. */
|
||||
char const *getNameReadOnly(size_t *len) const;
|
||||
|
||||
/* Get a NUL-terminated heap copy of the name. */
|
||||
char *getNameCopy() const;
|
||||
|
||||
/* Dump field recursively to stream (for debugging purposes). */
|
||||
void dump(FILE *fp, int depth = 0) const;
|
||||
|
||||
/* Get value */
|
||||
BSON &value()
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
BSON const &value() const
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
|
||||
private:
|
||||
/* Another short string optimization; if the full name fits on 7 bytes
|
||||
(without a NUL terminator), store it directly in the field; otherwise
|
||||
use an 8-byte pointer.
|
||||
|
||||
This optimization relies on pointer not using their top byte, which is
|
||||
the case in practice and asserted at runtime for future-proofness. */
|
||||
union
|
||||
{
|
||||
// clang-format off
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
struct { char m_literal[sizeof(char *) - 1]; u8 m_layout; };
|
||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
struct { u8 m_layout; char m_literal[sizeof(char *) - 1]; };
|
||||
#else
|
||||
#error "bson.h: unknown endianness for compact field storage?!"
|
||||
#endif
|
||||
// clang-format on
|
||||
char *m_name;
|
||||
};
|
||||
BSON m_value;
|
||||
};
|
||||
|
||||
#endif /* FXOS_UTIL_BSON_H */
|
|
@ -0,0 +1,732 @@
|
|||
//---------------------------------------------------------------------------//
|
||||
// 1100101 |_ mov #0, r4 __ //
|
||||
// 11 |_ <0xb380 %5c4> / _|_ _____ ___ //
|
||||
// 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< //
|
||||
// |_ base# + offset |_| /_\_\___/__/ //
|
||||
//---------------------------------------------------------------------------//
|
||||
// Reference: https://bsonspec.org/spec.html
|
||||
//
|
||||
// For the subset at hand:
|
||||
// document ::=
|
||||
// | int32 element* "\x00" int32 is the total number of bytes
|
||||
// element ::=
|
||||
// | "\x01" e_name double 64-bit binary floating point
|
||||
// | "\x02" e_name string UTF-8 string
|
||||
// | "\x03" e_name document Embedded document
|
||||
// | "\x04" e_name document Array
|
||||
// | "\x05" e_name binary Binary data
|
||||
// | "\x08" e_name "\x00" Boolean "false"
|
||||
// | "\x08" e_name "\x01" Boolean "true"
|
||||
// | "\x09" e_name int64 UTC datetime
|
||||
// | "\x0A" e_name Null value
|
||||
// | "\x10" e_name int32 32-bit integer
|
||||
// | "\x12" e_name int64 64-bit integer
|
||||
// string ::=
|
||||
// | int32 (byte*) "\x00" int32 is the number of bytes, NUL included
|
||||
// binary ::=
|
||||
// | int32 subtype (byte*) int32 is the number of bytes
|
||||
//---
|
||||
|
||||
#include <fxos/util/bson.h>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
|
||||
/* Number of bytes available in a value after the type/subtype attributes */
|
||||
#define SSO_MAXLEN (sizeof(BSON) - 2)
|
||||
|
||||
/* Move constructor: takes over every field of `other`, then resets `other`
   to a Null value so that it no longer references the moved payload. */
BSON::BSON(BSON &&other):
    m_type(other.m_type), m_subtype(other.m_subtype), m_zero(other.m_zero),
    m_size(other.m_size), m_value(other.m_value)
{
    other.m_type = Type::Null;
    other.m_subtype = 0;
    other.m_zero = 0;
    other.m_size = 0;
    other.m_value._i64 = 0;
}
|
||||
|
||||
/* Move assignment: takes over the contents of `other` and leaves it Null.

   The previous contents of *this are overwritten without being released:
   other code in this file assigns into storage that has not been constructed
   yet, so the destination is never read. Assigning over a value that owns
   heap data therefore leaks it. */
BSON &BSON::operator=(BSON &&other)
{
    /* Self-move must be a no-op; otherwise the reset below would turn the
       value into Null and lose its payload. */
    if(this == &other)
        return *this;

    m_type = other.m_type;
    m_subtype = other.m_subtype;
    m_zero = other.m_zero;
    m_size = other.m_size;
    m_value = other.m_value;

    other.m_type = Type::Null;
    other.m_subtype = 0;
    other.m_zero = 0;
    other.m_size = 0;
    other.m_value._i64 = 0;

    return *this;
}
|
||||
|
||||
/* Release whatever heap storage the value owns, depending on its type. All
   owned buffers are released with free(). */
BSON::~BSON()
{
    switch(m_type) {
    case Type::String:
        /* A non-zero subtype means the string is stored inline */
        if(!m_subtype)
            free(m_value.str);
        break;

    case Type::Document: {
        BSONField *field = m_value.fields;
        for(uint left = m_size; left > 0; left--, field++)
            field->~BSONField();
        free(m_value.fields);
        break;
    }

    case Type::Array: {
        BSON *elem = m_value.values;
        for(uint left = m_size; left > 0; left--, elem++)
            elem->~BSON();
        free(m_value.values);
        break;
    }

    case Type::Binary:
        free(m_value.binary);
        break;

    default:
        /* Other types own no external storage */
        break;
    }
}
|
||||
|
||||
/* Deep copy of the value. Scalars are copied bitwise; heap strings, binary
   data, document fields and array elements are duplicated recursively.
   Throws std::bad_alloc if an allocation fails. */
BSON BSON::clone() const
{
    switch(m_type) {
    /* All types with no referenced subvalues */
    case Type::Double:
    case Type::Bool:
    case Type::Datetime:
    case Type::Null:
    case Type::I32:
    case Type::I64: {
        BSON v;
        v.m_type = m_type;
        v.m_subtype = m_subtype;
        v.m_zero = m_zero;
        v.m_size = m_size;
        v.m_value = m_value;
        return v;
    }

    /* Strings need to be copied only if the SSO is not used */
    case Type::String: {
        BSON v;
        v.m_type = m_type;
        v.m_subtype = m_subtype;
        v.m_zero = m_zero;
        v.m_size = m_size;
        v.m_value = m_value;
        /* Without this duplication both values would own the same buffer
           and free it twice. */
        if(!m_subtype && m_value.str) {
            v.m_value.str = strdup(m_value.str);
            if(!v.m_value.str)
                throw std::bad_alloc {};
        }
        return v;
    }

    /* Arrays and objects need to have their entries cloned */
    case Type::Document: {
        BSONField *fields = nullptr;
        /* malloc(0) may legitimately return NULL, so only allocate (and
           only treat NULL as a failure) when there is something to store */
        if(m_size) {
            fields = static_cast<BSONField *>(malloc(m_size * sizeof *fields));
            if(!fields)
                throw std::bad_alloc {};
        }
        /* The storage is raw, so construct the fields in place */
        for(uint i = 0; i < m_size; i++)
            new(&fields[i]) BSONField(m_value.fields[i].clone());
        return mkDocumentFromFieldArray(fields, m_size);
    }
    case Type::Array: {
        BSON *values = nullptr;
        if(m_size) {
            values = static_cast<BSON *>(malloc(m_size * sizeof *values));
            if(!values)
                throw std::bad_alloc {};
        }
        for(uint i = 0; i < m_size; i++)
            new(&values[i]) BSON(m_value.values[i].clone());
        return mkArrayFromValueArray(values, m_size);
    }

    case Type::Binary:
        return mkBinaryCopy(m_subtype, m_value.binary, m_size);
    }

    assert(false && "BSON::clone: unsupported type");
    /* Keep release builds (assertions disabled) from running off the end of
       a value-returning function */
    return BSON();
}
|
||||
|
||||
void BSON::dump(FILE *fp, int depth, bool noindent) const
|
||||
{
|
||||
if(!noindent)
|
||||
fprintf(fp, "%*s", 2 * depth, "");
|
||||
|
||||
switch(m_type) {
|
||||
case Type::String:
|
||||
if(m_subtype)
|
||||
fprintf(fp, "string(%d) \"%s\"\n", m_subtype - 1, (char *)this + 2);
|
||||
else
|
||||
fprintf(fp, "string \"%s\"\n", m_value.str);
|
||||
break;
|
||||
case Type::Document:
|
||||
fprintf(fp, "document\n");
|
||||
for(uint i = 0; i < m_size; i++)
|
||||
m_value.fields[i].dump(fp, depth + 1);
|
||||
break;
|
||||
case Type::Array:
|
||||
fprintf(fp, "array\n");
|
||||
for(uint i = 0; i < m_size; i++)
|
||||
m_value.values[i].dump(fp, depth + 1);
|
||||
break;
|
||||
case Type::Double:
|
||||
fprintf(fp, "double %f\n", m_value._double);
|
||||
break;
|
||||
case Type::Bool:
|
||||
fprintf(fp, m_subtype ? "true\n" : "false\n");
|
||||
break;
|
||||
case Type::Datetime:
|
||||
fprintf(fp, "datetime %ld\n", m_value._i64);
|
||||
break;
|
||||
case Type::Null:
|
||||
fprintf(fp, "null\n");
|
||||
break;
|
||||
case Type::I32:
|
||||
fprintf(fp, "i32 %d\n", m_value._i32);
|
||||
break;
|
||||
case Type::I64:
|
||||
fprintf(fp, "i64 %ld\n", m_value._i64);
|
||||
break;
|
||||
default:
|
||||
fprintf(fp, "UNKNOWN(%d/%d)\n", m_type, m_subtype);
|
||||
}
|
||||
}
|
||||
|
||||
void BSON::serialize(FILE *fp, char const *name, int len) const
|
||||
{
|
||||
static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
&& "BSON::serialize currently assumes little-endian");
|
||||
|
||||
assert((name || isDocument()) && "BSON::serialize: missing name");
|
||||
if(name) {
|
||||
fputc((int)m_type, fp);
|
||||
if(len >= 0)
|
||||
fwrite(name, len, 1, fp);
|
||||
else
|
||||
fputs(name, fp);
|
||||
fputc('\x00', fp);
|
||||
}
|
||||
|
||||
switch(m_type) {
|
||||
case Type::Double:
|
||||
fwrite(&m_value._double, 8, 1, fp);
|
||||
return;
|
||||
|
||||
case Type::String: {
|
||||
char const *str = getStringReadOnly();
|
||||
int size = strlen(str) + 1;
|
||||
fwrite(&size, 4, 1, fp);
|
||||
fputs(str, fp);
|
||||
fputc('\x00', fp);
|
||||
return;
|
||||
}
|
||||
|
||||
case Type::Document:
|
||||
case Type::Array: {
|
||||
long start_o = ftell(fp);
|
||||
fputs("####", fp);
|
||||
|
||||
if(m_type == Type::Document) {
|
||||
for(uint i = 0; i < m_size; i++) {
|
||||
BSONField const &f = m_value.fields[i];
|
||||
size_t flen;
|
||||
char const *name = f.getNameReadOnly(&flen);
|
||||
f.value().serialize(fp, name, flen);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for(uint i = 0; i < m_size; i++) {
|
||||
char str[16];
|
||||
sprintf(str, "%u", i);
|
||||
m_value.values[i].serialize(fp, str, -1);
|
||||
}
|
||||
}
|
||||
|
||||
fputc('\x00', fp);
|
||||
long end_o = ftell(fp);
|
||||
fseek(fp, start_o, SEEK_SET);
|
||||
i32 size = end_o - start_o;
|
||||
fwrite(&size, 4, 1, fp);
|
||||
fseek(fp, end_o, SEEK_SET);
|
||||
return;
|
||||
}
|
||||
|
||||
case Type::Binary:
|
||||
fwrite(&m_size, 4, 1, fp);
|
||||
fputc(m_subtype, fp);
|
||||
fwrite(m_value.binary, m_size, 1, fp);
|
||||
fputc('\x00', fp);
|
||||
return;
|
||||
|
||||
case Type::Bool:
|
||||
fputc(m_subtype != 0, fp);
|
||||
return;
|
||||
|
||||
case Type::Datetime:
|
||||
case Type::I64:
|
||||
fwrite(&m_value._i64, 8, 1, fp);
|
||||
return;
|
||||
|
||||
case Type::Null:
|
||||
return;
|
||||
|
||||
case Type::I32:
|
||||
fwrite(&m_value._i32, 4, 1, fp);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(false && "BSON::serialize: unsupported object type");
|
||||
}
|
||||
|
||||
#define LOG(FMT, ...) \
|
||||
({ \
|
||||
if(log) \
|
||||
fprintf(stderr, "[bson::parse] " FMT "\n", ##__VA_ARGS__); \
|
||||
false; \
|
||||
})
|
||||
|
||||
/* Parse an element. If f is non-NULL, record the name and fill *f. Otherwise,
|
||||
b should be non-NULL and the value is stored in b. */
|
||||
bool BSON::parseElement(FILE *fp, BSON *b, BSONField *f, bool log)
|
||||
{
|
||||
int type = fgetc(fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF where field was expected");
|
||||
|
||||
std::string name;
|
||||
int c;
|
||||
while((c = fgetc(fp))) {
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within field name");
|
||||
name.push_back(c);
|
||||
}
|
||||
|
||||
if(f)
|
||||
new(f) BSONField(name, mkNull());
|
||||
|
||||
BSON &v = f ? f->value() : *b;
|
||||
v = mkNull();
|
||||
v.m_type = static_cast<BSON::Type>(type);
|
||||
i32 len = 0;
|
||||
|
||||
switch(type) {
|
||||
case Type::Double:
|
||||
fread(&v.m_value._double, 8, 1, fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within double (`%s')", name.c_str());
|
||||
return true;
|
||||
|
||||
case Type::String:
|
||||
fread(&len, 4, 1, fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within string size (`%s')", name.c_str());
|
||||
|
||||
if(len <= (int)SSO_MAXLEN) {
|
||||
v.m_subtype = len;
|
||||
if(fread((char *)&v + 2, len, 1, fp) != 1)
|
||||
return LOG("error: failed to read str (`%s')", name.c_str());
|
||||
}
|
||||
else {
|
||||
v.m_value.str = new char[len];
|
||||
if(!v.m_value.str)
|
||||
throw std::bad_alloc {};
|
||||
if(fread(v.m_value.str, len, 1, fp) != 1) {
|
||||
delete[] v.m_value.str;
|
||||
return LOG("error: failed to read str (`%s')", name.c_str());
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
||||
case Type::Document:
|
||||
return parseDocument(fp, v, name, log);
|
||||
case Type::Array:
|
||||
return parseArray(fp, v, name, log);
|
||||
|
||||
case Type::Binary:
|
||||
fread(&v.m_size, 4, 1, fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within binary size (`%s')", name.c_str());
|
||||
v.m_subtype = fgetc(fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF at binary subtype (`%s')", name.c_str());
|
||||
v.m_value.binary = new u8[v.m_size];
|
||||
if(!v.m_value.binary)
|
||||
throw std::bad_alloc();
|
||||
if(fread(v.m_value.binary, v.m_size, 1, fp) != 1) {
|
||||
delete[] v.m_value.binary;
|
||||
return LOG("error: failed to read binary (`%s')", name.c_str());
|
||||
}
|
||||
return true;
|
||||
|
||||
case Type::Bool:
|
||||
v.m_subtype = (fgetc(fp) != 0);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within boolean (`%s')", name.c_str());
|
||||
return true;
|
||||
|
||||
case Type::Datetime:
|
||||
fread(&v.m_value._i64, 8, 1, fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within datetime (`%s')", name.c_str());
|
||||
return true;
|
||||
|
||||
case Type::Null:
|
||||
return true;
|
||||
|
||||
case Type::I32:
|
||||
fread(&v.m_value._i32, 4, 1, fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within i32 (`%s')", name.c_str());
|
||||
return true;
|
||||
|
||||
case Type::I64:
|
||||
fread(&v.m_value._i64, 8, 1, fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within i64 (`%s')", name.c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Reset the value so that it can be destroyed without blowing up */
|
||||
v = mkNull();
|
||||
|
||||
return LOG("error: unknown value type: 0x%02x (`%s')", type, name.c_str());
|
||||
}
|
||||
|
||||
bool BSON::parseDocument(FILE *fp, BSON &v, std::string const &name, bool log)
|
||||
{
|
||||
i32 len;
|
||||
fread(&len, 4, 1, fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within document size (`%s')", name.c_str());
|
||||
|
||||
std::vector<BSONField> fields;
|
||||
while(true) {
|
||||
int t = fgetc(fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within document (`%s')", name.c_str());
|
||||
if(t == '\x00')
|
||||
break;
|
||||
ungetc(t, fp);
|
||||
|
||||
BSONField f("@", mkNull());
|
||||
if(!parseElement(fp, NULL, &f, log))
|
||||
return LOG("within document `%s'", name.c_str());
|
||||
fields.push_back(std::move(f));
|
||||
}
|
||||
|
||||
v = mkDocumentFromFields(fields.data(), fields.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BSON::parseArray(FILE *fp, BSON &v, std::string const &name, bool log)
|
||||
{
|
||||
i32 len;
|
||||
fread(&len, 4, 1, fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within array size (`%s')", name.c_str());
|
||||
|
||||
std::vector<BSON> values;
|
||||
while(true) {
|
||||
int t = fgetc(fp);
|
||||
if(feof(fp))
|
||||
return LOG("error: EOF within array (`%s')", name.c_str());
|
||||
if(t == '\x00')
|
||||
break;
|
||||
ungetc(t, fp);
|
||||
|
||||
BSON v;
|
||||
if(!parseElement(fp, &v, NULL, log))
|
||||
return LOG("within array `%s'", name.c_str());
|
||||
values.push_back(std::move(v));
|
||||
}
|
||||
|
||||
v = mkArrayFromValues(values.data(), values.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Parse a top-level document from `fp`. If `error` is non-NULL, *error is
   set to true when parsing failed and false when it succeeded. A Null value
   is returned on failure. `log` enables error messages on stderr. */
BSON BSON::parseDocumentFromFile(FILE *fp, bool *error, bool log)
{
    BSON v;
    bool ok = parseDocument(fp, v, "<file>", log);
    /* parseDocument() returns true on success, so the flag is its negation */
    if(error)
        *error = !ok;
    if(!ok)
        v = mkNull();
    return v;
}
|
||||
|
||||
#undef LOG
|
||||
|
||||
/* Build a document from a literal list of { name, value } pairs. Each value
   is moved into a freshly allocated field array owned by the document.
   Throws std::bad_alloc if the array cannot be allocated. */
BSON BSON::mkDocument(
    std::initializer_list<std::pair<char const *, BSON &&>> pairs)
{
    uint count = pairs.size();
    BSONField *fields = nullptr;
    /* malloc(0) may return NULL, which is not an error for an empty list */
    if(count) {
        fields = static_cast<BSONField *>(malloc(count * sizeof *fields));
        if(!fields)
            throw std::bad_alloc {};
    }

    uint i = 0;
    for(auto const &pair: pairs) {
        /* The storage is raw, so construct each field in place */
        new(&fields[i]) BSONField(pair.first, std::move(pair.second));
        i++;
    }

    return mkDocumentFromFieldArray(fields, count);
}
|
||||
|
||||
/* Build a document by moving `count` fields out of `fields_ro` into a newly
   allocated array owned by the document. The source array itself is not
   taken over; its fields are left in their moved-from state. Throws
   std::bad_alloc if the array cannot be allocated. */
BSON BSON::mkDocumentFromFields(BSONField *fields_ro, size_t count)
{
    BSONField *fields = nullptr;
    /* malloc(0) may return NULL, which is not an error for an empty list */
    if(count) {
        fields = static_cast<BSONField *>(malloc(count * sizeof *fields));
        if(!fields)
            throw std::bad_alloc {};
    }
    /* The destination is raw storage: move-construct in place rather than
       assigning to objects that do not exist yet */
    for(size_t i = 0; i < count; i++)
        new(&fields[i]) BSONField(std::move(fields_ro[i]));

    return mkDocumentFromFieldArray(fields, count);
}
|
||||
|
||||
/* Wrap an existing field array into a Document value. The array pointer is
   stored as-is; the destructor later destroys the fields and free()s it. */
BSON BSON::mkDocumentFromFieldArray(BSONField *fields, size_t count)
{
    BSON doc;
    doc.m_value.fields = fields;
    doc.m_size = count;
    doc.m_type = Type::Document;
    return doc;
}
|
||||
|
||||
/* Build an array by moving `count` values out of `values_ro` into a newly
   allocated array owned by the result. The source array itself is not taken
   over; its values are left Null. Throws std::bad_alloc if the array cannot
   be allocated. */
BSON BSON::mkArrayFromValues(BSON *values_ro, size_t count)
{
    BSON *values = nullptr;
    /* malloc(0) may return NULL, which is not an error for an empty list */
    if(count) {
        values = static_cast<BSON *>(malloc(count * sizeof *values));
        if(!values)
            throw std::bad_alloc {};
    }

    /* The destination is raw storage: move-construct in place rather than
       assigning to objects that do not exist yet */
    for(size_t i = 0; i < count; i++)
        new(&values[i]) BSON(std::move(values_ro[i]));

    return mkArrayFromValueArray(values, count);
}
|
||||
|
||||
/* Wrap an existing value array into an Array value. The array pointer is
   stored as-is; the destructor later destroys the elements and free()s it. */
BSON BSON::mkArrayFromValueArray(BSON *values, size_t count)
{
    BSON array;
    array.m_value.values = values;
    array.m_size = count;
    array.m_type = Type::Array;
    return array;
}
|
||||
|
||||
/* Build a binary object holding a private copy of `size` bytes from
   `data_ro`. Throws std::bad_alloc if the copy cannot be allocated. */
BSON BSON::mkBinaryCopy(int subtype, u8 const *data_ro, size_t size)
{
    u8 *data = nullptr;
    if(size) {
        /* The destructor releases binary data with free(), so the buffer
           must come from malloc() and not from new[] */
        data = static_cast<u8 *>(malloc(size));
        if(!data)
            throw std::bad_alloc {};
        memcpy(data, data_ro, size);
    }
    return mkBinaryMove(subtype, data, size);
}
|
||||
|
||||
/* Build a binary object around an existing buffer, which the value takes
   over (it is released with free() by the destructor). */
BSON BSON::mkBinaryMove(int subtype, u8 *data, size_t size)
{
    BSON bin;
    bin.m_value.binary = data;
    bin.m_size = size;
    bin.m_subtype = subtype;
    bin.m_type = Type::Binary;
    return bin;
}
|
||||
|
||||
/* Build a string value from the first `len` bytes of `str`, or from the
   whole NUL-terminated string if `len` is negative. Short strings are stored
   inline in the value; longer ones in a heap copy. Throws std::bad_alloc if
   the copy cannot be allocated. */
BSON BSON::mkStringCopy(char const *str, int len)
{
    BSON v;
    v.m_type = Type::String;

    if(len < 0)
        len = strlen(str);

    if(len < (int)SSO_MAXLEN) {
        /* The inline tag is the length including the NUL terminator, as in
           the parser. It is never 0, even for the empty string; a tag of 0
           means "heap string". */
        v.m_subtype = len + 1;
        memset((char *)&v + 2, 0, SSO_MAXLEN);
        memcpy((char *)&v + 2, str, len);
    }
    else {
        /* malloc() because the destructor releases strings with free() */
        char *copy = static_cast<char *>(malloc((size_t)len + 1));
        if(!copy)
            throw std::bad_alloc {};
        memcpy(copy, str, len);
        copy[len] = 0;
        v.m_value.str = copy;
    }

    return v;
}
|
||||
|
||||
/* Build a string value from an std::string. Short strings are stored inline
   in the value; longer ones in a heap copy. The contents are copied as a C
   string, i.e. up to the first NUL byte. Throws std::bad_alloc if the copy
   cannot be allocated. */
BSON BSON::mkStringCopy(std::string const &str)
{
    BSON v;
    v.m_type = Type::String;

    if(str.size() < SSO_MAXLEN) {
        /* The inline tag is the length including the NUL terminator, as in
           the parser. It is never 0, even for the empty string; a tag of 0
           means "heap string". */
        v.m_subtype = str.size() + 1;
        /* strncpy() zero-fills the rest of the inline area */
        strncpy((char *)&v + 2, str.c_str(), SSO_MAXLEN);
    }
    else {
        /* malloc() because the destructor releases strings with free() */
        char *copy = static_cast<char *>(malloc(str.size() + 1));
        if(!copy)
            throw std::bad_alloc {};
        strcpy(copy, str.c_str());
        v.m_value.str = copy;
    }

    return v;
}
|
||||
|
||||
/* Build a string value by taking over `str`, a NUL-terminated buffer that
   can be released with free(). Short strings are copied into the value and
   the buffer is freed right away; longer ones keep using the buffer. */
BSON BSON::mkStringMove(char *str)
{
    BSON v;
    v.m_type = Type::String;

    int len = strlen(str);
    if(len < (int)SSO_MAXLEN) {
        /* The inline tag is the length including the NUL terminator, as in
           the parser. It is never 0, even for the empty string; a tag of 0
           means "heap string". */
        v.m_subtype = len + 1;
        memset((char *)&v + 2, 0, SSO_MAXLEN);
        memcpy((char *)&v + 2, str, len);
        free(str);
    }
    else {
        v.m_value.str = str;
    }

    return v;
}
|
||||
|
||||
char const *BSON::getStringReadOnly() const
|
||||
{
|
||||
assert(isString() && "wrong BSON accessor: getStringReadOnly");
|
||||
if(m_subtype)
|
||||
return (char *)this + 2;
|
||||
else
|
||||
return m_value.str;
|
||||
}
|
||||
|
||||
char *BSON::getStringCopy() const
|
||||
{
|
||||
return strdup(getStringReadOnly());
|
||||
}
|
||||
|
||||
/* Element `i` of an array; asserts that the value is an array and that the
   index is within bounds. */
BSON &BSON::operator[](int i)
{
    assert(isArray() && i >= 0 && (uint)i < m_size
           && "BSON::operator[]: out-of-bounds");
    BSON *elements = m_value.values;
    return elements[i];
}
|
||||
|
||||
/* Read-only access to element `i` of an array; asserts that the value is an
   array and that the index is within bounds. */
BSON const &BSON::operator[](int i) const
{
    assert(isArray() && i >= 0 && (uint)i < m_size
           && "BSON::operator[]: out-of-bounds");
    BSON const *elements = m_value.values;
    return elements[i];
}
|
||||
|
||||
/* Linear search for the first of the `n` fields whose name equals `str`.
   Returns NULL if there is none. */
static BSONField *getFieldWithName(BSONField *fields, char const *str, int n)
{
    BSONField *candidate = fields;
    for(int left = n; left > 0; left--, candidate++) {
        if(candidate->compareName(str))
            return candidate;
    }
    return NULL;
}
|
||||
|
||||
bool BSON::hasField(char const *str) const
|
||||
{
|
||||
assert(isDocument() && "BSON::hasField: not a document");
|
||||
return getFieldWithName(m_value.fields, str, m_size);
|
||||
}
|
||||
|
||||
/* Value of the document field named `str`; asserts that the value is a
   document and that the field exists. */
BSON &BSON::operator[](char const *str)
{
    assert(isDocument() && "BSON::operator[]: not a document");
    BSONField *f = getFieldWithName(m_value.fields, str, m_size);
    assert(f && "BSON::operator[]: key missing");
    BSON &found = f->value();
    return found;
}
|
||||
|
||||
/* Read-only value of the document field named `str`; asserts that the value
   is a document and that the field exists. */
BSON const &BSON::operator[](char const *str) const
{
    assert(isDocument() && "BSON::operator[]: not a document");
    BSONField *f = getFieldWithName(m_value.fields, str, m_size);
    assert(f && "BSON::operator[]: key missing");
    BSON const &found = f->value();
    return found;
}
|
||||
|
||||
/* Build a field named `name` (`len` bytes, or NUL-terminated if `len` is
   negative) and move `value` into it. Names that fit in m_literal are stored
   inline with their length in m_layout; longer names are duplicated on the
   heap. Throws std::bad_alloc if that duplication fails. */
BSONField::BSONField(char const *name, BSON &&value, int len)
{
    /* For NUL-terminated names, only scan far enough to tell whether the
       name fits inline */
    size_t n = (len >= 0) ? len : strnlen(name, sizeof m_literal + 1);

    if(n <= sizeof m_literal) {
        /* NOTE(review): an empty name gives m_layout == 0 with an all-zero
           literal, i.e. a null m_name that the other methods treat as a heap
           name. Confirm that empty field names are never used. */
        m_layout = n;
        memset(m_literal, 0, sizeof m_literal);
        memcpy(m_literal, name, n);
    }
    else {
        m_name = (len >= 0) ? strndup(name, len) : strdup(name);
        /* Report allocation failure like the rest of the library does */
        if(!m_name)
            throw std::bad_alloc {};
        /* Check that the top byte is unused */
        assert((uintptr_t)m_name >> (8 * sizeof m_name - 8) == 0);
    }

    m_value = std::move(value);
}
|
||||
|
||||
/* Move constructor: takes over the name and value of `other`, which is left
   with the inline placeholder name "@" and a Null value. */
BSONField::BSONField(BSONField &&other): m_value(std::move(other.m_value))
{
    /* Copying the pointer member copies the whole 8-byte name storage,
       whichever layout is in use */
    m_name = other.m_name;

    other.m_name = nullptr;
    other.m_layout = 1;
    other.m_literal[0] = '@';
}
|
||||
|
||||
/* Move assignment: takes over the name and value of `other`, which is left
   with the inline placeholder name "@" and a Null value. The previous name
   and value of *this are overwritten without being released. */
BSONField &BSONField::operator=(BSONField &&other)
{
    /* Self-move must be a no-op; otherwise the reset below would replace the
       field's own name with the placeholder. */
    if(this == &other)
        return *this;

    /* Copying the pointer member copies the whole 8-byte name storage,
       whichever layout is in use */
    m_name = other.m_name;
    other.m_name = nullptr;
    other.m_layout = 1;
    other.m_literal[0] = '@';
    m_value = std::move(other.m_value);
    return *this;
}
|
||||
|
||||
/* Release the heap copy of the name, if there is one. */
BSONField::~BSONField()
{
    /* A non-zero layout byte means the name is stored inline */
    if(m_layout)
        return;
    free(m_name);
}
|
||||
|
||||
/* Deep copy: the value is cloned and the name is re-stored through the
   regular constructor. */
BSONField BSONField::clone() const
{
    BSON copy = m_value.clone();

    /* Inline names are not NUL-terminated, so pass their length explicitly */
    if(m_layout)
        return BSONField(m_literal, std::move(copy), m_layout);

    return BSONField(m_name, std::move(copy));
}
|
||||
|
||||
bool BSONField::compareName(char const *str) const
|
||||
{
|
||||
if(m_layout) {
|
||||
return !strncmp(m_literal, str, sizeof m_literal)
|
||||
&& strnlen(str, sizeof m_literal + 1) <= sizeof m_literal;
|
||||
}
|
||||
else {
|
||||
return !strcmp(str, m_name);
|
||||
}
|
||||
}
|
||||
|
||||
char const *BSONField::getNameReadOnly(size_t *len) const
|
||||
{
|
||||
if(m_layout) {
|
||||
*len = m_layout;
|
||||
return m_literal;
|
||||
}
|
||||
else {
|
||||
*len = strlen(m_name);
|
||||
return m_name;
|
||||
}
|
||||
}
|
||||
|
||||
char *BSONField::getNameCopy() const
|
||||
{
|
||||
if(m_layout)
|
||||
return strndup(m_literal, m_layout);
|
||||
else
|
||||
return strdup(m_name);
|
||||
}
|
||||
|
||||
void BSONField::dump(FILE *fp, int depth) const
|
||||
{
|
||||
fprintf(fp, "%*s", 2 * depth, "");
|
||||
|
||||
if(m_layout)
|
||||
fprintf(fp, "'%.*s'(%d): ", m_layout, m_literal, m_layout);
|
||||
else
|
||||
fprintf(fp, "'%s': ", m_name);
|
||||
|
||||
m_value.dump(fp, depth, true);
|
||||
}
|
Loading…
Reference in New Issue