fxos/lib/util/bson.cpp

//---------------------------------------------------------------------------//
//  1100101 |_ mov #0, r4         __                                         //
//     11   |_ <0xb380 %5c4>     / _|_ _____ ___                             //
//     0110 |_ 3.50 -> 3.60     |  _\ \ / _ (_-<                             //
//          |_ base# + offset   |_| /_\_\___/__/                             //
//---------------------------------------------------------------------------//
// Reference: https://bsonspec.org/spec.html
//
// For the subset at hand:
//   document ::=
//     | int32 element* "\x00"   int32 is the total number of bytes
//   element ::=
//     | "\x01" e_name double    64-bit binary floating point
//     | "\x02" e_name string    UTF-8 string
//     | "\x03" e_name document  Embedded document
//     | "\x04" e_name document  Array
//     | "\x05" e_name binary    Binary data
//     | "\x08" e_name "\x00"    Boolean "false"
//     | "\x08" e_name "\x01"    Boolean "true"
//     | "\x09" e_name int64     UTC datetime
//     | "\x0A" e_name           Null value
//     | "\x10" e_name int32     32-bit integer
//     | "\x12" e_name int64     64-bit integer
//   string ::=
//     | int32 (byte*) "\x00"    int32 is the number of bytes, NUL included
//   binary ::=
//     | int32 subtype (byte*)   int32 is the number of bytes
//---

#include <fxos/util/bson.h>
#include <fxos/util/log.h>
#include <vector>
#include <cstring>
#include <cstdio>

/* Number of bytes available in a value after the type/subtype attributes.
   The string code in this file uses the bytes starting at offset 2 of a BSON
   object as inline storage for short strings, so this is the capacity of that
   inline buffer (terminating NUL included). */
#define SSO_MAXLEN (sizeof(BSON) - 2)

/* Move constructor. All the work is delegated to the move assignment
   operator: the contents of `other` are taken over and `other` is left as an
   empty Null value. */
BSON::BSON(BSON &&other)
{
    operator=(std::move(other));
}

/* Move assignment: takes over the type, attributes and payload of `other`,
   then resets `other` to an empty Null value so that destroying it releases
   nothing.

   The previous contents of *this are overwritten without being released.
   Several functions in this file assign into freshly malloc()'d storage and
   therefore depend on the old bytes never being interpreted, so no cleanup is
   attempted here.

   Moving a value onto itself is a no-op. Without the check, the value would
   be reset to Null and whatever it owned would be leaked. */
BSON &BSON::operator=(BSON &&other)
{
    if(this == &other)
        return *this;

    m_type = other.m_type;
    m_subtype = other.m_subtype;
    m_zero = other.m_zero;
    m_size = other.m_size;
    m_value = other.m_value;

    other.m_type = Type::Null;
    other.m_subtype = 0;
    other.m_zero = 0;
    other.m_size = 0;
    other.m_value._i64 = 0;

    return *this;
}

/* Destructor: releases whatever heap storage the value owns, depending on
   its type. Documents and arrays destroy each entry before freeing the
   backing array. */
BSON::~BSON()
{
    switch(m_type) {
    case Type::String:
        /* A zero subtype marks a heap string; inline strings own nothing */
        if(m_subtype == 0)
            free(m_value.str);
        break;

    case Type::Document: {
        BSONField *const entries = m_value.fields;
        for(uint k = 0; k < m_size; k++)
            entries[k].~BSONField();
        free(entries);
        break;
    }

    case Type::Array: {
        BSON *const items = m_value.values;
        for(uint k = 0; k < m_size; k++)
            items[k].~BSON();
        free(items);
        break;
    }

    case Type::Binary:
        free(m_value.binary);
        break;

    default:
        /* Every other type stores its payload inline */
        break;
    }
}

/* Deep copy of a value. Scalars are copied bitwise; heap strings, documents,
   arrays and binary blobs get their own freshly allocated storage so that the
   copy and the original can be destroyed independently.
   Throws std::bad_alloc if an allocation fails. */
BSON BSON::clone() const
{
    switch(m_type) {
    /* All subtypes with no referenced subvalues */
    case Type::Double:
    case Type::Bool:
    case Type::Datetime:
    case Type::Null:
    case Type::I32:
    case Type::I64: {
        BSON v;
        v.m_type = m_type;
        v.m_subtype = m_subtype;
        v.m_zero = m_zero;
        v.m_size = m_size;
        v.m_value = m_value;
        return v;
    }

    /* Strings need to be copied only if the SSO is not used */
    case Type::String: {
        BSON v;
        v.m_type = m_type;
        v.m_subtype = m_subtype;
        v.m_zero = m_zero;
        v.m_size = m_size;
        v.m_value = m_value;
        /* A zero subtype means the bytes live on the heap. The pointer must
           be duplicated, otherwise both values would free() the same buffer.
           On failure the pointer is NULL, so destroying v is harmless. */
        if(!m_subtype) {
            v.m_value.str = strdup(m_value.str);
            if(!v.m_value.str)
                throw std::bad_alloc {};
        }
        return v;
    }

    /* Arrays and objects need to have their entries cloned */
    case Type::Document: {
        BSONField *fields
            = static_cast<BSONField *>(malloc(m_size * sizeof *fields));
        if(!fields)
            throw std::bad_alloc {};
        /* The storage is raw, so construct the entries in place rather than
           assigning to objects that do not exist yet */
        for(uint i = 0; i < m_size; i++)
            new(&fields[i]) BSONField(m_value.fields[i].clone());
        return mkDocumentFromFieldArray(fields, m_size);
    }
    case Type::Array: {
        BSON *values = static_cast<BSON *>(malloc(m_size * sizeof *values));
        if(!values)
            throw std::bad_alloc {};
        for(uint i = 0; i < m_size; i++)
            new(&values[i]) BSON(m_value.values[i].clone());
        return mkArrayFromValueArray(values, m_size);
    }

    case Type::Binary:
        return mkBinaryCopy(m_subtype, m_value.binary, m_size);
    }

    assert(false && "BSON::clone: unsupported type");
    /* Keep the function well-defined when assertions are compiled out */
    return mkNull();
}

/* Print a human-readable description of the value to fp.
   - depth: nesting level; each level indents by two spaces
   - noindent: skip the indentation of the first line (used when the caller
     has already printed a prefix, such as a field name, on that line)
   Documents and arrays print their entries recursively at depth + 1. */
void BSON::dump(FILE *fp, int depth, bool noindent) const
{
    if(!noindent)
        fprintf(fp, "%*s", 2 * depth, "");

    switch(m_type) {
    case Type::String:
        /* A non-zero subtype marks an inline string stored at offset 2 */
        if(m_subtype)
            fprintf(fp, "string(%d) \"%s\"\n", m_subtype - 1,
                (char const *)this + 2);
        else
            fprintf(fp, "string \"%s\"\n", m_value.str);
        break;
    case Type::Document:
        fprintf(fp, "document\n");
        for(uint i = 0; i < m_size; i++)
            m_value.fields[i].dump(fp, depth + 1);
        break;
    case Type::Array:
        fprintf(fp, "array\n");
        for(uint i = 0; i < m_size; i++)
            m_value.values[i].dump(fp, depth + 1);
        break;
    case Type::Binary:
        /* Binary values used to be reported as UNKNOWN; only the metadata
           is shown since the payload is arbitrary bytes */
        fprintf(fp, "binary(%d) %u bytes\n", (int)m_subtype,
            (unsigned)m_size);
        break;
    case Type::Double:
        fprintf(fp, "double %f\n", m_value._double);
        break;
    case Type::Bool:
        fprintf(fp, m_subtype ? "true\n" : "false\n");
        break;
    case Type::Datetime:
        /* Cast so the format matches on platforms where the 64-bit integer
           type is not `long` */
        fprintf(fp, "datetime %lld\n", (long long)m_value._i64);
        break;
    case Type::Null:
        fprintf(fp, "null\n");
        break;
    case Type::I32:
        fprintf(fp, "i32 %d\n", (int)m_value._i32);
        break;
    case Type::I64:
        fprintf(fp, "i64 %lld\n", (long long)m_value._i64);
        break;
    default:
        fprintf(fp, "UNKNOWN(%d/%d)\n", (int)m_type, (int)m_subtype);
    }
}

/* Write the value to fp in BSON format.
   - name: element name, or NULL to write a bare top-level document (only
     allowed for documents). When given, the type byte and the NUL-terminated
     name are written before the payload.
   - len: length of name in bytes, or negative if name is NUL-terminated.
   Documents and arrays write a placeholder for their byte count, serialize
   their entries, then seek back to patch the real count in, so fp must be
   seekable. */
void BSON::serialize(FILE *fp, char const *name, int len) const
{
    static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
                  && "BSON::serialize currently assumes little-endian");

    assert((name || isDocument()) && "BSON::serialize: missing name");
    if(name) {
        fputc((int)m_type, fp);
        if(len >= 0)
            fwrite(name, len, 1, fp);
        else
            fputs(name, fp);
        fputc('\x00', fp);
    }

    switch(m_type) {
    case Type::Double:
        fwrite(&m_value._double, 8, 1, fp);
        return;

    case Type::String: {
        char const *str = getStringReadOnly();
        /* The stored size counts the terminating NUL */
        int size = strlen(str) + 1;
        fwrite(&size, 4, 1, fp);
        fputs(str, fp);
        fputc('\x00', fp);
        return;
    }

    case Type::Document:
    case Type::Array: {
        /* Reserve 4 bytes for the total size, patched once it is known */
        long start_o = ftell(fp);
        fputs("####", fp);

        if(m_type == Type::Document) {
            for(uint i = 0; i < m_size; i++) {
                BSONField const &f = m_value.fields[i];
                size_t flen;
                char const *fname = f.getNameReadOnly(&flen);
                f.value().serialize(fp, fname, flen);
            }
        }
        else {
            /* Array entries are named after their decimal index */
            for(uint i = 0; i < m_size; i++) {
                char str[16];
                sprintf(str, "%u", i);
                m_value.values[i].serialize(fp, str, -1);
            }
        }

        fputc('\x00', fp);
        long end_o = ftell(fp);
        fseek(fp, start_o, SEEK_SET);
        i32 size = end_o - start_o;
        fwrite(&size, 4, 1, fp);
        fseek(fp, end_o, SEEK_SET);
        return;
    }

    case Type::Binary:
        /* binary ::= int32 subtype (byte*), with no trailing NUL. An extra
           zero byte here would be read back as the end of the enclosing
           document. */
        fwrite(&m_size, 4, 1, fp);
        fputc(m_subtype, fp);
        fwrite(m_value.binary, m_size, 1, fp);
        return;

    case Type::Bool:
        fputc(m_subtype != 0, fp);
        return;

    case Type::Datetime:
    case Type::I64:
        fwrite(&m_value._i64, 8, 1, fp);
        return;

    case Type::Null:
        return;

    case Type::I32:
        fwrite(&m_value._i32, 4, 1, fp);
        return;
    }

    assert(false && "BSON::serialize: unsupported object type");
}

/* Report a parse problem and evaluate to false, so that error paths can be
   written as `return LOG(...);`. The message goes to stderr, prefixed with
   "[bson::parse] ", and only if a variable named `log` in the enclosing scope
   is true. Relies on the `({ ... })` statement-expression and
   `##__VA_ARGS__` compiler extensions. */
#define LOG(FMT, ...) \
    ({ \
        if(log) \
            fprintf(stderr, "[bson::parse] " FMT "\n", ##__VA_ARGS__); \
        false; \
    })

/* Parse an element. If f is non-NULL, record the name and fill *f. Otherwise,
   b should be non-NULL and the value is stored in b. */
bool BSON::parseElement(FILE *fp, BSON *b, BSONField *f, bool log)
{
    int type = fgetc(fp);
    if(feof(fp))
        return LOG("error: EOF where field was expected");

    std::string name;
    int c;
    while((c = fgetc(fp))) {
        if(feof(fp))
            return LOG("error: EOF within field name");
        name.push_back(c);
    }

    if(f)
        new(f) BSONField(name, mkNull());

    BSON &v = f ? f->value() : *b;
    v = mkNull();
    v.m_type = static_cast<BSON::Type>(type);
    i32 len = 0;

    switch(type) {
    case Type::Double:
        fread(&v.m_value._double, 8, 1, fp);
        if(feof(fp))
            return LOG("error: EOF within double (`%s')", name.c_str());
        return true;

    case Type::String:
        fread(&len, 4, 1, fp);
        if(feof(fp))
            return LOG("error: EOF within string size (`%s')", name.c_str());

        if(len <= (int)SSO_MAXLEN) {
            v.m_subtype = len;
            if(fread((char *)&v + 2, len, 1, fp) != 1)
                return LOG("error: failed to read str (`%s')", name.c_str());
        }
        else {
            v.m_value.str = new char[len];
            if(!v.m_value.str)
                throw std::bad_alloc {};
            if(fread(v.m_value.str, len, 1, fp) != 1) {
                delete[] v.m_value.str;
                return LOG("error: failed to read str (`%s')", name.c_str());
            }
        }
        return true;

    case Type::Document:
        return parseDocument(fp, v, name, log);
    case Type::Array:
        return parseArray(fp, v, name, log);

    case Type::Binary:
        fread(&v.m_size, 4, 1, fp);
        if(feof(fp))
            return LOG("error: EOF within binary size (`%s')", name.c_str());
        v.m_subtype = fgetc(fp);
        if(feof(fp))
            return LOG("error: EOF at binary subtype (`%s')", name.c_str());
        v.m_value.binary = new u8[v.m_size];
        if(!v.m_value.binary)
            throw std::bad_alloc();
        if(fread(v.m_value.binary, v.m_size, 1, fp) != 1) {
            delete[] v.m_value.binary;
            return LOG("error: failed to read binary (`%s')", name.c_str());
        }
        return true;

    case Type::Bool:
        v.m_subtype = (fgetc(fp) != 0);
        if(feof(fp))
            return LOG("error: EOF within boolean (`%s')", name.c_str());
        return true;

    case Type::Datetime:
        fread(&v.m_value._i64, 8, 1, fp);
        if(feof(fp))
            return LOG("error: EOF within datetime (`%s')", name.c_str());
        return true;

    case Type::Null:
        return true;

    case Type::I32:
        fread(&v.m_value._i32, 4, 1, fp);
        if(feof(fp))
            return LOG("error: EOF within i32 (`%s')", name.c_str());
        return true;

    case Type::I64:
        fread(&v.m_value._i64, 8, 1, fp);
        if(feof(fp))
            return LOG("error: EOF within i64 (`%s')", name.c_str());
        return true;
    }

    /* Reset the value so that it can be destroyed without blowing up */
    v = mkNull();

    return LOG("error: unknown value type: 0x%02x (`%s')", type, name.c_str());
}

bool BSON::parseDocument(FILE *fp, BSON &v, std::string const &name, bool log)
{
    i32 len;
    fread(&len, 4, 1, fp);
    if(feof(fp))
        return LOG("error: EOF within document size (`%s')", name.c_str());

    std::vector<BSONField> fields;
    while(true) {
        int t = fgetc(fp);
        if(feof(fp))
            return LOG("error: EOF within document (`%s')", name.c_str());
        if(t == '\x00')
            break;
        ungetc(t, fp);

        BSONField f("@", mkNull());
        if(!parseElement(fp, NULL, &f, log))
            return LOG("within document `%s'", name.c_str());
        fields.push_back(std::move(f));
    }

    v = mkDocumentFromFields(fields.data(), fields.size());
    return true;
}

bool BSON::parseArray(FILE *fp, BSON &v, std::string const &name, bool log)
{
    i32 len;
    fread(&len, 4, 1, fp);
    if(feof(fp))
        return LOG("error: EOF within array size (`%s')", name.c_str());

    std::vector<BSON> values;
    while(true) {
        int t = fgetc(fp);
        if(feof(fp))
            return LOG("error: EOF within array (`%s')", name.c_str());
        if(t == '\x00')
            break;
        ungetc(t, fp);

        BSON v;
        if(!parseElement(fp, &v, NULL, log))
            return LOG("within array `%s'", name.c_str());
        values.push_back(std::move(v));
    }

    v = mkArrayFromValues(values.data(), values.size());
    return true;
}

/* Parse a top-level document from fp and return it, or a Null value if
   parsing fails. If `error` is non-NULL it receives the result of the parse:
   true when the document was parsed successfully, false otherwise. */
BSON BSON::parseDocumentFromFile(FILE *fp, bool *error, bool log)
{
    BSON doc;
    bool const parsed = parseDocument(fp, doc, "<file>", log);

    if(error != NULL)
        *error = parsed;

    /* Do not hand a half-built value back to the caller */
    if(!parsed)
        doc = mkNull();
    return doc;
}

/* Load a BSON document from the file at `path`.
   - log: report problems through FxOS_log
   - mustExist: when false, a file that cannot be opened is not reported
   - expectedType: if non-NULL, the document must have a string field "*"
     equal to this value
   Returns the document, or a Null value on any failure. */
BSON BSON::loadDocumentFromFile(
    std::string path, bool log, bool mustExist, char const *expectedType)
{
    /* BSON is a binary format: open in binary mode so that no platform
       applies text-mode translation to the bytes */
    FILE *fp = fopen(path.c_str(), "rb");
    if(!fp) {
        if(mustExist && log)
            FxOS_log(ERR, "Cannot read '%s': %m", path.c_str());
        return mkNull();
    }

    /* parseDocumentFromFile() stores true here when parsing succeeded */
    bool parsed;
    BSON v = parseDocumentFromFile(fp, &parsed, log);
    fclose(fp);
    if(!parsed) {
        if(log)
            FxOS_log(ERR, "Failed to parse '%s'", path.c_str());
        return mkNull();
    }

    if(!v.isDocument()) {
        if(log)
            FxOS_log(ERR, "Contents of '%s' is not a document", path.c_str());
        return mkNull();
    }

    if(expectedType
        && !(v.hasField("*") && v["*"].isString()
             && v["*"].getString() == std::string(expectedType))) {
        if(log)
            FxOS_log(ERR, "Contents of '%s' do not have expected type %s",
                path.c_str(), expectedType);
        return mkNull();
    }

    return v;
}

#undef LOG

/* Build a document from a list of (name, value) pairs. Each value is moved
   into the new document. Throws std::bad_alloc if the field array cannot be
   allocated. */
BSON BSON::mkDocument(
    std::initializer_list<std::pair<char const *, BSON &&>> pairs)
{
    uint const n = pairs.size();
    void *raw = malloc(n * sizeof(BSONField));
    if(raw == NULL)
        throw std::bad_alloc {};
    BSONField *const fields = static_cast<BSONField *>(raw);

    /* Construct each field in place, in list order */
    BSONField *slot = fields;
    for(auto it = pairs.begin(); it != pairs.end(); ++it, ++slot)
        new(slot) BSONField(it->first, std::move(it->second));

    return mkDocumentFromFieldArray(fields, n);
}

/* Build a document by moving `count` fields out of `fields_ro` into a newly
   allocated array owned by the document. The source fields are left in their
   moved-from state. Throws std::bad_alloc if the array cannot be
   allocated. */
BSON BSON::mkDocumentFromFields(BSONField *fields_ro, size_t count)
{
    BSONField *fields
        = static_cast<BSONField *>(malloc(count * sizeof *fields));
    if(!fields)
        throw std::bad_alloc {};
    /* malloc() returns raw storage: move-construct each field in place
       instead of assigning to objects that have not been constructed */
    for(size_t i = 0; i < count; i++)
        new(&fields[i]) BSONField(std::move(fields_ro[i]));

    return mkDocumentFromFieldArray(fields, count);
}

/* Wrap an existing field array into a document value. The document stores
   the pointer as-is; its destructor destroys the `count` fields and then
   releases the array with free(). */
BSON BSON::mkDocumentFromFieldArray(BSONField *fields, size_t count)
{
    BSON doc;
    doc.m_value.fields = fields;
    doc.m_size = count;
    doc.m_type = Type::Document;
    return doc;
}

/* Build an array of `count` entries, all initialized to Null.
   Throws std::bad_alloc if the value array cannot be allocated. */
BSON BSON::mkArray(size_t count)
{
    BSON *values = static_cast<BSON *>(malloc(count * sizeof *values));
    if(!values)
        throw std::bad_alloc {};

    /* malloc() returns raw storage: construct each entry in place instead of
       assigning to objects that have not been constructed */
    for(size_t i = 0; i < count; i++)
        new(&values[i]) BSON(mkNull());

    return mkArrayFromValueArray(values, count);
}

/* Build an array by moving `count` values out of `values_ro` into a newly
   allocated array owned by the result. The source values are left as Null.
   Throws std::bad_alloc if the array cannot be allocated. */
BSON BSON::mkArrayFromValues(BSON *values_ro, size_t count)
{
    BSON *values = static_cast<BSON *>(malloc(count * sizeof *values));
    if(!values)
        throw std::bad_alloc {};

    /* malloc() returns raw storage: move-construct each entry in place
       instead of assigning to objects that have not been constructed */
    for(size_t i = 0; i < count; i++)
        new(&values[i]) BSON(std::move(values_ro[i]));

    return mkArrayFromValueArray(values, count);
}

/* Wrap an existing value array into an array value. The result stores the
   pointer as-is; its destructor destroys the `count` entries and then
   releases the array with free(). */
BSON BSON::mkArrayFromValueArray(BSON *values, size_t count)
{
    BSON arr;
    arr.m_value.values = values;
    arr.m_size = count;
    arr.m_type = Type::Array;
    return arr;
}

/* Build a binary value holding a private copy of the `size` bytes at
   `data_ro`. Throws std::bad_alloc if the copy cannot be allocated. */
BSON BSON::mkBinaryCopy(int subtype, u8 const *data_ro, size_t size)
{
    /* The destructor releases binary data with free(), so the buffer must
       come from malloc(). Ask for at least one byte so that an empty blob
       still gets a valid pointer. */
    u8 *data = static_cast<u8 *>(malloc(size ? size : 1));
    if(!data)
        throw std::bad_alloc {};
    /* memcpy() must not be given a null source, even for zero bytes */
    if(size)
        memcpy(data, data_ro, size);
    return mkBinaryMove(subtype, data, size);
}

/* Build a binary value that takes over the buffer `data` of `size` bytes
   without copying it. The destructor releases the buffer with free(). */
BSON BSON::mkBinaryMove(int subtype, u8 *data, size_t size)
{
    BSON blob;
    blob.m_value.binary = data;
    blob.m_size = size;
    blob.m_subtype = subtype;
    blob.m_type = Type::Binary;
    return blob;
}

/* Build a string value from a copy of `str`.
   - len: number of bytes to copy, or negative to use strlen(str)
   Short strings are stored inline; longer ones in a heap buffer.
   Throws std::bad_alloc if the heap buffer cannot be allocated. */
BSON BSON::mkStringCopy(char const *str, int len)
{
    BSON v;

    if(len < 0)
        len = strlen(str);

    if(len < (int)SSO_MAXLEN) {
        v.m_type = Type::String;
        /* Inline strings store length + 1 (the byte count including the
           NUL), as the parser does. This keeps the subtype non-zero even for
           the empty string, zero being reserved for heap strings. */
        v.m_subtype = len + 1;
        memset((char *)&v + 2, 0, SSO_MAXLEN);
        memcpy((char *)&v + 2, str, len);
    }
    else {
        /* The destructor releases heap strings with free(), so the buffer
           must come from malloc(). Allocate before tagging the value so a
           failure never leaves a string with an invalid pointer. */
        char *copy = static_cast<char *>(malloc((size_t)len + 1));
        if(!copy)
            throw std::bad_alloc {};
        memcpy(copy, str, len);
        copy[len] = 0;
        v.m_type = Type::String;
        v.m_value.str = copy;
    }

    return v;
}

/* Build a string value from a copy of `str`. Short strings are stored
   inline; longer ones in a heap buffer.
   Throws std::bad_alloc if the heap buffer cannot be allocated. */
BSON BSON::mkString(std::string const &str)
{
    BSON v;

    if(str.size() < SSO_MAXLEN) {
        v.m_type = Type::String;
        /* Inline strings store length + 1 (the byte count including the
           NUL), as the parser does. This keeps the subtype non-zero even for
           the empty string, zero being reserved for heap strings. */
        v.m_subtype = str.size() + 1;
        /* strncpy() zero-fills the rest of the inline buffer */
        strncpy((char *)&v + 2, str.c_str(), SSO_MAXLEN);
    }
    else {
        /* The destructor releases heap strings with free(), so the buffer
           must come from malloc(). Allocate before tagging the value so a
           failure never leaves a string with an invalid pointer. */
        char *copy = static_cast<char *>(malloc(str.size() + 1));
        if(!copy)
            throw std::bad_alloc {};
        strcpy(copy, str.c_str());
        v.m_type = Type::String;
        v.m_value.str = copy;
    }

    return v;
}

/* Build a string value that takes ownership of the NUL-terminated buffer
   `str`, which is released with free(). Short strings are copied into the
   inline storage and the buffer is freed immediately; longer ones keep the
   buffer as-is. */
BSON BSON::mkStringMove(char *str)
{
    BSON v;
    v.m_type = Type::String;

    int len = strlen(str);
    if(len < (int)SSO_MAXLEN) {
        /* Inline strings store length + 1 (the byte count including the
           NUL), as the parser does. This keeps the subtype non-zero even for
           the empty string, zero being reserved for heap strings. */
        v.m_subtype = len + 1;
        memset((char *)&v + 2, 0, SSO_MAXLEN);
        memcpy((char *)&v + 2, str, len);
        free(str);
    }
    else {
        v.m_value.str = str;
    }

    return v;
}

/* Return a pointer to the string's bytes without copying them. A non-zero
   subtype means the bytes are stored inline at offset 2 of the object;
   otherwise the heap pointer is returned. Asserts that the value is a
   string. */
char const *BSON::getStringReadOnly() const
{
    assert(isString() && "wrong BSON accessor: getStringReadOnly");
    return m_subtype ? (char *)this + 2 : m_value.str;
}

/* Return a copy of the string made with strdup(); the result is NULL if
   strdup() fails. */
char *BSON::getStringCopy() const
{
    char const *source = getStringReadOnly();
    return strdup(source);
}

/* Access the i-th entry of an array. Asserts that the value is an array and
   that i is within bounds. */
BSON &BSON::operator[](int i)
{
    assert(isArray() && i >= 0 && (uint)i < m_size
           && "BSON::operator[]: out-of-bounds");
    BSON *const items = m_value.values;
    return items[i];
}

/* Read-only access to the i-th entry of an array. Asserts that the value is
   an array and that i is within bounds. */
BSON const &BSON::operator[](int i) const
{
    assert(isArray() && i >= 0 && (uint)i < m_size
           && "BSON::operator[]: out-of-bounds");
    BSON const *const items = m_value.values;
    return items[i];
}

/* Linear search among the first n fields for one whose name equals str.
   Returns the first match, or NULL if there is none. */
static BSONField *getFieldWithName(BSONField *fields, char const *str, int n)
{
    int k = 0;
    while(k < n) {
        BSONField &candidate = fields[k];
        if(candidate.compareName(str))
            return &candidate;
        ++k;
    }
    return NULL;
}

bool BSON::hasField(char const *str) const
{
    assert(isDocument() && "BSON::hasField: not a document");
    return getFieldWithName(m_value.fields, str, m_size);
}

/* Access the value of the field named str. Asserts that the value is a
   document and that the field exists. */
BSON &BSON::operator[](char const *str)
{
    assert(isDocument() && "BSON::operator[]: not a document");
    BSONField *f = getFieldWithName(m_value.fields, str, m_size);
    assert(f && "BSON::operator[]: key missing");
    BSON &found = f->value();
    return found;
}

/* Read-only access to the value of the field named str. Asserts that the
   value is a document and that the field exists. */
BSON const &BSON::operator[](char const *str) const
{
    assert(isDocument() && "BSON::operator[]: not a document");
    BSONField *f = getFieldWithName(m_value.fields, str, m_size);
    assert(f && "BSON::operator[]: key missing");
    BSON const &found = f->value();
    return found;
}

/* Build a field from a name and a value, which is moved in.
   - len: number of bytes of `name` to use, or negative if `name` is
     NUL-terminated
   Names that fit in m_literal are stored inline, with m_layout holding their
   length; other names are duplicated on the heap, which the rest of this
   file recognizes by m_layout being zero.
   Throws std::bad_alloc if the name cannot be duplicated. */
BSONField::BSONField(char const *name, BSON &&value, int len)
{
    size_t n = (len >= 0) ? len : strnlen(name, sizeof m_literal + 1);

    /* An empty name cannot use the inline form: its length, 0, is the marker
       for heap names, and the field would be read back as a heap name with
       no valid pointer. Store it on the heap instead. */
    if(n > 0 && n <= sizeof m_literal) {
        m_layout = n;
        memset(m_literal, 0, sizeof m_literal);
        memcpy(m_literal, name, n);
    }
    else {
        m_name = (len >= 0) ? strndup(name, len) : strdup(name);
        if(!m_name)
            throw std::bad_alloc {};
        /* Check that the top byte is unused */
        assert((uintptr_t)m_name >> (8 * sizeof m_name - 8) == 0);
    }

    m_value = std::move(value);
}

/* Move constructor. Delegates to the move assignment operator, which takes
   over the name and value of `other`. */
BSONField::BSONField(BSONField &&other)
{
    operator=(std::move(other));
}

/* Move assignment: takes over the name and value of `other`, which is left
   as a field named "@" so that destroying it frees nothing.

   The previous name of *this is overwritten without being released. Other
   functions in this file assign into freshly malloc()'d storage and depend
   on the old bytes never being interpreted.

   Moving a field onto itself is a no-op. Without the check, the field would
   lose its own name. */
BSONField &BSONField::operator=(BSONField &&other)
{
    if(this == &other)
        return *this;

    /* Copying m_name is the only name transfer done here.
       NOTE(review): this presumably carries inline names too, which would
       mean m_name shares storage with m_literal/m_layout; confirm against
       the class declaration. */
    m_name = other.m_name;
    other.m_name = nullptr;
    other.m_layout = 1;
    other.m_literal[0] = '@';
    m_value = std::move(other.m_value);
    return *this;
}

/* Destructor: releases the name if it is stored on the heap. Inline names
   have a non-zero m_layout and own nothing. */
BSONField::~BSONField()
{
    if(m_layout != 0)
        return;
    free(m_name);
}

/* Deep copy: returns a new field with the same name and a clone of the
   value. */
BSONField BSONField::clone() const
{
    BSON copy = m_value.clone();

    /* Heap names are NUL-terminated; inline ones carry an explicit length */
    if(!m_layout)
        return BSONField(m_name, std::move(copy));
    return BSONField(m_literal, std::move(copy), m_layout);
}

/* Tell whether the field's name is equal to the NUL-terminated string str. */
bool BSONField::compareName(char const *str) const
{
    if(!m_layout)
        return strcmp(str, m_name) == 0;

    /* Inline name: compare at most sizeof m_literal bytes, then make sure
       that str does not continue beyond that limit */
    if(strncmp(m_literal, str, sizeof m_literal) != 0)
        return false;
    return strnlen(str, sizeof m_literal + 1) <= sizeof m_literal;
}

/* Return a pointer to the field's name without copying it, and store the
   name's length in *len. Callers should rely on *len rather than on a NUL
   terminator being present. */
char const *BSONField::getNameReadOnly(size_t *len) const
{
    if(!m_layout) {
        *len = strlen(m_name);
        return m_name;
    }

    *len = m_layout;
    return m_literal;
}

/* Return a NUL-terminated copy of the field's name, made with strndup() or
   strdup() depending on where the name is stored. */
char *BSONField::getNameCopy() const
{
    return m_layout ? strndup(m_literal, m_layout) : strdup(m_name);
}

/* Print the field to fp: indentation for `depth`, the quoted name (followed
   by its length for inline names), then the value on the same line. */
void BSONField::dump(FILE *fp, int depth) const
{
    fprintf(fp, "%*s", 2 * depth, "");

    if(!m_layout)
        fprintf(fp, "'%s': ", m_name);
    else
        fprintf(fp, "'%.*s'(%d): ", m_layout, m_literal, m_layout);

    /* The indentation was printed above, so the value must not repeat it */
    m_value.dump(fp, depth, true);
}