//---------------------------------------------------------------------------// // 1100101 |_ mov #0, r4 __ // // 11 |_ <0xb380 %5c4> / _|_ _____ ___ // // 0110 |_ 3.50 -> 3.60 | _\ \ / _ (_-< // // |_ base# + offset |_| /_\_\___/__/ // //---------------------------------------------------------------------------// // Reference: https://bsonspec.org/spec.html // // For the subset at hand: // document ::= // | int32 element* "\x00" int32 is the total number of bytes // element ::= // | "\x01" e_name double 64-bit binary floating point // | "\x02" e_name string UTF-8 string // | "\x03" e_name document Embedded document // | "\x04" e_name document Array // | "\x05" e_name binary Binary data // | "\x08" e_name "\x00" Boolean "false" // | "\x08" e_name "\x01" Boolean "true" // | "\x09" e_name int64 UTC datetime // | "\x0A" e_name Null value // | "\x10" e_name int32 32-bit integer // | "\x12" e_name int64 64-bit integer // string ::= // | int32 (byte*) "\x00" int32 is the number of bytes, NUL included // binary ::= // | int32 subtype (byte*) int32 is the number of bytes //--- #include #include #include #include #include /* Number of bytes available in a value after the type/subtype attributes */ #define SSO_MAXLEN (sizeof(BSON) - 2) BSON::BSON(BSON &&other) { *this = std::move(other); } BSON &BSON::operator=(BSON &&other) { m_type = other.m_type; m_subtype = other.m_subtype; m_zero = other.m_zero; m_size = other.m_size; m_value = other.m_value; other.m_type = Type::Null; other.m_subtype = 0; other.m_zero = 0; other.m_size = 0; other.m_value._i64 = 0; return *this; } BSON::~BSON() { if(m_type == Type::String && !m_subtype) free(m_value.str); else if(m_type == Type::Document) { for(uint i = 0; i < m_size; i++) m_value.fields[i].~BSONField(); free(m_value.fields); } else if(m_type == Type::Array) { for(uint i = 0; i < m_size; i++) m_value.values[i].~BSON(); free(m_value.values); } else if(m_type == Type::Binary) free(m_value.binary); } BSON BSON::clone() const { /* All subtypes with no referenced subvalues */ switch(m_type) { case Type::Double: case Type::Bool: case Type::Datetime: case Type::Null: case Type::I32: case Type::I64: { BSON v; v.m_type = m_type; v.m_subtype = m_subtype; v.m_zero = m_zero; v.m_size = m_size; v.m_value = m_value; return v; } /* Strings need to be copied only if the SSO is not used */ case Type::String: { BSON v; v.m_type = m_type; v.m_subtype = m_subtype; v.m_zero = m_zero; v.m_size = m_size; v.m_value = m_value; return v; if(!m_subtype) { v.m_value.str = strdup(v.m_value.str); if(!v.m_value.str) throw std::bad_alloc {}; } return v; } /* Arrays and objects need to have their entries cloned */ case Type::Document: { BSONField *fields = static_cast(malloc(m_size * sizeof *fields)); if(!fields) throw std::bad_alloc {}; for(uint i = 0; i < m_size; i++) fields[i] = m_value.fields[i].clone(); return mkDocumentFromFieldArray(fields, m_size); } case Type::Array: { BSON *values = static_cast(malloc(m_size * sizeof *values)); if(!values) throw std::bad_alloc {}; for(uint i = 0; i < m_size; i++) values[i] = m_value.values[i].clone(); return mkArrayFromValueArray(values, m_size); } case Type::Binary: return mkBinaryCopy(m_subtype, m_value.binary, m_size); } assert(false && "BSON::clone: unsupported type"); } void BSON::dump(FILE *fp, int depth, bool noindent) const { if(!noindent) fprintf(fp, "%*s", 2 * depth, ""); switch(m_type) { case Type::String: if(m_subtype) fprintf(fp, "string(%d) \"%s\"\n", m_subtype - 1, (char *)this + 2); else fprintf(fp, "string \"%s\"\n", m_value.str); break; case Type::Document: fprintf(fp, "document\n"); for(uint i = 0; i < m_size; i++) m_value.fields[i].dump(fp, depth + 1); break; case Type::Array: fprintf(fp, "array\n"); for(uint i = 0; i < m_size; i++) m_value.values[i].dump(fp, depth + 1); break; case Type::Double: fprintf(fp, "double %f\n", m_value._double); break; case Type::Bool: fprintf(fp, m_subtype ? "true\n" : "false\n"); break; case Type::Datetime: fprintf(fp, "datetime %ld\n", m_value._i64); break; case Type::Null: fprintf(fp, "null\n"); break; case Type::I32: fprintf(fp, "i32 %d\n", m_value._i32); break; case Type::I64: fprintf(fp, "i64 %ld\n", m_value._i64); break; default: fprintf(fp, "UNKNOWN(%d/%d)\n", m_type, m_subtype); } } void BSON::serialize(FILE *fp, char const *name, int len) const { static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && "BSON::serialize currently assumes little-endian"); assert((name || isDocument()) && "BSON::serialize: missing name"); if(name) { fputc((int)m_type, fp); if(len >= 0) fwrite(name, len, 1, fp); else fputs(name, fp); fputc('\x00', fp); } switch(m_type) { case Type::Double: fwrite(&m_value._double, 8, 1, fp); return; case Type::String: { char const *str = getStringReadOnly(); int size = strlen(str) + 1; fwrite(&size, 4, 1, fp); fputs(str, fp); fputc('\x00', fp); return; } case Type::Document: case Type::Array: { long start_o = ftell(fp); fputs("####", fp); if(m_type == Type::Document) { for(uint i = 0; i < m_size; i++) { BSONField const &f = m_value.fields[i]; size_t flen; char const *name = f.getNameReadOnly(&flen); f.value().serialize(fp, name, flen); } } else { for(uint i = 0; i < m_size; i++) { char str[16]; sprintf(str, "%u", i); m_value.values[i].serialize(fp, str, -1); } } fputc('\x00', fp); long end_o = ftell(fp); fseek(fp, start_o, SEEK_SET); i32 size = end_o - start_o; fwrite(&size, 4, 1, fp); fseek(fp, end_o, SEEK_SET); return; } case Type::Binary: fwrite(&m_size, 4, 1, fp); fputc(m_subtype, fp); fwrite(m_value.binary, m_size, 1, fp); fputc('\x00', fp); return; case Type::Bool: fputc(m_subtype != 0, fp); return; case Type::Datetime: case Type::I64: fwrite(&m_value._i64, 8, 1, fp); return; case Type::Null: return; case Type::I32: fwrite(&m_value._i32, 4, 1, fp); return; } assert(false && "BSON::serialize: unsupported object type"); } #define LOG(FMT, ...) \ ({ \ if(log) \ fprintf(stderr, "[bson::parse] " FMT "\n", ##__VA_ARGS__); \ false; \ }) /* Parse an element. If f is non-NULL, record the name and fill *f. Otherwise, b should be non-NULL and the value is stored in b. */ bool BSON::parseElement(FILE *fp, BSON *b, BSONField *f, bool log) { int type = fgetc(fp); if(feof(fp)) return LOG("error: EOF where field was expected"); std::string name; int c; while((c = fgetc(fp))) { if(feof(fp)) return LOG("error: EOF within field name"); name.push_back(c); } if(f) new(f) BSONField(name, mkNull()); BSON &v = f ? f->value() : *b; v = mkNull(); v.m_type = static_cast(type); i32 len = 0; switch(type) { case Type::Double: fread(&v.m_value._double, 8, 1, fp); if(feof(fp)) return LOG("error: EOF within double (`%s')", name.c_str()); return true; case Type::String: fread(&len, 4, 1, fp); if(feof(fp)) return LOG("error: EOF within string size (`%s')", name.c_str()); if(len <= (int)SSO_MAXLEN) { v.m_subtype = len; if(fread((char *)&v + 2, len, 1, fp) != 1) return LOG("error: failed to read str (`%s')", name.c_str()); } else { v.m_value.str = new char[len]; if(!v.m_value.str) throw std::bad_alloc {}; if(fread(v.m_value.str, len, 1, fp) != 1) { delete[] v.m_value.str; return LOG("error: failed to read str (`%s')", name.c_str()); } } return true; case Type::Document: return parseDocument(fp, v, name, log); case Type::Array: return parseArray(fp, v, name, log); case Type::Binary: fread(&v.m_size, 4, 1, fp); if(feof(fp)) return LOG("error: EOF within binary size (`%s')", name.c_str()); v.m_subtype = fgetc(fp); if(feof(fp)) return LOG("error: EOF at binary subtype (`%s')", name.c_str()); v.m_value.binary = new u8[v.m_size]; if(!v.m_value.binary) throw std::bad_alloc(); if(fread(v.m_value.binary, v.m_size, 1, fp) != 1) { delete[] v.m_value.binary; return LOG("error: failed to read binary (`%s')", name.c_str()); } return true; case Type::Bool: v.m_subtype = (fgetc(fp) != 0); if(feof(fp)) return LOG("error: EOF within boolean (`%s')", name.c_str()); return true; case Type::Datetime: fread(&v.m_value._i64, 8, 1, fp); if(feof(fp)) return LOG("error: EOF within datetime (`%s')", name.c_str()); return true; case Type::Null: return true; case Type::I32: fread(&v.m_value._i32, 4, 1, fp); if(feof(fp)) return LOG("error: EOF within i32 (`%s')", name.c_str()); return true; case Type::I64: fread(&v.m_value._i64, 8, 1, fp); if(feof(fp)) return LOG("error: EOF within i64 (`%s')", name.c_str()); return true; } /* Reset the value so that it can be destroyed without blowing up */ v = mkNull(); return LOG("error: unknown value type: 0x%02x (`%s')", type, name.c_str()); } bool BSON::parseDocument(FILE *fp, BSON &v, std::string const &name, bool log) { i32 len; fread(&len, 4, 1, fp); if(feof(fp)) return LOG("error: EOF within document size (`%s')", name.c_str()); std::vector fields; while(true) { int t = fgetc(fp); if(feof(fp)) return LOG("error: EOF within document (`%s')", name.c_str()); if(t == '\x00') break; ungetc(t, fp); BSONField f("@", mkNull()); if(!parseElement(fp, NULL, &f, log)) return LOG("within document `%s'", name.c_str()); fields.push_back(std::move(f)); } v = mkDocumentFromFields(fields.data(), fields.size()); return true; } bool BSON::parseArray(FILE *fp, BSON &v, std::string const &name, bool log) { i32 len; fread(&len, 4, 1, fp); if(feof(fp)) return LOG("error: EOF within array size (`%s')", name.c_str()); std::vector values; while(true) { int t = fgetc(fp); if(feof(fp)) return LOG("error: EOF within array (`%s')", name.c_str()); if(t == '\x00') break; ungetc(t, fp); BSON v; if(!parseElement(fp, &v, NULL, log)) return LOG("within array `%s'", name.c_str()); values.push_back(std::move(v)); } v = mkArrayFromValues(values.data(), values.size()); return true; } BSON BSON::parseDocumentFromFile(FILE *fp, bool *error, bool log) { BSON v; bool rc = parseDocument(fp, v, "", log); if(error) *error = rc; if(!rc) v = mkNull(); return v; } BSON BSON::loadDocumentFromFile( std::string path, bool log, bool mustExist, char const *expectedType) { FILE *fp = fopen(path.c_str(), "r"); if(!fp) { if(mustExist && log) FxOS_log(ERR, "Cannot read '%s': %m", path.c_str()); return mkNull(); } bool e; BSON v = parseDocumentFromFile(fp, &e, log); fclose(fp); if(!e) { if(log) FxOS_log(ERR, "Failed to parse '%s'", path.c_str()); return mkNull(); } if(!v.isDocument()) { if(log) FxOS_log(ERR, "Contents of '%s' is not a document", path.c_str()); return mkNull(); } if(expectedType && !(v.hasField("*") && v["*"].isString() && v["*"].getString() == std::string(expectedType))) { if(log) FxOS_log(ERR, "Contents of '%s' do not have expected type %s", path.c_str(), expectedType); return mkNull(); } return v; } #undef LOG BSON BSON::mkDocument( std::initializer_list> pairs) { uint count = pairs.size(); BSONField *fields = static_cast(malloc(count * sizeof *fields)); if(!fields) throw std::bad_alloc {}; uint i = 0; for(auto const &pair: pairs) { new(&fields[i]) BSONField(pair.first, std::move(pair.second)); i++; } return mkDocumentFromFieldArray(fields, count); } BSON BSON::mkDocumentFromFields(BSONField *fields_ro, size_t count) { BSONField *fields = static_cast(malloc(count * sizeof *fields)); if(!fields) throw std::bad_alloc {}; for(uint i = 0; i < count; i++) fields[i] = std::move(fields_ro[i]); return mkDocumentFromFieldArray(fields, count); } BSON BSON::mkDocumentFromFieldArray(BSONField *fields, size_t count) { BSON v; v.m_type = Type::Document; v.m_size = count; v.m_value.fields = fields; return v; } BSON BSON::mkArray(size_t count) { BSON *values = static_cast(malloc(count * sizeof *values)); if(!values) throw std::bad_alloc {}; for(uint i = 0; i < count; i++) values[i] = mkNull(); return mkArrayFromValueArray(values, count); } BSON BSON::mkArrayFromValues(BSON *values_ro, size_t count) { BSON *values = static_cast(malloc(count * sizeof *values)); if(!values) throw std::bad_alloc {}; for(uint i = 0; i < count; i++) values[i] = std::move(values_ro[i]); return mkArrayFromValueArray(values, count); } BSON BSON::mkArrayFromValueArray(BSON *values, size_t count) { BSON v; v.m_type = Type::Array; v.m_size = count; v.m_value.values = values; return v; } BSON BSON::mkBinaryCopy(int subtype, u8 const *data_ro, size_t size) { u8 *data = new u8[size]; memcpy(data, data_ro, size); return mkBinaryMove(subtype, data, size); } BSON BSON::mkBinaryMove(int subtype, u8 *data, size_t size) { BSON v; v.m_type = Type::Binary; v.m_subtype = subtype; v.m_size = size; v.m_value.binary = data; return v; } BSON BSON::mkStringCopy(char const *str, int len) { BSON v; v.m_type = Type::String; if(len < 0) len = strlen(str); if(len < (int)SSO_MAXLEN) { v.m_subtype = len; memset((char *)&v + 2, 0, SSO_MAXLEN); memcpy((char *)&v + 2, str, len); } else { v.m_value.str = new char[len + 1]; memcpy(v.m_value.str, str, len); v.m_value.str[len] = 0; } return v; } BSON BSON::mkString(std::string const &str) { BSON v; v.m_type = Type::String; if(str.size() < SSO_MAXLEN) { v.m_subtype = str.size(); strncpy((char *)&v + 2, str.c_str(), SSO_MAXLEN); } else { v.m_value.str = new char[str.size() + 1]; strcpy(v.m_value.str, str.c_str()); } return v; } BSON BSON::mkStringMove(char *str) { BSON v; v.m_type = Type::String; int len = strlen(str); if(len < (int)SSO_MAXLEN) { v.m_subtype = len; memset((char *)&v + 2, 0, SSO_MAXLEN); memcpy((char *)&v + 2, str, len); free(str); } else { v.m_value.str = str; } return v; } char const *BSON::getStringReadOnly() const { assert(isString() && "wrong BSON accessor: getStringReadOnly"); if(m_subtype) return (char *)this + 2; else return m_value.str; } char *BSON::getStringCopy() const { return strdup(getStringReadOnly()); } BSON &BSON::operator[](int i) { assert(isArray() && i >= 0 && (uint)i < m_size && "BSON::operator[]: out-of-bounds"); return m_value.values[i]; } BSON const &BSON::operator[](int i) const { assert(isArray() && i >= 0 && (uint)i < m_size && "BSON::operator[]: out-of-bounds"); return m_value.values[i]; } static BSONField *getFieldWithName(BSONField *fields, char const *str, int n) { for(int i = 0; i < n; i++) { if(fields[i].compareName(str)) return &fields[i]; } return NULL; } bool BSON::hasField(char const *str) const { assert(isDocument() && "BSON::hasField: not a document"); return getFieldWithName(m_value.fields, str, m_size); } BSON &BSON::operator[](char const *str) { assert(isDocument() && "BSON::operator[]: not a document"); BSONField *f = getFieldWithName(m_value.fields, str, m_size); assert(f && "BSON::operator[]: key missing"); return f->value(); } BSON const &BSON::operator[](char const *str) const { assert(isDocument() && "BSON::operator[]: not a document"); BSONField *f = getFieldWithName(m_value.fields, str, m_size); assert(f && "BSON::operator[]: key missing"); return f->value(); } BSONField::BSONField(char const *name, BSON &&value, int len) { size_t n = (len >= 0) ? len : strnlen(name, sizeof m_literal + 1); if(n <= sizeof m_literal) { m_layout = n; memset(m_literal, 0, sizeof m_literal); memcpy(m_literal, name, n); } else { m_name = (len >= 0) ? strndup(name, len) : strdup(name); /* Check that the top byte is unused */ assert((uintptr_t)m_name >> (8 * sizeof m_name - 8) == 0); } m_value = std::move(value); } BSONField::BSONField(BSONField &&other) { *this = std::move(other); } BSONField &BSONField::operator=(BSONField &&other) { m_name = other.m_name; other.m_name = nullptr; other.m_layout = 1; other.m_literal[0] = '@'; m_value = std::move(other.m_value); return *this; } BSONField::~BSONField() { if(!m_layout) free(m_name); } BSONField BSONField::clone() const { if(m_layout) return BSONField(m_literal, m_value.clone(), m_layout); else return BSONField(m_name, m_value.clone()); } bool BSONField::compareName(char const *str) const { if(m_layout) { return !strncmp(m_literal, str, sizeof m_literal) && strnlen(str, sizeof m_literal + 1) <= sizeof m_literal; } else { return !strcmp(str, m_name); } } char const *BSONField::getNameReadOnly(size_t *len) const { if(m_layout) { *len = m_layout; return m_literal; } else { *len = strlen(m_name); return m_name; } } char *BSONField::getNameCopy() const { if(m_layout) return strndup(m_literal, m_layout); else return strdup(m_name); } void BSONField::dump(FILE *fp, int depth) const { fprintf(fp, "%*s", 2 * depth, ""); if(m_layout) fprintf(fp, "'%.*s'(%d): ", m_layout, m_literal, m_layout); else fprintf(fp, "'%s': ", m_name); m_value.dump(fp, depth, true); }