// Copyright 2007-2011 Baptiste Lepilleur // Distributed under MIT license, or public domain if desired and // recognized in your jurisdiction. // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE #if !defined(JSON_IS_AMALGAMATION) #include #include #include #include "json_tool.h" #endif // if !defined(JSON_IS_AMALGAMATION) #include #include #include #include #include #include #include #include #if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below #define snprintf _snprintf #endif #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0 // Disable warning about strdup being deprecated. #pragma warning(disable : 4996) #endif static int const stackLimit_g = 1000; static int stackDepth_g = 0; // see readValue() namespace Json { #if __cplusplus >= 201103L typedef std::unique_ptr CharReaderPtr; #else typedef std::auto_ptr CharReaderPtr; #endif // Implementation of class Features // //////////////////////////////// Features::Features() : allowComments_(true), strictRoot_(false), allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {} Features Features::all() { return Features(); } Features Features::strictMode() { Features features; features.allowComments_ = false; features.strictRoot_ = true; features.allowDroppedNullPlaceholders_ = false; features.allowNumericKeys_ = false; return features; } // Implementation of class Reader // //////////////////////////////// static bool containsNewLine(Reader::Location begin, Reader::Location end) { for (; begin < end; ++begin) if (*begin == '\n' || *begin == '\r') return true; return false; } // Class Reader // ////////////////////////////////////////////////////////////////// Reader::Reader() : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), lastValue_(), commentsBefore_(), features_(Features::all()), collectComments_() {} Reader::Reader(const Features& features) : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), lastValue_(), commentsBefore_(), features_(features), collectComments_() { } bool Reader::parse(const std::string& document, Value& root, bool collectComments) { document_ = document; const char* begin = document_.c_str(); const char* end = begin + document_.length(); return parse(begin, end, root, collectComments); } bool Reader::parse(std::istream& sin, Value& root, bool collectComments) { // std::istream_iterator begin(sin); // std::istream_iterator end; // Those would allow streamed input from a file, if parse() were a // template function. // Since std::string is reference-counted, this at least does not // create an extra copy. std::string doc; std::getline(sin, doc, (char)EOF); return parse(doc, root, collectComments); } bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root, bool collectComments) { if (!features_.allowComments_) { collectComments = false; } begin_ = beginDoc; end_ = endDoc; collectComments_ = collectComments; current_ = begin_; lastValueEnd_ = 0; lastValue_ = 0; commentsBefore_ = ""; errors_.clear(); while (!nodes_.empty()) nodes_.pop(); nodes_.push(&root); stackDepth_g = 0; // Yes, this is bad coding, but options are limited. bool successful = readValue(); Token token; skipCommentTokens(token); if (collectComments_ && !commentsBefore_.empty()) root.setComment(commentsBefore_, commentAfter); if (features_.strictRoot_) { if (!root.isArray() && !root.isObject()) { // Set error location to start of doc, ideally should be first token found // in doc token.type_ = tokenError; token.start_ = beginDoc; token.end_ = endDoc; addError( "A valid JSON document must be either an array or an object value.", token); return false; } } return successful; } bool Reader::readValue() { // This is a non-reentrant way to support a stackLimit. Terrible! // But this deprecated class has a security problem: Bad input can // cause a seg-fault. This seems like a fair, binary-compatible way // to prevent the problem. if (stackDepth_g >= stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue()."); ++stackDepth_g; Token token; skipCommentTokens(token); bool successful = true; if (collectComments_ && !commentsBefore_.empty()) { currentValue().setComment(commentsBefore_, commentBefore); commentsBefore_ = ""; } switch (token.type_) { case tokenObjectBegin: successful = readObject(token); currentValue().setOffsetLimit(current_ - begin_); break; case tokenArrayBegin: successful = readArray(token); currentValue().setOffsetLimit(current_ - begin_); break; case tokenNumber: successful = decodeNumber(token); break; case tokenString: successful = decodeString(token); break; case tokenTrue: { Value v(true); currentValue().swapPayload(v); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); } break; case tokenFalse: { Value v(false); currentValue().swapPayload(v); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); } break; case tokenNull: { Value v; currentValue().swapPayload(v); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); } break; case tokenArraySeparator: case tokenObjectEnd: case tokenArrayEnd: if (features_.allowDroppedNullPlaceholders_) { // "Un-read" the current token and mark the current value as a null // token. current_--; Value v; currentValue().swapPayload(v); currentValue().setOffsetStart(current_ - begin_ - 1); currentValue().setOffsetLimit(current_ - begin_); break; } // Else, fall through... default: currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return addError("Syntax error: value, object or array expected.", token); } if (collectComments_) { lastValueEnd_ = current_; lastValue_ = ¤tValue(); } --stackDepth_g; return successful; } void Reader::skipCommentTokens(Token& token) { if (features_.allowComments_) { do { readToken(token); } while (token.type_ == tokenComment); } else { readToken(token); } } bool Reader::readToken(Token& token) { skipSpaces(); token.start_ = current_; Char c = getNextChar(); bool ok = true; switch (c) { case '{': token.type_ = tokenObjectBegin; break; case '}': token.type_ = tokenObjectEnd; break; case '[': token.type_ = tokenArrayBegin; break; case ']': token.type_ = tokenArrayEnd; break; case '"': token.type_ = tokenString; ok = readString(); break; case '/': token.type_ = tokenComment; ok = readComment(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': token.type_ = tokenNumber; readNumber(); break; case 't': token.type_ = tokenTrue; ok = match("rue", 3); break; case 'f': token.type_ = tokenFalse; ok = match("alse", 4); break; case 'n': token.type_ = tokenNull; ok = match("ull", 3); break; case ',': token.type_ = tokenArraySeparator; break; case ':': token.type_ = tokenMemberSeparator; break; case 0: token.type_ = tokenEndOfStream; break; default: ok = false; break; } if (!ok) token.type_ = tokenError; token.end_ = current_; return true; } void Reader::skipSpaces() { while (current_ != end_) { Char c = *current_; if (c == ' ' || c == '\t' || c == '\r' || c == '\n') ++current_; else break; } } bool Reader::match(Location pattern, int patternLength) { if (end_ - current_ < patternLength) return false; int index = patternLength; while (index--) if (current_[index] != pattern[index]) return false; current_ += patternLength; return true; } bool Reader::readComment() { Location commentBegin = current_ - 1; Char c = getNextChar(); bool successful = false; if (c == '*') successful = readCStyleComment(); else if (c == '/') successful = readCppStyleComment(); if (!successful) return false; if (collectComments_) { CommentPlacement placement = commentBefore; if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) { if (c != '*' || !containsNewLine(commentBegin, current_)) placement = commentAfterOnSameLine; } addComment(commentBegin, current_, placement); } return true; } static std::string normalizeEOL(Reader::Location begin, Reader::Location end) { std::string normalized; normalized.reserve(end - begin); Reader::Location current = begin; while (current != end) { char c = *current++; if (c == '\r') { if (current != end && *current == '\n') // convert dos EOL ++current; // convert Mac EOL normalized += '\n'; } else { normalized += c; } } return normalized; } void Reader::addComment(Location begin, Location end, CommentPlacement placement) { assert(collectComments_); const std::string& normalized = normalizeEOL(begin, end); if (placement == commentAfterOnSameLine) { assert(lastValue_ != 0); lastValue_->setComment(normalized, placement); } else { commentsBefore_ += normalized; } } bool Reader::readCStyleComment() { while (current_ != end_) { Char c = getNextChar(); if (c == '*' && *current_ == '/') break; } return getNextChar() == '/'; } bool Reader::readCppStyleComment() { while (current_ != end_) { Char c = getNextChar(); if (c == '\n') break; if (c == '\r') { // Consume DOS EOL. It will be normalized in addComment. if (current_ != end_ && *current_ == '\n') getNextChar(); // Break on Moc OS 9 EOL. break; } } return true; } void Reader::readNumber() { const char *p = current_; char c = '0'; // stopgap for already consumed character // integral part while (c >= '0' && c <= '9') c = (current_ = p) < end_ ? *p++ : 0; // fractional part if (c == '.') { c = (current_ = p) < end_ ? *p++ : 0; while (c >= '0' && c <= '9') c = (current_ = p) < end_ ? *p++ : 0; } // exponential part if (c == 'e' || c == 'E') { c = (current_ = p) < end_ ? *p++ : 0; if (c == '+' || c == '-') c = (current_ = p) < end_ ? *p++ : 0; while (c >= '0' && c <= '9') c = (current_ = p) < end_ ? *p++ : 0; } } bool Reader::readString() { Char c = 0; while (current_ != end_) { c = getNextChar(); if (c == '\\') getNextChar(); else if (c == '"') break; } return c == '"'; } bool Reader::readObject(Token& tokenStart) { Token tokenName; std::string name; Value init(objectValue); currentValue().swapPayload(init); currentValue().setOffsetStart(tokenStart.start_ - begin_); while (readToken(tokenName)) { bool initialTokenOk = true; while (tokenName.type_ == tokenComment && initialTokenOk) initialTokenOk = readToken(tokenName); if (!initialTokenOk) break; if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object return true; name = ""; if (tokenName.type_ == tokenString) { if (!decodeString(tokenName, name)) return recoverFromError(tokenObjectEnd); } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) { Value numberName; if (!decodeNumber(tokenName, numberName)) return recoverFromError(tokenObjectEnd); name = numberName.asString(); } else { break; } Token colon; if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { return addErrorAndRecover( "Missing ':' after object member name", colon, tokenObjectEnd); } Value& value = currentValue()[name]; nodes_.push(&value); bool ok = readValue(); nodes_.pop(); if (!ok) // error already set return recoverFromError(tokenObjectEnd); Token comma; if (!readToken(comma) || (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && comma.type_ != tokenComment)) { return addErrorAndRecover( "Missing ',' or '}' in object declaration", comma, tokenObjectEnd); } bool finalizeTokenOk = true; while (comma.type_ == tokenComment && finalizeTokenOk) finalizeTokenOk = readToken(comma); if (comma.type_ == tokenObjectEnd) return true; } return addErrorAndRecover( "Missing '}' or object member name", tokenName, tokenObjectEnd); } bool Reader::readArray(Token& tokenStart) { Value init(arrayValue); currentValue().swapPayload(init); currentValue().setOffsetStart(tokenStart.start_ - begin_); skipSpaces(); if (*current_ == ']') // empty array { Token endArray; readToken(endArray); return true; } int index = 0; for (;;) { Value& value = currentValue()[index++]; nodes_.push(&value); bool ok = readValue(); nodes_.pop(); if (!ok) // error already set return recoverFromError(tokenArrayEnd); Token token; // Accept Comment after last item in the array. ok = readToken(token); while (token.type_ == tokenComment && ok) { ok = readToken(token); } bool badTokenType = (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd); if (!ok || badTokenType) { return addErrorAndRecover( "Missing ',' or ']' in array declaration", token, tokenArrayEnd); } if (token.type_ == tokenArrayEnd) break; } return true; } bool Reader::decodeNumber(Token& token) { Value decoded; if (!decodeNumber(token, decoded)) return false; currentValue().swapPayload(decoded); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool Reader::decodeNumber(Token& token, Value& decoded) { // Attempts to parse the number as an integer. If the number is // larger than the maximum supported value of an integer then // we decode the number as a double. Location current = token.start_; bool isNegative = *current == '-'; if (isNegative) ++current; // TODO: Help the compiler do the div and mod at compile time or get rid of them. Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt) : Value::maxLargestUInt; Value::LargestUInt threshold = maxIntegerValue / 10; Value::LargestUInt value = 0; while (current < token.end_) { Char c = *current++; if (c < '0' || c > '9') return decodeDouble(token, decoded); Value::UInt digit(c - '0'); if (value >= threshold) { // We've hit or exceeded the max value divided by 10 (rounded down). If // a) we've only just touched the limit, b) this is the last digit, and // c) it's small enough to fit in that rounding delta, we're okay. // Otherwise treat this number as a double to avoid overflow. if (value > threshold || current != token.end_ || digit > maxIntegerValue % 10) { return decodeDouble(token, decoded); } } value = value * 10 + digit; } if (isNegative) decoded = -Value::LargestInt(value); else if (value <= Value::LargestUInt(Value::maxInt)) decoded = Value::LargestInt(value); else decoded = value; return true; } bool Reader::decodeDouble(Token& token) { Value decoded; if (!decodeDouble(token, decoded)) return false; currentValue().swapPayload(decoded); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool Reader::decodeDouble(Token& token, Value& decoded) { double value = 0; std::string buffer(token.start_, token.end_); std::istringstream is(buffer); if (!(is >> value)) return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token); decoded = value; return true; } bool Reader::decodeString(Token& token) { std::string decoded_string; if (!decodeString(token, decoded_string)) return false; Value decoded(decoded_string); currentValue().swapPayload(decoded); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool Reader::decodeString(Token& token, std::string& decoded) { decoded.reserve(token.end_ - token.start_ - 2); Location current = token.start_ + 1; // skip '"' Location end = token.end_ - 1; // do not include '"' while (current != end) { Char c = *current++; if (c == '"') break; else if (c == '\\') { if (current == end) return addError("Empty escape sequence in string", token, current); Char escape = *current++; switch (escape) { case '"': decoded += '"'; break; case '/': decoded += '/'; break; case '\\': decoded += '\\'; break; case 'b': decoded += '\b'; break; case 'f': decoded += '\f'; break; case 'n': decoded += '\n'; break; case 'r': decoded += '\r'; break; case 't': decoded += '\t'; break; case 'u': { unsigned int unicode; if (!decodeUnicodeCodePoint(token, current, end, unicode)) return false; decoded += codePointToUTF8(unicode); } break; default: return addError("Bad escape sequence in string", token, current); } } else { decoded += c; } } return true; } bool Reader::decodeUnicodeCodePoint(Token& token, Location& current, Location end, unsigned int& unicode) { if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) return false; if (unicode >= 0xD800 && unicode <= 0xDBFF) { // surrogate pairs if (end - current < 6) return addError( "additional six characters expected to parse unicode surrogate pair.", token, current); unsigned int surrogatePair; if (*(current++) == '\\' && *(current++) == 'u') { if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); } else return false; } else return addError("expecting another \\u token to begin the second half of " "a unicode surrogate pair", token, current); } return true; } bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current, Location end, unsigned int& unicode) { if (end - current < 4) return addError( "Bad unicode escape sequence in string: four digits expected.", token, current); unicode = 0; for (int index = 0; index < 4; ++index) { Char c = *current++; unicode *= 16; if (c >= '0' && c <= '9') unicode += c - '0'; else if (c >= 'a' && c <= 'f') unicode += c - 'a' + 10; else if (c >= 'A' && c <= 'F') unicode += c - 'A' + 10; else return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current); } return true; } bool Reader::addError(const std::string& message, Token& token, Location extra) { ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = extra; errors_.push_back(info); return false; } bool Reader::recoverFromError(TokenType skipUntilToken) { int errorCount = int(errors_.size()); Token skip; for (;;) { if (!readToken(skip)) errors_.resize(errorCount); // discard errors caused by recovery if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream) break; } errors_.resize(errorCount); return false; } bool Reader::addErrorAndRecover(const std::string& message, Token& token, TokenType skipUntilToken) { addError(message, token); return recoverFromError(skipUntilToken); } Value& Reader::currentValue() { return *(nodes_.top()); } Reader::Char Reader::getNextChar() { if (current_ == end_) return 0; return *current_++; } void Reader::getLocationLineAndColumn(Location location, int& line, int& column) const { Location current = begin_; Location lastLineStart = current; line = 0; while (current < location && current != end_) { Char c = *current++; if (c == '\r') { if (*current == '\n') ++current; lastLineStart = current; ++line; } else if (c == '\n') { lastLineStart = current; ++line; } } // column & line start at 1 column = int(location - lastLineStart) + 1; ++line; } std::string Reader::getLocationLineAndColumn(Location location) const { int line, column; getLocationLineAndColumn(location, line, column); char buffer[18 + 16 + 16 + 1]; #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) #if defined(WINCE) _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); #else sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column); #endif #else snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); #endif return buffer; } // Deprecated. Preserved for backward compatibility std::string Reader::getFormatedErrorMessages() const { return getFormattedErrorMessages(); } std::string Reader::getFormattedErrorMessages() const { std::string formattedMessage; for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError) { const ErrorInfo& error = *itError; formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; formattedMessage += " " + error.message_ + "\n"; if (error.extra_) formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n"; } return formattedMessage; } std::vector Reader::getStructuredErrors() const { std::vector allErrors; for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError) { const ErrorInfo& error = *itError; Reader::StructuredError structured; structured.offset_start = error.token_.start_ - begin_; structured.offset_limit = error.token_.end_ - begin_; structured.message = error.message_; allErrors.push_back(structured); } return allErrors; } bool Reader::pushError(const Value& value, const std::string& message) { size_t length = end_ - begin_; if(value.getOffsetStart() > length || value.getOffsetLimit() > length) return false; Token token; token.type_ = tokenError; token.start_ = begin_ + value.getOffsetStart(); token.end_ = end_ + value.getOffsetLimit(); ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = 0; errors_.push_back(info); return true; } bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) { size_t length = end_ - begin_; if(value.getOffsetStart() > length || value.getOffsetLimit() > length || extra.getOffsetLimit() > length) return false; Token token; token.type_ = tokenError; token.start_ = begin_ + value.getOffsetStart(); token.end_ = begin_ + value.getOffsetLimit(); ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = begin_ + extra.getOffsetStart(); errors_.push_back(info); return true; } bool Reader::good() const { return !errors_.size(); } // exact copy of Features class OurFeatures { public: static OurFeatures all(); OurFeatures(); bool allowComments_; bool strictRoot_; bool allowDroppedNullPlaceholders_; bool allowNumericKeys_; bool allowSingleQuotes_; bool failIfExtra_; bool rejectDupKeys_; int stackLimit_; }; // OurFeatures // exact copy of Implementation of class Features // //////////////////////////////// OurFeatures::OurFeatures() : allowComments_(true), strictRoot_(false) , allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) , allowSingleQuotes_(false) , failIfExtra_(false) { } OurFeatures OurFeatures::all() { return OurFeatures(); } // Implementation of class Reader // //////////////////////////////// // exact copy of Reader, renamed to OurReader class OurReader { public: typedef char Char; typedef const Char* Location; struct StructuredError { size_t offset_start; size_t offset_limit; std::string message; }; OurReader(OurFeatures const& features); bool parse(const char* beginDoc, const char* endDoc, Value& root, bool collectComments = true); std::string getFormattedErrorMessages() const; std::vector getStructuredErrors() const; bool pushError(const Value& value, const std::string& message); bool pushError(const Value& value, const std::string& message, const Value& extra); bool good() const; private: OurReader(OurReader const&); // no impl void operator=(OurReader const&); // no impl enum TokenType { tokenEndOfStream = 0, tokenObjectBegin, tokenObjectEnd, tokenArrayBegin, tokenArrayEnd, tokenString, tokenNumber, tokenTrue, tokenFalse, tokenNull, tokenArraySeparator, tokenMemberSeparator, tokenComment, tokenError }; class Token { public: TokenType type_; Location start_; Location end_; }; class ErrorInfo { public: Token token_; std::string message_; Location extra_; }; typedef std::deque Errors; bool readToken(Token& token); void skipSpaces(); bool match(Location pattern, int patternLength); bool readComment(); bool readCStyleComment(); bool readCppStyleComment(); bool readString(); bool readStringSingleQuote(); void readNumber(); bool readValue(); bool readObject(Token& token); bool readArray(Token& token); bool decodeNumber(Token& token); bool decodeNumber(Token& token, Value& decoded); bool decodeString(Token& token); bool decodeString(Token& token, std::string& decoded); bool decodeDouble(Token& token); bool decodeDouble(Token& token, Value& decoded); bool decodeUnicodeCodePoint(Token& token, Location& current, Location end, unsigned int& unicode); bool decodeUnicodeEscapeSequence(Token& token, Location& current, Location end, unsigned int& unicode); bool addError(const std::string& message, Token& token, Location extra = 0); bool recoverFromError(TokenType skipUntilToken); bool addErrorAndRecover(const std::string& message, Token& token, TokenType skipUntilToken); void skipUntilSpace(); Value& currentValue(); Char getNextChar(); void getLocationLineAndColumn(Location location, int& line, int& column) const; std::string getLocationLineAndColumn(Location location) const; void addComment(Location begin, Location end, CommentPlacement placement); void skipCommentTokens(Token& token); typedef std::stack Nodes; Nodes nodes_; Errors errors_; std::string document_; Location begin_; Location end_; Location current_; Location lastValueEnd_; Value* lastValue_; std::string commentsBefore_; int stackDepth_; OurFeatures const features_; bool collectComments_; }; // OurReader // complete copy of Read impl, for OurReader OurReader::OurReader(OurFeatures const& features) : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), lastValue_(), commentsBefore_(), features_(features), collectComments_() { } bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root, bool collectComments) { if (!features_.allowComments_) { collectComments = false; } begin_ = beginDoc; end_ = endDoc; collectComments_ = collectComments; current_ = begin_; lastValueEnd_ = 0; lastValue_ = 0; commentsBefore_ = ""; errors_.clear(); while (!nodes_.empty()) nodes_.pop(); nodes_.push(&root); stackDepth_ = 0; bool successful = readValue(); Token token; skipCommentTokens(token); if (features_.failIfExtra_) { if (token.type_ != tokenError && token.type_ != tokenEndOfStream) { addError("Extra non-whitespace after JSON value.", token); return false; } } if (collectComments_ && !commentsBefore_.empty()) root.setComment(commentsBefore_, commentAfter); if (features_.strictRoot_) { if (!root.isArray() && !root.isObject()) { // Set error location to start of doc, ideally should be first token found // in doc token.type_ = tokenError; token.start_ = beginDoc; token.end_ = endDoc; addError( "A valid JSON document must be either an array or an object value.", token); return false; } } return successful; } bool OurReader::readValue() { if (stackDepth_ >= features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue()."); ++stackDepth_; Token token; skipCommentTokens(token); bool successful = true; if (collectComments_ && !commentsBefore_.empty()) { currentValue().setComment(commentsBefore_, commentBefore); commentsBefore_ = ""; } switch (token.type_) { case tokenObjectBegin: successful = readObject(token); currentValue().setOffsetLimit(current_ - begin_); break; case tokenArrayBegin: successful = readArray(token); currentValue().setOffsetLimit(current_ - begin_); break; case tokenNumber: successful = decodeNumber(token); break; case tokenString: successful = decodeString(token); break; case tokenTrue: { Value v(true); currentValue().swapPayload(v); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); } break; case tokenFalse: { Value v(false); currentValue().swapPayload(v); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); } break; case tokenNull: { Value v; currentValue().swapPayload(v); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); } break; case tokenArraySeparator: case tokenObjectEnd: case tokenArrayEnd: if (features_.allowDroppedNullPlaceholders_) { // "Un-read" the current token and mark the current value as a null // token. current_--; Value v; currentValue().swapPayload(v); currentValue().setOffsetStart(current_ - begin_ - 1); currentValue().setOffsetLimit(current_ - begin_); break; } // else, fall through ... default: currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return addError("Syntax error: value, object or array expected.", token); } if (collectComments_) { lastValueEnd_ = current_; lastValue_ = ¤tValue(); } --stackDepth_; return successful; } void OurReader::skipCommentTokens(Token& token) { if (features_.allowComments_) { do { readToken(token); } while (token.type_ == tokenComment); } else { readToken(token); } } bool OurReader::readToken(Token& token) { skipSpaces(); token.start_ = current_; Char c = getNextChar(); bool ok = true; switch (c) { case '{': token.type_ = tokenObjectBegin; break; case '}': token.type_ = tokenObjectEnd; break; case '[': token.type_ = tokenArrayBegin; break; case ']': token.type_ = tokenArrayEnd; break; case '"': token.type_ = tokenString; ok = readString(); break; case '\'': if (features_.allowSingleQuotes_) { token.type_ = tokenString; ok = readStringSingleQuote(); break; } // else continue case '/': token.type_ = tokenComment; ok = readComment(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': token.type_ = tokenNumber; readNumber(); break; case 't': token.type_ = tokenTrue; ok = match("rue", 3); break; case 'f': token.type_ = tokenFalse; ok = match("alse", 4); break; case 'n': token.type_ = tokenNull; ok = match("ull", 3); break; case ',': token.type_ = tokenArraySeparator; break; case ':': token.type_ = tokenMemberSeparator; break; case 0: token.type_ = tokenEndOfStream; break; default: ok = false; break; } if (!ok) token.type_ = tokenError; token.end_ = current_; return true; } void OurReader::skipSpaces() { while (current_ != end_) { Char c = *current_; if (c == ' ' || c == '\t' || c == '\r' || c == '\n') ++current_; else break; } } bool OurReader::match(Location pattern, int patternLength) { if (end_ - current_ < patternLength) return false; int index = patternLength; while (index--) if (current_[index] != pattern[index]) return false; current_ += patternLength; return true; } bool OurReader::readComment() { Location commentBegin = current_ - 1; Char c = getNextChar(); bool successful = false; if (c == '*') successful = readCStyleComment(); else if (c == '/') successful = readCppStyleComment(); if (!successful) return false; if (collectComments_) { CommentPlacement placement = commentBefore; if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) { if (c != '*' || !containsNewLine(commentBegin, current_)) placement = commentAfterOnSameLine; } addComment(commentBegin, current_, placement); } return true; } void OurReader::addComment(Location begin, Location end, CommentPlacement placement) { assert(collectComments_); const std::string& normalized = normalizeEOL(begin, end); if (placement == commentAfterOnSameLine) { assert(lastValue_ != 0); lastValue_->setComment(normalized, placement); } else { commentsBefore_ += normalized; } } bool OurReader::readCStyleComment() { while (current_ != end_) { Char c = getNextChar(); if (c == '*' && *current_ == '/') break; } return getNextChar() == '/'; } bool OurReader::readCppStyleComment() { while (current_ != end_) { Char c = getNextChar(); if (c == '\n') break; if (c == '\r') { // Consume DOS EOL. It will be normalized in addComment. if (current_ != end_ && *current_ == '\n') getNextChar(); // Break on Moc OS 9 EOL. break; } } return true; } void OurReader::readNumber() { const char *p = current_; char c = '0'; // stopgap for already consumed character // integral part while (c >= '0' && c <= '9') c = (current_ = p) < end_ ? *p++ : 0; // fractional part if (c == '.') { c = (current_ = p) < end_ ? *p++ : 0; while (c >= '0' && c <= '9') c = (current_ = p) < end_ ? *p++ : 0; } // exponential part if (c == 'e' || c == 'E') { c = (current_ = p) < end_ ? *p++ : 0; if (c == '+' || c == '-') c = (current_ = p) < end_ ? *p++ : 0; while (c >= '0' && c <= '9') c = (current_ = p) < end_ ? *p++ : 0; } } bool OurReader::readString() { Char c = 0; while (current_ != end_) { c = getNextChar(); if (c == '\\') getNextChar(); else if (c == '"') break; } return c == '"'; } bool OurReader::readStringSingleQuote() { Char c = 0; while (current_ != end_) { c = getNextChar(); if (c == '\\') getNextChar(); else if (c == '\'') break; } return c == '\''; } bool OurReader::readObject(Token& tokenStart) { Token tokenName; std::string name; Value init(objectValue); currentValue().swapPayload(init); currentValue().setOffsetStart(tokenStart.start_ - begin_); while (readToken(tokenName)) { bool initialTokenOk = true; while (tokenName.type_ == tokenComment && initialTokenOk) initialTokenOk = readToken(tokenName); if (!initialTokenOk) break; if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object return true; name = ""; if (tokenName.type_ == tokenString) { if (!decodeString(tokenName, name)) return recoverFromError(tokenObjectEnd); } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) { Value numberName; if (!decodeNumber(tokenName, numberName)) return recoverFromError(tokenObjectEnd); name = numberName.asString(); } else { break; } Token colon; if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { return addErrorAndRecover( "Missing ':' after object member name", colon, tokenObjectEnd); } if (name.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30"); if (features_.rejectDupKeys_ && currentValue().isMember(name)) { std::string msg = "Duplicate key: '" + name + "'"; return addErrorAndRecover( msg, tokenName, tokenObjectEnd); } Value& value = currentValue()[name]; nodes_.push(&value); bool ok = readValue(); nodes_.pop(); if (!ok) // error already set return recoverFromError(tokenObjectEnd); Token comma; if (!readToken(comma) || (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && comma.type_ != tokenComment)) { return addErrorAndRecover( "Missing ',' or '}' in object declaration", comma, tokenObjectEnd); } bool finalizeTokenOk = true; while (comma.type_ == tokenComment && finalizeTokenOk) finalizeTokenOk = readToken(comma); if (comma.type_ == tokenObjectEnd) return true; } return addErrorAndRecover( "Missing '}' or object member name", tokenName, tokenObjectEnd); } bool OurReader::readArray(Token& tokenStart) { Value init(arrayValue); currentValue().swapPayload(init); currentValue().setOffsetStart(tokenStart.start_ - begin_); skipSpaces(); if (*current_ == ']') // empty array { Token endArray; readToken(endArray); return true; } int index = 0; for (;;) { Value& value = currentValue()[index++]; nodes_.push(&value); bool ok = readValue(); nodes_.pop(); if (!ok) // error already set return recoverFromError(tokenArrayEnd); Token token; // Accept Comment after last item in the array. ok = readToken(token); while (token.type_ == tokenComment && ok) { ok = readToken(token); } bool badTokenType = (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd); if (!ok || badTokenType) { return addErrorAndRecover( "Missing ',' or ']' in array declaration", token, tokenArrayEnd); } if (token.type_ == tokenArrayEnd) break; } return true; } bool OurReader::decodeNumber(Token& token) { Value decoded; if (!decodeNumber(token, decoded)) return false; currentValue().swapPayload(decoded); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool OurReader::decodeNumber(Token& token, Value& decoded) { // Attempts to parse the number as an integer. If the number is // larger than the maximum supported value of an integer then // we decode the number as a double. Location current = token.start_; bool isNegative = *current == '-'; if (isNegative) ++current; // TODO: Help the compiler do the div and mod at compile time or get rid of them. Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt) : Value::maxLargestUInt; Value::LargestUInt threshold = maxIntegerValue / 10; Value::LargestUInt value = 0; while (current < token.end_) { Char c = *current++; if (c < '0' || c > '9') return decodeDouble(token, decoded); Value::UInt digit(c - '0'); if (value >= threshold) { // We've hit or exceeded the max value divided by 10 (rounded down). If // a) we've only just touched the limit, b) this is the last digit, and // c) it's small enough to fit in that rounding delta, we're okay. // Otherwise treat this number as a double to avoid overflow. if (value > threshold || current != token.end_ || digit > maxIntegerValue % 10) { return decodeDouble(token, decoded); } } value = value * 10 + digit; } if (isNegative) decoded = -Value::LargestInt(value); else if (value <= Value::LargestUInt(Value::maxInt)) decoded = Value::LargestInt(value); else decoded = value; return true; } bool OurReader::decodeDouble(Token& token) { Value decoded; if (!decodeDouble(token, decoded)) return false; currentValue().swapPayload(decoded); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool OurReader::decodeDouble(Token& token, Value& decoded) { double value = 0; const int bufferSize = 32; int count; int length = int(token.end_ - token.start_); // Sanity check to avoid buffer overflow exploits. if (length < 0) { return addError("Unable to parse token length", token); } // Avoid using a string constant for the format control string given to // sscanf, as this can cause hard to debug crashes on OS X. See here for more // info: // // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html char format[] = "%lf"; if (length <= bufferSize) { Char buffer[bufferSize + 1]; memcpy(buffer, token.start_, length); buffer[length] = 0; count = sscanf(buffer, format, &value); } else { std::string buffer(token.start_, token.end_); count = sscanf(buffer.c_str(), format, &value); } if (count != 1) return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token); decoded = value; return true; } bool OurReader::decodeString(Token& token) { std::string decoded_string; if (!decodeString(token, decoded_string)) return false; Value decoded(decoded_string); currentValue().swapPayload(decoded); currentValue().setOffsetStart(token.start_ - begin_); currentValue().setOffsetLimit(token.end_ - begin_); return true; } bool OurReader::decodeString(Token& token, std::string& decoded) { decoded.reserve(token.end_ - token.start_ - 2); Location current = token.start_ + 1; // skip '"' Location end = token.end_ - 1; // do not include '"' while (current != end) { Char c = *current++; if (c == '"') break; else if (c == '\\') { if (current == end) return addError("Empty escape sequence in string", token, current); Char escape = *current++; switch (escape) { case '"': decoded += '"'; break; case '/': decoded += '/'; break; case '\\': decoded += '\\'; break; case 'b': decoded += '\b'; break; case 'f': decoded += '\f'; break; case 'n': decoded += '\n'; break; case 'r': decoded += '\r'; break; case 't': decoded += '\t'; break; case 'u': { unsigned int unicode; if (!decodeUnicodeCodePoint(token, current, end, unicode)) return false; decoded += codePointToUTF8(unicode); } break; default: return addError("Bad escape sequence in string", token, current); } } else { decoded += c; } } return true; } bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current, Location end, unsigned int& unicode) { if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) return false; if (unicode >= 0xD800 && unicode <= 0xDBFF) { // surrogate pairs if (end - current < 6) return addError( "additional six characters expected to parse unicode surrogate pair.", token, current); unsigned int surrogatePair; if (*(current++) == '\\' && *(current++) == 'u') { if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); } else return false; } else return addError("expecting another \\u token to begin the second half of " "a unicode surrogate pair", token, current); } return true; } bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current, Location end, unsigned int& unicode) { if (end - current < 4) return addError( "Bad unicode escape sequence in string: four digits expected.", token, current); unicode = 0; for (int index = 0; index < 4; ++index) { Char c = *current++; unicode *= 16; if (c >= '0' && c <= '9') unicode += c - '0'; else if (c >= 'a' && c <= 'f') unicode += c - 'a' + 10; else if (c >= 'A' && c <= 'F') unicode += c - 'A' + 10; else return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current); } return true; } bool OurReader::addError(const std::string& message, Token& token, Location extra) { ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = extra; errors_.push_back(info); return false; } bool OurReader::recoverFromError(TokenType skipUntilToken) { int errorCount = int(errors_.size()); Token skip; for (;;) { if (!readToken(skip)) errors_.resize(errorCount); // discard errors caused by recovery if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream) break; } errors_.resize(errorCount); return false; } bool OurReader::addErrorAndRecover(const std::string& message, Token& token, TokenType skipUntilToken) { addError(message, token); return recoverFromError(skipUntilToken); } Value& OurReader::currentValue() { return *(nodes_.top()); } OurReader::Char OurReader::getNextChar() { if (current_ == end_) return 0; return *current_++; } void OurReader::getLocationLineAndColumn(Location location, int& line, int& column) const { Location current = begin_; Location lastLineStart = current; line = 0; while (current < location && current != end_) { Char c = *current++; if (c == '\r') { if (*current == '\n') ++current; lastLineStart = current; ++line; } else if (c == '\n') { lastLineStart = current; ++line; } } // column & line start at 1 column = int(location - lastLineStart) + 1; ++line; } std::string OurReader::getLocationLineAndColumn(Location location) const { int line, column; getLocationLineAndColumn(location, line, column); char buffer[18 + 16 + 16 + 1]; #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) #if defined(WINCE) _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); #else sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column); #endif #else snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); #endif return buffer; } std::string OurReader::getFormattedErrorMessages() const { std::string formattedMessage; for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError) { const ErrorInfo& error = *itError; formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; formattedMessage += " " + error.message_ + "\n"; if (error.extra_) formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n"; } return formattedMessage; } std::vector OurReader::getStructuredErrors() const { std::vector allErrors; for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError) { const ErrorInfo& error = *itError; OurReader::StructuredError structured; structured.offset_start = error.token_.start_ - begin_; structured.offset_limit = error.token_.end_ - begin_; structured.message = error.message_; allErrors.push_back(structured); } return allErrors; } bool OurReader::pushError(const Value& value, const std::string& message) { size_t length = end_ - begin_; if(value.getOffsetStart() > length || value.getOffsetLimit() > length) return false; Token token; token.type_ = tokenError; token.start_ = begin_ + value.getOffsetStart(); token.end_ = end_ + value.getOffsetLimit(); ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = 0; errors_.push_back(info); return true; } bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) { size_t length = end_ - begin_; if(value.getOffsetStart() > length || value.getOffsetLimit() > length || extra.getOffsetLimit() > length) return false; Token token; token.type_ = tokenError; token.start_ = begin_ + value.getOffsetStart(); token.end_ = begin_ + value.getOffsetLimit(); ErrorInfo info; info.token_ = token; info.message_ = message; info.extra_ = begin_ + extra.getOffsetStart(); errors_.push_back(info); return true; } bool OurReader::good() const { return !errors_.size(); } class OurCharReader : public CharReader { bool const collectComments_; OurReader reader_; public: OurCharReader( bool collectComments, OurFeatures const& features) : collectComments_(collectComments) , reader_(features) {} virtual bool parse( char const* beginDoc, char const* endDoc, Value* root, std::string* errs) { bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_); if (errs) { *errs = reader_.getFormattedErrorMessages(); } return ok; } }; CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); } CharReaderBuilder::~CharReaderBuilder() {} CharReader* CharReaderBuilder::newCharReader() const { bool collectComments = settings_["collectComments"].asBool(); OurFeatures features = OurFeatures::all(); features.allowComments_ = settings_["allowComments"].asBool(); features.strictRoot_ = settings_["strictRoot"].asBool(); features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool(); features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool(); features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool(); features.stackLimit_ = settings_["stackLimit"].asInt(); features.failIfExtra_ = settings_["failIfExtra"].asBool(); features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool(); return new OurCharReader(collectComments, features); } static void getValidReaderKeys(std::set* valid_keys) { valid_keys->clear(); valid_keys->insert("collectComments"); valid_keys->insert("allowComments"); valid_keys->insert("strictRoot"); valid_keys->insert("allowDroppedNullPlaceholders"); valid_keys->insert("allowNumericKeys"); valid_keys->insert("allowSingleQuotes"); valid_keys->insert("stackLimit"); valid_keys->insert("failIfExtra"); valid_keys->insert("rejectDupKeys"); } bool CharReaderBuilder::validate(Json::Value* invalid) const { Json::Value my_invalid; if (!invalid) invalid = &my_invalid; // so we do not need to test for NULL Json::Value& inv = *invalid; std::set valid_keys; getValidReaderKeys(&valid_keys); Value::Members keys = settings_.getMemberNames(); size_t n = keys.size(); for (size_t i = 0; i < n; ++i) { std::string const& key = keys[i]; if (valid_keys.find(key) == valid_keys.end()) { inv[key] = settings_[key]; } } return 0u == inv.size(); } Value& CharReaderBuilder::operator[](std::string key) { return settings_[key]; } // static void CharReaderBuilder::strictMode(Json::Value* settings) { //! [CharReaderBuilderStrictMode] (*settings)["allowComments"] = false; (*settings)["strictRoot"] = true; (*settings)["allowDroppedNullPlaceholders"] = false; (*settings)["allowNumericKeys"] = false; (*settings)["allowSingleQuotes"] = false; (*settings)["failIfExtra"] = true; (*settings)["rejectDupKeys"] = true; //! [CharReaderBuilderStrictMode] } // static void CharReaderBuilder::setDefaults(Json::Value* settings) { //! [CharReaderBuilderDefaults] (*settings)["collectComments"] = true; (*settings)["allowComments"] = true; (*settings)["strictRoot"] = false; (*settings)["allowDroppedNullPlaceholders"] = false; (*settings)["allowNumericKeys"] = false; (*settings)["allowSingleQuotes"] = false; (*settings)["stackLimit"] = 1000; (*settings)["failIfExtra"] = false; (*settings)["rejectDupKeys"] = false; //! [CharReaderBuilderDefaults] } ////////////////////////////////// // global functions bool parseFromStream( CharReader::Factory const& fact, std::istream& sin, Value* root, std::string* errs) { std::ostringstream ssin; ssin << sin.rdbuf(); std::string doc = ssin.str(); char const* begin = doc.data(); char const* end = begin + doc.size(); // Note that we do not actually need a null-terminator. CharReaderPtr const reader(fact.newCharReader()); return reader->parse(begin, end, root, errs); } std::istream& operator>>(std::istream& sin, Value& root) { CharReaderBuilder b; std::string errs; bool ok = parseFromStream(b, sin, &root, &errs); if (!ok) { fprintf(stderr, "Error from reader: %s", errs.c_str()); throwRuntimeError("reader error"); } return sin; } } // namespace Json