| // Protocol Buffers - Google's data interchange format |
| // Copyright 2008 Google Inc. All rights reserved. |
| // https://developers.google.com/protocol-buffers/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include <google/protobuf/util/internal/json_stream_parser.h> |
| |
| #include <algorithm> |
| #include <cctype> |
| #include <cerrno> |
| #include <cstdlib> |
| #include <cstring> |
| #include <memory> |
| #ifndef _SHARED_PTR_H |
| #include <google/protobuf/stubs/shared_ptr.h> |
| #endif |
| |
| #include <google/protobuf/stubs/logging.h> |
| #include <google/protobuf/stubs/common.h> |
| #include <google/protobuf/util/internal/object_writer.h> |
| #include <google/protobuf/util/internal/json_escaping.h> |
| #include <google/protobuf/stubs/strutil.h> |
| |
| namespace google { |
| namespace protobuf { |
| namespace util { |
| |
| // Allow these symbols to be referenced as util::Status, util::error::* in |
| // this file. |
| using util::Status; |
| namespace error { |
| using util::error::INTERNAL; |
| using util::error::INVALID_ARGUMENT; |
| } // namespace error |
| |
| namespace converter { |
| |
| // Number of digits in an escaped UTF-16 code unit ('\\' 'u' X X X X) |
| static const int kUnicodeEscapedLength = 6; |
| |
| // Length of the true, false, and null literals. |
| static const int true_len = strlen("true"); |
| static const int false_len = strlen("false"); |
| static const int null_len = strlen("null"); |
| |
// Returns true if |c| may start a bare (unquoted) object key: an ASCII letter,
// an underscore or a dollar sign.
inline bool IsLetter(char c) {
  if (c == '_' || c == '$') return true;
  if (c >= 'a' && c <= 'z') return true;
  return c >= 'A' && c <= 'Z';
}

// Returns true if |c| may appear after the first character of a bare key:
// anything IsLetter() accepts, or an ASCII decimal digit.
inline bool IsAlphanumeric(char c) {
  const bool is_digit = (c >= '0' && c <= '9');
  return is_digit || IsLetter(c);
}
| |
| static bool ConsumeKey(StringPiece* input, StringPiece* key) { |
| if (input->empty() || !IsLetter((*input)[0])) return false; |
| int len = 1; |
| for (; len < input->size(); ++len) { |
| if (!IsAlphanumeric((*input)[len])) { |
| break; |
| } |
| } |
| *key = StringPiece(input->data(), len); |
| *input = StringPiece(input->data() + len, input->size() - len); |
| return true; |
| } |
| |
| static bool MatchKey(StringPiece input) { |
| return !input.empty() && IsLetter(input[0]); |
| } |
| |
| JsonStreamParser::JsonStreamParser(ObjectWriter* ow) |
| : ow_(ow), |
| stack_(), |
| leftover_(), |
| json_(), |
| p_(), |
| key_(), |
| key_storage_(), |
| finishing_(false), |
| parsed_(), |
| parsed_storage_(), |
| string_open_(0), |
| chunk_storage_(), |
| coerce_to_utf8_(false) { |
| // Initialize the stack with a single value to be parsed. |
| stack_.push(VALUE); |
| } |
| |
| JsonStreamParser::~JsonStreamParser() {} |
| |
| |
| util::Status JsonStreamParser::Parse(StringPiece json) { |
| StringPiece chunk = json; |
| // If we have leftovers from a previous chunk, append the new chunk to it |
| // and create a new StringPiece pointing at the string's data. This could |
| // be large but we rely on the chunks to be small, assuming they are |
| // fragments of a Cord. |
| if (!leftover_.empty()) { |
| // Don't point chunk to leftover_ because leftover_ will be updated in |
| // ParseChunk(chunk). |
| chunk_storage_.swap(leftover_); |
| json.AppendToString(&chunk_storage_); |
| chunk = StringPiece(chunk_storage_); |
| } |
| |
| // Find the structurally valid UTF8 prefix and parse only that. |
| int n = internal::UTF8SpnStructurallyValid(chunk); |
| if (n > 0) { |
| util::Status status = ParseChunk(chunk.substr(0, n)); |
| |
| // Any leftover characters are stashed in leftover_ for later parsing when |
| // there is more data available. |
| chunk.substr(n).AppendToString(&leftover_); |
| return status; |
| } else { |
| chunk.CopyToString(&leftover_); |
| return util::Status::OK; |
| } |
| } |
| |
| util::Status JsonStreamParser::FinishParse() { |
| // If we do not expect anything and there is nothing left to parse we're all |
| // done. |
| if (stack_.empty() && leftover_.empty()) { |
| return util::Status::OK; |
| } |
| |
| // Storage for UTF8-coerced string. |
| google::protobuf::scoped_array<char> utf8; |
| if (coerce_to_utf8_) { |
| utf8.reset(new char[leftover_.size()]); |
| char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' '); |
| p_ = json_ = StringPiece(coerced, leftover_.size()); |
| } else { |
| p_ = json_ = leftover_; |
| if (!internal::IsStructurallyValidUTF8(leftover_)) { |
| return ReportFailure("Encountered non UTF-8 code points."); |
| } |
| } |
| |
| // Parse the remainder in finishing mode, which reports errors for things like |
| // unterminated strings or unknown tokens that would normally be retried. |
| finishing_ = true; |
| util::Status result = RunParser(); |
| if (result.ok()) { |
| SkipWhitespace(); |
| if (!p_.empty()) { |
| result = ReportFailure("Parsing terminated before end of input."); |
| } |
| } |
| return result; |
| } |
| |
| util::Status JsonStreamParser::ParseChunk(StringPiece chunk) { |
| // Do not do any work if the chunk is empty. |
| if (chunk.empty()) return util::Status::OK; |
| |
| p_ = json_ = chunk; |
| |
| finishing_ = false; |
| util::Status result = RunParser(); |
| if (!result.ok()) return result; |
| |
| SkipWhitespace(); |
| if (p_.empty()) { |
| // If we parsed everything we had, clear the leftover. |
| leftover_.clear(); |
| } else { |
| // If we do not expect anything i.e. stack is empty, and we have non-empty |
| // string left to parse, we report an error. |
| if (stack_.empty()) { |
| return ReportFailure("Parsing terminated before end of input."); |
| } |
| // If we expect future data i.e. stack is non-empty, and we have some |
| // unparsed data left, we save it for later parse. |
| leftover_ = p_.ToString(); |
| } |
| return util::Status::OK; |
| } |
| |
| util::Status JsonStreamParser::RunParser() { |
| while (!stack_.empty()) { |
| ParseType type = stack_.top(); |
| TokenType t = (string_open_ == 0) ? GetNextTokenType() : BEGIN_STRING; |
| stack_.pop(); |
| util::Status result; |
| switch (type) { |
| case VALUE: |
| result = ParseValue(t); |
| break; |
| |
| case OBJ_MID: |
| result = ParseObjectMid(t); |
| break; |
| |
| case ENTRY: |
| result = ParseEntry(t); |
| break; |
| |
| case ENTRY_MID: |
| result = ParseEntryMid(t); |
| break; |
| |
| case ARRAY_VALUE: |
| result = ParseArrayValue(t); |
| break; |
| |
| case ARRAY_MID: |
| result = ParseArrayMid(t); |
| break; |
| |
| default: |
| result = util::Status(util::error::INTERNAL, |
| StrCat("Unknown parse type: ", type)); |
| break; |
| } |
| if (!result.ok()) { |
| // If we were cancelled, save our state and try again later. |
| if (!finishing_ && result == util::Status::CANCELLED) { |
| stack_.push(type); |
| // If we have a key we still need to render, make sure to save off the |
| // contents in our own storage. |
| if (!key_.empty() && key_storage_.empty()) { |
| key_.AppendToString(&key_storage_); |
| key_ = StringPiece(key_storage_); |
| } |
| result = util::Status::OK; |
| } |
| return result; |
| } |
| } |
| return util::Status::OK; |
| } |
| |
| util::Status JsonStreamParser::ParseValue(TokenType type) { |
| switch (type) { |
| case BEGIN_OBJECT: |
| return HandleBeginObject(); |
| case BEGIN_ARRAY: |
| return HandleBeginArray(); |
| case BEGIN_STRING: |
| return ParseString(); |
| case BEGIN_NUMBER: |
| return ParseNumber(); |
| case BEGIN_TRUE: |
| return ParseTrue(); |
| case BEGIN_FALSE: |
| return ParseFalse(); |
| case BEGIN_NULL: |
| return ParseNull(); |
| case UNKNOWN: |
| return ReportUnknown("Expected a value."); |
| default: { |
| // Special case for having been cut off while parsing, wait for more data. |
| // This handles things like 'fals' being at the end of the string, we |
| // don't know if the next char would be e, completing it, or something |
| // else, making it invalid. |
| if (!finishing_ && p_.length() < false_len) { |
| return util::Status::CANCELLED; |
| } |
| return ReportFailure("Unexpected token."); |
| } |
| } |
| } |
| |
| util::Status JsonStreamParser::ParseString() { |
| util::Status result = ParseStringHelper(); |
| if (result.ok()) { |
| ow_->RenderString(key_, parsed_); |
| key_.clear(); |
| parsed_.clear(); |
| parsed_storage_.clear(); |
| } |
| return result; |
| } |
| |
| util::Status JsonStreamParser::ParseStringHelper() { |
| // If we haven't seen the start quote, grab it and remember it for later. |
| if (string_open_ == 0) { |
| string_open_ = *p_.data(); |
| GOOGLE_DCHECK(string_open_ == '\"' || string_open_ == '\''); |
| Advance(); |
| } |
| // Track where we last copied data from so we can minimize copying. |
| const char* last = p_.data(); |
| while (!p_.empty()) { |
| const char* data = p_.data(); |
| if (*data == '\\') { |
| // We're about to handle an escape, copy all bytes from last to data. |
| if (last < data) { |
| parsed_storage_.append(last, data - last); |
| last = data; |
| } |
| // If we ran out of string after the \, cancel or report an error |
| // depending on if we expect more data later. |
| if (p_.length() == 1) { |
| if (!finishing_) { |
| return util::Status::CANCELLED; |
| } |
| return ReportFailure("Closing quote expected in string."); |
| } |
| // Parse a unicode escape if we found \u in the string. |
| if (data[1] == 'u') { |
| util::Status result = ParseUnicodeEscape(); |
| if (!result.ok()) { |
| return result; |
| } |
| // Move last pointer past the unicode escape and continue. |
| last = p_.data(); |
| continue; |
| } |
| // Handle the standard set of backslash-escaped characters. |
| switch (data[1]) { |
| case 'b': |
| parsed_storage_.push_back('\b'); |
| break; |
| case 'f': |
| parsed_storage_.push_back('\f'); |
| break; |
| case 'n': |
| parsed_storage_.push_back('\n'); |
| break; |
| case 'r': |
| parsed_storage_.push_back('\r'); |
| break; |
| case 't': |
| parsed_storage_.push_back('\t'); |
| break; |
| case 'v': |
| parsed_storage_.push_back('\v'); |
| break; |
| default: |
| parsed_storage_.push_back(data[1]); |
| } |
| // We handled two characters, so advance past them and continue. |
| p_.remove_prefix(2); |
| last = p_.data(); |
| continue; |
| } |
| // If we found the closing quote note it, advance past it, and return. |
| if (*data == string_open_) { |
| // If we didn't copy anything, reuse the input buffer. |
| if (parsed_storage_.empty()) { |
| parsed_ = StringPiece(last, data - last); |
| } else { |
| if (last < data) { |
| parsed_storage_.append(last, data - last); |
| last = data; |
| } |
| parsed_ = StringPiece(parsed_storage_); |
| } |
| // Clear the quote char so next time we try to parse a string we'll |
| // start fresh. |
| string_open_ = 0; |
| Advance(); |
| return util::Status::OK; |
| } |
| // Normal character, just advance past it. |
| Advance(); |
| } |
| // If we ran out of characters, copy over what we have so far. |
| if (last < p_.data()) { |
| parsed_storage_.append(last, p_.data() - last); |
| } |
| // If we didn't find the closing quote but we expect more data, cancel for now |
| if (!finishing_) { |
| return util::Status::CANCELLED; |
| } |
| // End of string reached without a closing quote, report an error. |
| string_open_ = 0; |
| return ReportFailure("Closing quote expected in string."); |
| } |
| |
| // Converts a unicode escaped character to a decimal value stored in a char32 |
| // for use in UTF8 encoding utility. We assume that str begins with \uhhhh and |
| // convert that from the hex number to a decimal value. |
| // |
| // There are some security exploits with UTF-8 that we should be careful of: |
| // - http://www.unicode.org/reports/tr36/#UTF-8_Exploit |
| // - http://sites/intl-eng/design-guide/core-application |
| util::Status JsonStreamParser::ParseUnicodeEscape() { |
| if (p_.length() < kUnicodeEscapedLength) { |
| if (!finishing_) { |
| return util::Status::CANCELLED; |
| } |
| return ReportFailure("Illegal hex string."); |
| } |
| GOOGLE_DCHECK_EQ('\\', p_.data()[0]); |
| GOOGLE_DCHECK_EQ('u', p_.data()[1]); |
| uint32 code = 0; |
| for (int i = 2; i < kUnicodeEscapedLength; ++i) { |
| if (!isxdigit(p_.data()[i])) { |
| return ReportFailure("Invalid escape sequence."); |
| } |
| code = (code << 4) + hex_digit_to_int(p_.data()[i]); |
| } |
| if (code >= JsonEscaping::kMinHighSurrogate && |
| code <= JsonEscaping::kMaxHighSurrogate) { |
| if (p_.length() < 2 * kUnicodeEscapedLength) { |
| if (!finishing_) { |
| return util::Status::CANCELLED; |
| } |
| if (!coerce_to_utf8_) { |
| return ReportFailure("Missing low surrogate."); |
| } |
| } else if (p_.data()[kUnicodeEscapedLength] == '\\' && |
| p_.data()[kUnicodeEscapedLength + 1] == 'u') { |
| uint32 low_code = 0; |
| for (int i = kUnicodeEscapedLength + 2; i < 2 * kUnicodeEscapedLength; |
| ++i) { |
| if (!isxdigit(p_.data()[i])) { |
| return ReportFailure("Invalid escape sequence."); |
| } |
| low_code = (low_code << 4) + hex_digit_to_int(p_.data()[i]); |
| } |
| if (low_code >= JsonEscaping::kMinLowSurrogate && |
| low_code <= JsonEscaping::kMaxLowSurrogate) { |
| // Convert UTF-16 surrogate pair to 21-bit Unicode codepoint. |
| code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) + |
| JsonEscaping::kMinSupplementaryCodePoint; |
| // Advance past the first code unit escape. |
| p_.remove_prefix(kUnicodeEscapedLength); |
| } else if (!coerce_to_utf8_) { |
| return ReportFailure("Invalid low surrogate."); |
| } |
| } else if (!coerce_to_utf8_) { |
| return ReportFailure("Missing low surrogate."); |
| } |
| } |
| if (!coerce_to_utf8_ && !IsValidCodePoint(code)) { |
| return ReportFailure("Invalid unicode code point."); |
| } |
| char buf[UTFmax]; |
| int len = EncodeAsUTF8Char(code, buf); |
| // Advance past the [final] code unit escape. |
| p_.remove_prefix(kUnicodeEscapedLength); |
| parsed_storage_.append(buf, len); |
| return util::Status::OK; |
| } |
| |
| util::Status JsonStreamParser::ParseNumber() { |
| NumberResult number; |
| util::Status result = ParseNumberHelper(&number); |
| if (result.ok()) { |
| switch (number.type) { |
| case NumberResult::DOUBLE: |
| ow_->RenderDouble(key_, number.double_val); |
| key_.clear(); |
| break; |
| |
| case NumberResult::INT: |
| ow_->RenderInt64(key_, number.int_val); |
| key_.clear(); |
| break; |
| |
| case NumberResult::UINT: |
| ow_->RenderUint64(key_, number.uint_val); |
| key_.clear(); |
| break; |
| |
| default: |
| return ReportFailure("Unable to parse number."); |
| } |
| } |
| return result; |
| } |
| |
| util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) { |
| const char* data = p_.data(); |
| int length = p_.length(); |
| |
| // Look for the first non-numeric character, or the end of the string. |
| int index = 0; |
| bool floating = false; |
| bool negative = data[index] == '-'; |
| // Find the first character that cannot be part of the number. Along the way |
| // detect if the number needs to be parsed as a double. |
| // Note that this restricts numbers to the JSON specification, so for example |
| // we do not support hex or octal notations. |
| for (; index < length; ++index) { |
| char c = data[index]; |
| if (isdigit(c)) continue; |
| if (c == '.' || c == 'e' || c == 'E') { |
| floating = true; |
| continue; |
| } |
| if (c == '+' || c == '-' || c == 'x') continue; |
| // Not a valid number character, break out. |
| break; |
| } |
| |
| // If the entire input is a valid number, and we may have more content in the |
| // future, we abort for now and resume when we know more. |
| if (index == length && !finishing_) { |
| return util::Status::CANCELLED; |
| } |
| |
| // Create a string containing just the number, so we can use safe_strtoX |
| string number = p_.substr(0, index).ToString(); |
| |
| // Floating point number, parse as a double. |
| if (floating) { |
| if (!safe_strtod(number, &result->double_val)) { |
| return ReportFailure("Unable to parse number."); |
| } |
| result->type = NumberResult::DOUBLE; |
| p_.remove_prefix(index); |
| return util::Status::OK; |
| } |
| |
| // Positive non-floating point number, parse as a uint64. |
| if (!negative) { |
| // Octal/Hex numbers are not valid JSON values. |
| if (number.length() >= 2 && number[0] == '0') { |
| return ReportFailure("Octal/hex numbers are not valid JSON values."); |
| } |
| if (!safe_strtou64(number, &result->uint_val)) { |
| return ReportFailure("Unable to parse number."); |
| } |
| result->type = NumberResult::UINT; |
| p_.remove_prefix(index); |
| return util::Status::OK; |
| } |
| |
| // Octal/Hex numbers are not valid JSON values. |
| if (number.length() >= 3 && number[1] == '0') { |
| return ReportFailure("Octal/hex numbers are not valid JSON values."); |
| } |
| // Negative non-floating point number, parse as an int64. |
| if (!safe_strto64(number, &result->int_val)) { |
| return ReportFailure("Unable to parse number."); |
| } |
| result->type = NumberResult::INT; |
| p_.remove_prefix(index); |
| return util::Status::OK; |
| } |
| |
| util::Status JsonStreamParser::HandleBeginObject() { |
| GOOGLE_DCHECK_EQ('{', *p_.data()); |
| Advance(); |
| ow_->StartObject(key_); |
| key_.clear(); |
| stack_.push(ENTRY); |
| return util::Status::OK; |
| } |
| |
| util::Status JsonStreamParser::ParseObjectMid(TokenType type) { |
| if (type == UNKNOWN) { |
| return ReportUnknown("Expected , or } after key:value pair."); |
| } |
| |
| // Object is complete, advance past the comma and render the EndObject. |
| if (type == END_OBJECT) { |
| Advance(); |
| ow_->EndObject(); |
| return util::Status::OK; |
| } |
| // Found a comma, advance past it and get ready for an entry. |
| if (type == VALUE_SEPARATOR) { |
| Advance(); |
| stack_.push(ENTRY); |
| return util::Status::OK; |
| } |
| // Illegal token after key:value pair. |
| return ReportFailure("Expected , or } after key:value pair."); |
| } |
| |
| util::Status JsonStreamParser::ParseEntry(TokenType type) { |
| if (type == UNKNOWN) { |
| return ReportUnknown("Expected an object key or }."); |
| } |
| |
| // Close the object and return. This allows for trailing commas. |
| if (type == END_OBJECT) { |
| ow_->EndObject(); |
| Advance(); |
| return util::Status::OK; |
| } |
| |
| util::Status result; |
| if (type == BEGIN_STRING) { |
| // Key is a string (standard JSON), parse it and store the string. |
| result = ParseStringHelper(); |
| if (result.ok()) { |
| key_storage_.clear(); |
| if (!parsed_storage_.empty()) { |
| parsed_storage_.swap(key_storage_); |
| key_ = StringPiece(key_storage_); |
| } else { |
| key_ = parsed_; |
| } |
| parsed_.clear(); |
| } |
| } else if (type == BEGIN_KEY) { |
| // Key is a bare key (back compat), create a StringPiece pointing to it. |
| result = ParseKey(); |
| } else { |
| // Unknown key type, report an error. |
| result = ReportFailure("Expected an object key or }."); |
| } |
| // On success we next expect an entry mid ':' then an object mid ',' or '}' |
| if (result.ok()) { |
| stack_.push(OBJ_MID); |
| stack_.push(ENTRY_MID); |
| } |
| return result; |
| } |
| |
| util::Status JsonStreamParser::ParseEntryMid(TokenType type) { |
| if (type == UNKNOWN) { |
| return ReportUnknown("Expected : between key:value pair."); |
| } |
| if (type == ENTRY_SEPARATOR) { |
| Advance(); |
| stack_.push(VALUE); |
| return util::Status::OK; |
| } |
| return ReportFailure("Expected : between key:value pair."); |
| } |
| |
| util::Status JsonStreamParser::HandleBeginArray() { |
| GOOGLE_DCHECK_EQ('[', *p_.data()); |
| Advance(); |
| ow_->StartList(key_); |
| key_.clear(); |
| stack_.push(ARRAY_VALUE); |
| return util::Status::OK; |
| } |
| |
| util::Status JsonStreamParser::ParseArrayValue(TokenType type) { |
| if (type == UNKNOWN) { |
| return ReportUnknown("Expected a value or ] within an array."); |
| } |
| |
| if (type == END_ARRAY) { |
| ow_->EndList(); |
| Advance(); |
| return util::Status::OK; |
| } |
| |
| // The ParseValue call may push something onto the stack so we need to make |
| // sure an ARRAY_MID is after it, so we push it on now. |
| stack_.push(ARRAY_MID); |
| util::Status result = ParseValue(type); |
| if (result == util::Status::CANCELLED) { |
| // If we were cancelled, pop back off the ARRAY_MID so we don't try to |
| // push it on again when we try over. |
| stack_.pop(); |
| } |
| return result; |
| } |
| |
| util::Status JsonStreamParser::ParseArrayMid(TokenType type) { |
| if (type == UNKNOWN) { |
| return ReportUnknown("Expected , or ] after array value."); |
| } |
| |
| if (type == END_ARRAY) { |
| ow_->EndList(); |
| Advance(); |
| return util::Status::OK; |
| } |
| |
| // Found a comma, advance past it and expect an array value next. |
| if (type == VALUE_SEPARATOR) { |
| Advance(); |
| stack_.push(ARRAY_VALUE); |
| return util::Status::OK; |
| } |
| // Illegal token after array value. |
| return ReportFailure("Expected , or ] after array value."); |
| } |
| |
| util::Status JsonStreamParser::ParseTrue() { |
| ow_->RenderBool(key_, true); |
| key_.clear(); |
| p_.remove_prefix(true_len); |
| return util::Status::OK; |
| } |
| |
| util::Status JsonStreamParser::ParseFalse() { |
| ow_->RenderBool(key_, false); |
| key_.clear(); |
| p_.remove_prefix(false_len); |
| return util::Status::OK; |
| } |
| |
| util::Status JsonStreamParser::ParseNull() { |
| ow_->RenderNull(key_); |
| key_.clear(); |
| p_.remove_prefix(null_len); |
| return util::Status::OK; |
| } |
| |
| util::Status JsonStreamParser::ReportFailure(StringPiece message) { |
| static const int kContextLength = 20; |
| const char* p_start = p_.data(); |
| const char* json_start = json_.data(); |
| const char* begin = std::max(p_start - kContextLength, json_start); |
| const char* end = |
| std::min(p_start + kContextLength, json_start + json_.size()); |
| StringPiece segment(begin, end - begin); |
| string location(p_start - begin, ' '); |
| location.push_back('^'); |
| return util::Status(util::error::INVALID_ARGUMENT, |
| StrCat(message, "\n", segment, "\n", location)); |
| } |
| |
| util::Status JsonStreamParser::ReportUnknown(StringPiece message) { |
| // If we aren't finishing the parse, cancel parsing and try later. |
| if (!finishing_) { |
| return util::Status::CANCELLED; |
| } |
| if (p_.empty()) { |
| return ReportFailure(StrCat("Unexpected end of string. ", message)); |
| } |
| return ReportFailure(message); |
| } |
| |
| void JsonStreamParser::SkipWhitespace() { |
| while (!p_.empty() && ascii_isspace(*p_.data())) { |
| Advance(); |
| } |
| } |
| |
| void JsonStreamParser::Advance() { |
| // Advance by moving one UTF8 character while making sure we don't go beyond |
| // the length of StringPiece. |
| p_.remove_prefix(std::min<int>( |
| p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length()))); |
| } |
| |
| util::Status JsonStreamParser::ParseKey() { |
| StringPiece original = p_; |
| if (!ConsumeKey(&p_, &key_)) { |
| return ReportFailure("Invalid key or variable name."); |
| } |
| // If we consumed everything but expect more data, reset p_ and cancel since |
| // we can't know if the key was complete or not. |
| if (!finishing_ && p_.empty()) { |
| p_ = original; |
| return util::Status::CANCELLED; |
| } |
| // Since we aren't using the key storage, clear it out. |
| key_storage_.clear(); |
| return util::Status::OK; |
| } |
| |
| JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() { |
| SkipWhitespace(); |
| |
| int size = p_.size(); |
| if (size == 0) { |
| // If we ran out of data, report unknown and we'll place the previous parse |
| // type onto the stack and try again when we have more data. |
| return UNKNOWN; |
| } |
| // TODO(sven): Split this method based on context since different contexts |
| // support different tokens. Would slightly speed up processing? |
| const char* data = p_.data(); |
| if (*data == '\"' || *data == '\'') return BEGIN_STRING; |
| if (*data == '-' || ('0' <= *data && *data <= '9')) { |
| return BEGIN_NUMBER; |
| } |
| if (size >= true_len && !strncmp(data, "true", true_len)) { |
| return BEGIN_TRUE; |
| } |
| if (size >= false_len && !strncmp(data, "false", false_len)) { |
| return BEGIN_FALSE; |
| } |
| if (size >= null_len && !strncmp(data, "null", null_len)) { |
| return BEGIN_NULL; |
| } |
| if (*data == '{') return BEGIN_OBJECT; |
| if (*data == '}') return END_OBJECT; |
| if (*data == '[') return BEGIN_ARRAY; |
| if (*data == ']') return END_ARRAY; |
| if (*data == ':') return ENTRY_SEPARATOR; |
| if (*data == ',') return VALUE_SEPARATOR; |
| if (MatchKey(p_)) { |
| return BEGIN_KEY; |
| } |
| |
| // We don't know that we necessarily have an invalid token here, just that we |
| // can't parse what we have so far. So we don't report an error and just |
| // return UNKNOWN so we can try again later when we have more data, or if we |
| // finish and we have leftovers. |
| return UNKNOWN; |
| } |
| |
| } // namespace converter |
| } // namespace util |
| } // namespace protobuf |
| } // namespace google |