// blob: 1557f51a7f6445be72eb5fdd8a3bfdd33a6a0156 [file] [log] [blame]
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/trace_processor/importers/json/json_trace_tokenizer.h"
#include <memory>
#include "perfetto/base/build_config.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/trace_processor/trace_blob_view.h"
#include "src/trace_processor/importers/json/json_utils.h"
#include "src/trace_processor/sorter/trace_sorter.h"
#include "src/trace_processor/storage/stats.h"
#include "src/trace_processor/util/status_macros.h"
namespace perfetto {
namespace trace_processor {
namespace {
// Appends one character |c| of a JSON string body to |key|, translating simple
// escape sequences on the way.
//
// |is_escaping| tells whether |c| is the character following a backslash. A
// backslash seen while not escaping is dropped: it only introduces the escape
// and the character after it decides what is written. Returns an error status
// when |c| is not a valid escape character.
base::Status AppendUnescapedCharacter(char c,
                                      bool is_escaping,
                                      std::string* key) {
  if (!is_escaping) {
    // Ordinary character: copy it, unless it is the backslash opening an
    // escape sequence.
    if (c != '\\')
      key->push_back(c);
    return base::OkStatus();
  }

  char unescaped = c;
  switch (c) {
    case '"':
    case '\\':
    case '/':
      // These characters escape to themselves.
      break;
    case 'b':
      unescaped = '\b';
      break;
    case 'f':
      unescaped = '\f';
      break;
    case 'n':
      unescaped = '\n';
      break;
    case 'r':
      unescaped = '\r';
      break;
    case 't':
      unescaped = '\t';
      break;
    case 'u':
      // \uxxxx sequences are valid JSON but are never needed here, so they are
      // forwarded untouched: re-emit the prefix and let the hex digits follow
      // as ordinary characters.
      key->append("\\u");
      return base::OkStatus();
    default:
      return base::ErrStatus("Illegal character in JSON");
  }
  key->push_back(unescaped);
  return base::OkStatus();
}
// Result of ReadOneJsonString().
enum class ReadStringRes {
// The closing quote was found; the whole string has been read.
kEndOfString,
// The input ended before the closing quote was seen.
kNeedsMoreData,
// The input is not a well-formed JSON string.
kFatalError,
};
// Reads one double-quoted JSON string from [start, end) and appends its
// unescaped contents to |key|.
//
// Returns:
//  - kEndOfString: the closing quote was found; |*next| points just past it.
//  - kNeedsMoreData: the range is empty or ends before the closing quote.
//  - kFatalError: |*start| is not '"', or the string contains a control
//    character or an illegal escape sequence.
//
// |*next| is written only on kEndOfString. On the other results |key| may
// contain a partially read string, so callers should retry with a fresh one.
ReadStringRes ReadOneJsonString(const char* start,
                                const char* end,
                                std::string* key,
                                const char** next) {
  if (start == end) {
    return ReadStringRes::kNeedsMoreData;
  }
  if (*start != '"') {
    return ReadStringRes::kFatalError;
  }
  bool is_escaping = false;
  for (const char* s = start + 1; s < end; s++) {
    // Control characters are not allowed in JSON strings. The <cctype>
    // classifiers require a value representable as unsigned char (or EOF);
    // without the cast, bytes >= 0x80 (e.g. UTF-8 text) would be passed as
    // negative values on platforms where char is signed, which is undefined
    // behaviour.
    if (iscntrl(static_cast<unsigned char>(*s)))
      return ReadStringRes::kFatalError;
    // An unescaped quote character terminates the string.
    if (*s == '"' && !is_escaping) {
      *next = s + 1;
      return ReadStringRes::kEndOfString;
    }
    base::Status status = AppendUnescapedCharacter(*s, is_escaping, key);
    if (!status.ok())
      return ReadStringRes::kFatalError;
    // If we see a backslash and the last character was not a backslash, the
    // next character is escaped.
    is_escaping = *s == '\\' && !is_escaping;
  }
  return ReadStringRes::kNeedsMoreData;
}
// Result of SkipOneJsonValue().
enum class SkipValueRes {
// The end of the value (a top-level ',' or '}') was found.
kEndOfValue,
// The input ended before the end of the value was seen.
kNeedsMoreData,
// The value is malformed (bad string or unbalanced ']' / '}').
kFatalError,
};
// Skips over one JSON value located in [start, end) without interpreting it.
//
// The value ends at the first ',' or '}' found while no array or dictionary
// opened inside the value is still open. On kEndOfValue, |*next| points at that
// terminating character (it is not consumed). Strings are read as a unit so
// that brackets and braces inside them are not counted. |*next| is left
// untouched on kNeedsMoreData and kFatalError.
SkipValueRes SkipOneJsonValue(const char* start,
                              const char* end,
                              const char** next) {
  uint32_t open_braces = 0;
  uint32_t open_brackets = 0;
  const char* cur = start;
  while (cur < end) {
    switch (*cur) {
      case '"': {
        // Strings may contain {}[] and ',' characters, so consume the whole
        // string before looking at anything else.
        std::string unused;
        const char* after_string = nullptr;
        const ReadStringRes res =
            ReadOneJsonString(cur, end, &unused, &after_string);
        if (res == ReadStringRes::kFatalError)
          return SkipValueRes::kFatalError;
        if (res == ReadStringRes::kNeedsMoreData)
          return SkipValueRes::kNeedsMoreData;
        // Resume right after the closing quote; skip the increment below.
        cur = after_string;
        continue;
      }
      case ',':
        if (open_braces == 0 && open_brackets == 0) {
          // The caller skips the separator itself, so point at it.
          *next = cur;
          return SkipValueRes::kEndOfValue;
        }
        break;
      case '}':
        if (open_braces == 0) {
          if (open_brackets == 0) {
            // Closing brace of the enclosing dictionary: the value is over.
            *next = cur;
            return SkipValueRes::kEndOfValue;
          }
          // A '}' inside an open array with no matching '{'.
          return SkipValueRes::kFatalError;
        }
        --open_braces;
        break;
      case '{':
        ++open_braces;
        break;
      case '[':
        ++open_brackets;
        break;
      case ']':
        if (open_brackets == 0)
          return SkipValueRes::kFatalError;
        --open_brackets;
        break;
      default:
        break;
    }
    ++cur;
  }
  return SkipValueRes::kNeedsMoreData;
}
// Stores |ptr| into |*out| and returns an OK status, letting callers record the
// position they stopped at and return in a single statement.
base::Status SetOutAndReturn(const char* ptr, const char** out) {
*out = ptr;
return base::OkStatus();
}
} // namespace
// Scans [start, end) for the next complete JSON dictionary.
//
// Whitespace and commas outside of strings are skipped, and the contents of
// string literals are ignored when matching braces and brackets.
//
// Returns:
//  - kFoundDict: |*value| views the dictionary text, including both braces,
//    and |*next| points just past the closing brace.
//  - kEndOfArray: a ']' with no matching '[' was seen (the end of the
//    enclosing array); |*next| points just past it.
//  - kEndOfTrace: a '}' was seen while no dictionary was open; |*next| is not
//    modified.
//  - kNeedsMoreData: the range ended first; no out-param is modified.
ReadDictRes ReadOneJsonDict(const char* start,
                            const char* end,
                            base::StringView* value,
                            const char** next) {
  int braces = 0;
  int square_brackets = 0;
  const char* dict_begin = nullptr;
  bool in_string = false;
  bool is_escaping = false;
  for (const char* s = start; s < end; s++) {
    const char c = *s;
    if (in_string) {
      // Inside a string every character takes part in escape tracking,
      // including whitespace and commas; otherwise a backslash followed by one
      // of those would leave |is_escaping| stale and a later closing quote
      // would be missed.
      if (c == '"' && !is_escaping) {
        in_string = false;
        continue;
      }
      // If we see a backslash and the last character was not a backslash the
      // next character is escaped. All other special characters are ignored
      // while inside a string.
      is_escaping = c == '\\' && !is_escaping;
      continue;
    }
    if (c == '"') {
      in_string = true;
      continue;
    }
    // The cast keeps bytes >= 0x80 from being passed to isspace() as negative
    // values, which is undefined behaviour where char is signed.
    if (c == ',' || isspace(static_cast<unsigned char>(c)))
      continue;
    if (c == '{') {
      if (braces == 0)
        dict_begin = s;
      braces++;
      continue;
    }
    if (c == '}') {
      if (braces <= 0)
        return ReadDictRes::kEndOfTrace;
      if (--braces > 0)
        continue;
      size_t len = static_cast<size_t>((s + 1) - dict_begin);
      *value = base::StringView(dict_begin, len);
      *next = s + 1;
      return ReadDictRes::kFoundDict;
    }
    if (c == '[') {
      square_brackets++;
      continue;
    }
    if (c == ']') {
      if (square_brackets == 0) {
        // We've reached the end of [traceEvents] array.
        // There might be other top level keys in the json (e.g. metadata)
        // after.
        *next = s + 1;
        return ReadDictRes::kEndOfArray;
      }
      square_brackets--;
    }
  }
  return ReadDictRes::kNeedsMoreData;
}
// Reads the next `"key":` of a JSON dictionary from [start, end).
//
// Leading whitespace and ',' separators are skipped. Returns:
//  - kFoundKey: the unescaped key was appended to |key| and |*next| points at
//    the first character of the value (which is not consumed).
//  - kEndOfDictionary: a '}' was found instead of a key; |*next| points just
//    past it.
//  - kNeedsMoreData: the range ended before the key, the colon and the first
//    character of the value were all seen. |*next| is not modified and |key|
//    may hold partial data.
//  - kFatalError: the input is not of the expected form. |*next| is not
//    modified.
ReadKeyRes ReadOneJsonKey(const char* start,
                          const char* end,
                          std::string* key,
                          const char** next) {
  enum class NextToken {
    kStringOrEndOfDict,
    kColon,
    kValue,
  };
  NextToken next_token = NextToken::kStringOrEndOfDict;
  for (const char* s = start; s < end; s++) {
    // The <cctype> classifiers must not be given negative values, so classify
    // the byte as unsigned char (plain char may be signed).
    const unsigned char uc = static_cast<unsigned char>(*s);
    // Whitespace characters anywhere can be skipped.
    if (isspace(uc))
      continue;
    switch (next_token) {
      case NextToken::kStringOrEndOfDict: {
        // If we see a closing brace, that means we've reached the end of the
        // wrapping dictionary.
        if (*s == '}') {
          *next = s + 1;
          return ReadKeyRes::kEndOfDictionary;
        }
        // If we see a comma separator, just ignore it.
        if (*s == ',')
          continue;
        const ReadStringRes res = ReadOneJsonString(s, end, key, &s);
        if (res == ReadStringRes::kFatalError)
          return ReadKeyRes::kFatalError;
        if (res == ReadStringRes::kNeedsMoreData)
          return ReadKeyRes::kNeedsMoreData;
        // |s| now points just past the closing quote. Decrement it as the loop
        // will increment it back up.
        s--;
        next_token = NextToken::kColon;
        break;
      }
      case NextToken::kColon:
        if (*s != ':')
          return ReadKeyRes::kFatalError;
        next_token = NextToken::kValue;
        break;
      case NextToken::kValue:
        // Allowed value starting chars: [ { digit - "
        // Also allowed: true, false, null. For simplicity's sake, we only
        // check against the first character as we're not trying to be super
        // accurate.
        if (*s == '[' || *s == '{' || isdigit(uc) || *s == '-' || *s == '"' ||
            *s == 't' || *s == 'f' || *s == 'n') {
          *next = s;
          return ReadKeyRes::kFoundKey;
        }
        return ReadKeyRes::kFatalError;
    }
  }
  return ReadKeyRes::kNeedsMoreData;
}
// Looks up |key| among the top-level entries of the JSON dictionary |dict|.
//
// Entries are visited in order and the first match wins. On success |*value|
// holds the textual value:
//  - nested dictionary: its text verbatim, including the braces;
//  - string: the unescaped contents, without the surrounding quotes;
//  - anything else (numbers, true/false/null): the raw token text.
// If the dictionary is well formed but does not contain |key|, |*value| is set
// to std::nullopt and OK is returned.
//
// An error is returned for malformed or truncated input, and also as soon as
// an array value is encountered (arrays are not supported), even if the array
// belongs to a different key that precedes |key|.
base::Status ExtractValueForJsonKey(base::StringView dict,
                                    const std::string& key,
                                    std::optional<std::string>* value) {
  PERFETTO_DCHECK(dict.size() >= 2);
  const char* start = dict.data();
  const char* end = dict.data() + dict.size();
  enum ExtractValueState {
    kBeforeDict,
    kInsideDict,
    kAfterDict,
  };
  ExtractValueState state = kBeforeDict;
  for (const char* s = start; s < end;) {
    // Cast before classifying: passing a negative char to isspace() is
    // undefined behaviour and plain char may be signed.
    if (isspace(static_cast<unsigned char>(*s))) {
      ++s;
      continue;
    }
    if (state == kBeforeDict) {
      if (*s == '{') {
        ++s;
        state = kInsideDict;
        continue;
      }
      return base::ErrStatus("Unexpected character before JSON dict");
    }
    if (state == kAfterDict)
      return base::ErrStatus("Unexpected character after JSON dict");
    PERFETTO_DCHECK(state == kInsideDict);
    PERFETTO_DCHECK(s < end);
    if (*s == '}') {
      ++s;
      state = kAfterDict;
      continue;
    }
    std::string current_key;
    auto res = ReadOneJsonKey(s, end, &current_key, &s);
    // Leaves |state| at kInsideDict, so this is reported as a malformed
    // dictionary below.
    if (res == ReadKeyRes::kEndOfDictionary)
      break;
    if (res == ReadKeyRes::kFatalError) {
      return base::ErrStatus(
          "Failure parsing JSON: encountered fatal error while parsing key for "
          "value");
    }
    if (res == ReadKeyRes::kNeedsMoreData) {
      return base::ErrStatus("Failure parsing JSON: partial JSON dictionary");
    }
    PERFETTO_DCHECK(res == ReadKeyRes::kFoundKey);
    // |s| now points at the first character of the value.
    if (*s == '[') {
      return base::ErrStatus(
          "Failure parsing JSON: unsupported JSON dictionary with array");
    }
    std::string value_str;
    if (*s == '{') {
      base::StringView dict_str;
      ReadDictRes dict_res = ReadOneJsonDict(s, end, &dict_str, &s);
      if (dict_res == ReadDictRes::kNeedsMoreData ||
          dict_res == ReadDictRes::kEndOfArray ||
          dict_res == ReadDictRes::kEndOfTrace) {
        return base::ErrStatus(
            "Failure parsing JSON: unable to parse dictionary");
      }
      value_str = dict_str.ToStdString();
    } else if (*s == '"') {
      auto str_res = ReadOneJsonString(s, end, &value_str, &s);
      if (str_res == ReadStringRes::kNeedsMoreData ||
          str_res == ReadStringRes::kFatalError) {
        return base::ErrStatus("Failure parsing JSON: unable to parse string");
      }
    } else {
      // Bare token (number, true, false, null): it extends up to the next
      // separator, whitespace or closing brace, or to the end of the input.
      const char* value_start = s;
      const char* value_end = end;
      for (; s < end; ++s) {
        if (*s == ',' || isspace(static_cast<unsigned char>(*s)) ||
            *s == '}') {
          value_end = s;
          break;
        }
      }
      value_str = std::string(value_start, value_end);
    }
    if (key == current_key) {
      *value = std::move(value_str);
      return base::OkStatus();
    }
  }
  if (state != kAfterDict)
    return base::ErrStatus("Failure parsing JSON: malformed dictionary");
  *value = std::nullopt;
  return base::OkStatus();
}
// Reads one line of the system trace string from [start, end), appending its
// unescaped characters to |line|.
//
// Lines are delimited by an escaped 'n' (the two-character sequence \n) and
// the whole string ends at the first unescaped quote. Returns:
//  - kFoundLine: a line delimiter was reached; |*next| points just past it.
//  - kEndOfSystemTrace: the closing quote was reached; |*next| points just
//    past it.
//  - kFatalError: an illegal escape sequence was encountered.
//  - kNeedsMoreData: the range ended first; |*next| is not modified.
ReadSystemLineRes ReadOneSystemTraceLine(const char* start,
                                         const char* end,
                                         std::string* line,
                                         const char** next) {
  bool escaped = false;
  const char* cur = start;
  while (cur < end) {
    const char c = *cur;
    // An unescaped quote closes the system trace string.
    if (!escaped && c == '"') {
      *next = cur + 1;
      return ReadSystemLineRes::kEndOfSystemTrace;
    }
    // An escaped 'n' is a newline, i.e. the delimiter between two lines.
    if (escaped && c == 'n') {
      *next = cur + 1;
      return ReadSystemLineRes::kFoundLine;
    }
    if (!AppendUnescapedCharacter(c, escaped, line).ok())
      return ReadSystemLineRes::kFatalError;
    // A backslash that is not itself escaped escapes the following character.
    escaped = !escaped && c == '\\';
    ++cur;
  }
  return ReadSystemLineRes::kNeedsMoreData;
}
// Constructs a tokenizer that keeps |ctx| in |context_|; no parsing work is
// done at construction time.
JsonTraceTokenizer::JsonTraceTokenizer(TraceProcessorContext* ctx)
: context_(ctx) {}
// Compiler-generated destructor, defined out of line in this translation unit.
JsonTraceTokenizer::~JsonTraceTokenizer() = default;
// Appends |blob| to the internal buffer and tokenizes as much of the buffered
// data as possible.
//
// While nothing has been consumed yet (|offset_| == 0) the leading whitespace
// is skipped and the trace format is derived from the first remaining
// character: '{' selects the outer-dictionary format, '[' the bare
// trace-events array. Bytes consumed by the tokenizer are removed from the
// buffer and added to |offset_|; the unconsumed tail stays buffered for the
// next call. Returns an error if the trace does not start with '{' or '[', if
// the first chunk contains only whitespace, or if tokenization fails.
base::Status JsonTraceTokenizer::Parse(TraceBlobView blob) {
  PERFETTO_DCHECK(json::IsJsonSupported());
  buffer_.insert(buffer_.end(), blob.data(), blob.data() + blob.size());
  const char* buf = buffer_.data();
  const char* next = buf;
  const char* end = buf + buffer_.size();
  if (offset_ == 0) {
    // Strip leading whitespace. The cast avoids handing isspace() a negative
    // value (undefined behaviour) when the input starts with bytes >= 0x80 and
    // char is signed.
    while (next != end && isspace(static_cast<unsigned char>(*next))) {
      next++;
    }
    if (next == end) {
      return base::ErrStatus(
          "Failure parsing JSON: first chunk has only whitespace");
    }
    // Trace could begin in any of these ways:
    // {"traceEvents":[{
    // { "traceEvents": [{
    // [{
    if (*next != '{' && *next != '[') {
      return base::ErrStatus(
          "Failure parsing JSON: first non-whitespace character is not [ or {");
    }
    // Figure out the format of the JSON file based on the first non-whitespace
    // character.
    format_ = *next == '{' ? TraceFormat::kOuterDictionary
                           : TraceFormat::kOnlyTraceEvents;
    // Skip the '[' or '{' character.
    next++;
    // Set our current position based on the format of the trace.
    position_ = format_ == TraceFormat::kOuterDictionary
                    ? TracePosition::kDictionaryKey
                    : TracePosition::kInsideTraceEventsArray;
  }
  RETURN_IF_ERROR(ParseInternal(next, end, &next));
  // Drop everything that was consumed; |next| marks where to resume.
  offset_ += static_cast<uint64_t>(next - buf);
  buffer_.erase(buffer_.begin(), buffer_.begin() + (next - buf));
  return base::OkStatus();
}
// Dispatches [start, end) to the handler matching the current |position_|.
//
// The handlers report through |*out| how far they consumed. In the kEof state
// nothing is consumed and |*out| is not written; any remaining data is an
// error.
base::Status JsonTraceTokenizer::ParseInternal(const char* start,
                                               const char* end,
                                               const char** out) {
  PERFETTO_DCHECK(json::IsJsonSupported());
  switch (position_) {
    case TracePosition::kEof:
      if (start != end) {
        return base::ErrStatus(
            "Failure parsing JSON: tried to parse data after EOF");
      }
      return base::OkStatus();
    case TracePosition::kInsideTraceEventsArray:
      return HandleTraceEvent(start, end, out);
    case TracePosition::kInsideSystemTraceEventsString:
      return HandleSystemTraceEvent(start, end, out);
    case TracePosition::kDictionaryKey:
      return HandleDictionaryKey(start, end, out);
  }
  PERFETTO_FATAL("For GCC");
}
// Tokenizes the events of the trace events array found in [start, end).
//
// Every complete event dictionary is pushed to the sorter together with its
// "ts" timestamp. Events without a usable timestamp are pushed with ts = 0 if
// their "ph" is "M" (metadata) and are otherwise counted as tokenizer failures
// and dropped. |*out| receives the position up to which input was consumed.
base::Status JsonTraceTokenizer::HandleTraceEvent(const char* start,
                                                  const char* end,
                                                  const char** out) {
  const char* cursor = start;
  while (cursor < end) {
    base::StringView event;
    const ReadDictRes res = ReadOneJsonDict(cursor, end, &event, &cursor);
    if (res == ReadDictRes::kNeedsMoreData) {
      // Incomplete event: keep it buffered until more data arrives.
      return SetOutAndReturn(cursor, out);
    }
    if (res == ReadDictRes::kEndOfTrace) {
      position_ = TracePosition::kEof;
      return SetOutAndReturn(cursor, out);
    }
    if (res == ReadDictRes::kEndOfArray) {
      if (format_ == TraceFormat::kOnlyTraceEvents) {
        // A bare array has nothing after its closing bracket.
        position_ = TracePosition::kEof;
        return SetOutAndReturn(cursor, out);
      }
      // Inside an outer dictionary more keys may follow the array.
      position_ = TracePosition::kDictionaryKey;
      return ParseInternal(cursor, end, out);
    }

    // A full event dictionary was found: work out its timestamp.
    std::optional<std::string> raw_ts;
    RETURN_IF_ERROR(ExtractValueForJsonKey(event, "ts", &raw_ts));
    std::optional<int64_t> parsed_ts;
    if (raw_ts.has_value()) {
      parsed_ts = json::CoerceToTs(*raw_ts);
    }
    if (!parsed_ts.has_value()) {
      // Metadata events may omit ts. In all other cases error:
      std::optional<std::string> raw_ph;
      RETURN_IF_ERROR(ExtractValueForJsonKey(event, "ph", &raw_ph));
      const bool is_metadata = raw_ph.has_value() && *raw_ph == "M";
      if (!is_metadata) {
        context_->storage->IncrementStats(stats::json_tokenizer_failure);
        continue;
      }
    }
    context_->sorter->PushJsonValue(parsed_ts.value_or(0),
                                    event.ToStdString());
  }
  return SetOutAndReturn(cursor, out);
}
// Handles one key of the outer trace dictionary found in [start, end).
//
//  - "traceEvents": must be an array; switches to tokenizing its events.
//  - "systemTraceEvents": must be a string; switches to tokenizing its lines.
//  - "displayTimeUnit": must be a string; its presence is only counted in the
//    stats, the value itself is not used here.
//  - any other key: its value is skipped.
// Parsing then continues with whatever follows. If the key or its value is
// not fully available yet, |*out| is set to |start| so the whole key/value
// pair is parsed again once more data has arrived.
base::Status JsonTraceTokenizer::HandleDictionaryKey(const char* start,
                                                     const char* end,
                                                     const char** out) {
  if (format_ != TraceFormat::kOuterDictionary) {
    return base::ErrStatus(
        "Failure parsing JSON: illegal format when parsing dictionary key");
  }
  const char* next = start;
  std::string key;
  switch (ReadOneJsonKey(start, end, &key, &next)) {
    case ReadKeyRes::kFatalError:
      return base::ErrStatus(
          "Failure parsing JSON: encountered fatal error while parsing key");
    case ReadKeyRes::kEndOfDictionary:
      position_ = TracePosition::kEof;
      return SetOutAndReturn(next, out);
    case ReadKeyRes::kNeedsMoreData:
      // If we didn't manage to read the key we need to set |out| to |start|
      // (*not* |next|) to keep the state machine happy.
      return SetOutAndReturn(start, out);
    case ReadKeyRes::kFoundKey:
      break;
  }
  // ReadOneJsonKey should ensure that the first character of the value is
  // available.
  PERFETTO_CHECK(next < end);
  if (key == "traceEvents") {
    // Skip the [ character opening the array.
    if (*next != '[') {
      return base::ErrStatus(
          "Failure parsing JSON: traceEvents is not an array.");
    }
    next++;
    position_ = TracePosition::kInsideTraceEventsArray;
    return ParseInternal(next, end, out);
  }
  if (key == "systemTraceEvents") {
    // Skip the " character opening the string.
    if (*next != '"') {
      return base::ErrStatus(
          "Failure parsing JSON: systemTraceEvents is not an string.");
    }
    next++;
    position_ = TracePosition::kInsideSystemTraceEventsString;
    return ParseInternal(next, end, out);
  }
  if (key == "displayTimeUnit") {
    std::string time_unit;
    auto result = ReadOneJsonString(next, end, &time_unit, &next);
    if (result == ReadStringRes::kFatalError)
      return base::ErrStatus("Could not parse displayTimeUnit");
    // The value may be split across two chunks. As with skipped values below,
    // rewind to |start| so that the key is seen again before the value;
    // carrying on from |next| would make the next round treat the value's
    // opening quote as the start of a key.
    if (result == ReadStringRes::kNeedsMoreData)
      return SetOutAndReturn(start, out);
    context_->storage->IncrementStats(stats::json_display_time_unit);
    return ParseInternal(next, end, out);
  }
  // If we don't know the key for this JSON value just skip it.
  switch (SkipOneJsonValue(next, end, &next)) {
    case SkipValueRes::kFatalError:
      return base::ErrStatus(
          "Failure parsing JSON: error while parsing value for key %s",
          key.c_str());
    case SkipValueRes::kNeedsMoreData:
      // If we didn't manage to read the key *and* the value, we need to set
      // |out| to |start| (*not* |next|) to keep the state machine happy (as
      // we expect to always see a key before the value).
      return SetOutAndReturn(start, out);
    case SkipValueRes::kEndOfValue:
      return ParseInternal(next, end, out);
  }
  PERFETTO_FATAL("For GCC");
}
// Tokenizes the lines of the system trace string found in [start, end).
//
// Each complete line is tokenized and pushed to the sorter; empty lines and
// lines starting with '#' are ignored. When the closing quote of the string is
// reached, parsing continues with the next key of the outer dictionary.
// |*out| receives the position up to which input was consumed.
base::Status JsonTraceTokenizer::HandleSystemTraceEvent(const char* start,
                                                        const char* end,
                                                        const char** out) {
  if (format_ != TraceFormat::kOuterDictionary) {
    return base::ErrStatus(
        "Failure parsing JSON: illegal format when parsing system events");
  }
  const char* cursor = start;
  while (cursor < end) {
    std::string text;
    const ReadSystemLineRes res =
        ReadOneSystemTraceLine(cursor, end, &text, &cursor);
    if (res == ReadSystemLineRes::kFatalError) {
      return base::ErrStatus(
          "Failure parsing JSON: encountered fatal error while parsing "
          "event inside trace event string");
    }
    if (res == ReadSystemLineRes::kNeedsMoreData) {
      // Partial line: leave it buffered until the rest arrives.
      return SetOutAndReturn(cursor, out);
    }
    if (res == ReadSystemLineRes::kEndOfSystemTrace) {
      position_ = TracePosition::kDictionaryKey;
      return ParseInternal(cursor, end, out);
    }

    // A complete line was read. Blank lines and '#' lines carry no event.
    const bool ignorable = text.empty() || base::StartsWith(text, "#");
    if (ignorable) {
      continue;
    }
    SystraceLine parsed;
    RETURN_IF_ERROR(systrace_line_tokenizer_.Tokenize(text, &parsed));
    context_->sorter->PushSystraceLine(std::move(parsed));
  }
  return SetOutAndReturn(cursor, out);
}
// End-of-file notification hook. It only checks, in builds where DCHECKs are
// enabled, that the tokenizer has reached the kEof position; it performs no
// other work.
void JsonTraceTokenizer::NotifyEndOfFile() {
PERFETTO_DCHECK(position_ == TracePosition::kEof);
}
} // namespace trace_processor
} // namespace perfetto