/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/trace_processor/importers/ninja/ninja_log_parser.h"
#include "perfetto/ext/base/string_splitter.h"
#include "perfetto/ext/base/string_utils.h"
#include "src/trace_processor/importers/common/process_tracker.h"
#include "src/trace_processor/importers/common/slice_tracker.h"
#include "src/trace_processor/importers/common/track_tracker.h"
#include "src/trace_processor/sorter/trace_sorter.h"
#include "src/trace_processor/storage/trace_storage.h"
namespace perfetto {
namespace trace_processor {
using base::StringSplitter;
NinjaLogParser::NinjaLogParser(TraceProcessorContext* ctx) : ctx_(ctx) {}
NinjaLogParser::~NinjaLogParser() = default;
util::Status NinjaLogParser::Parse(TraceBlobView blob) {
// A trace is read in chunks of arbitrary size (for http fetch() pipelining),
// not necessarily aligned on a line boundary.
// Here we push everything into a vector and, on each call, consume only
// the leading part until the last \n, keeping the rest for the next call.
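// For example, if one chunk ends with "...foo.o\tcafe\n12\t34", only the
// part up to the '\n' is parsed now; the trailing "12\t34" fragment stays
// in |log_| until the next chunk completes that line. (The values here are
// made up for illustration.)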
const char* src = reinterpret_cast<const char*>(blob.data());
log_.insert(log_.end(), src, src + blob.size());
// Find the last \n.
size_t valid_size = log_.size();
for (; valid_size > 0 && log_[valid_size - 1] != '\n'; --valid_size) {
}
for (StringSplitter line(log_.data(), valid_size, '\n'); line.Next();) {
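// The log must start with a header line like "# ninja log v5"; only
// version 5 of the format is supported.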
static const char kHeader[] = "# ninja log v";
if (!header_parsed_) {
if (!base::StartsWith(line.cur_token(), kHeader))
return util::ErrStatus("Failed to parse ninja log header");
header_parsed_ = true;
auto version = base::CStringToUInt32(line.cur_token() + strlen(kHeader));
if (!version || *version != 5)
return util::ErrStatus("Unsupported ninja log version");
continue;
}
// Each line in the ninja log looks like this:
// 4 12 1579224178 ui/assets/modal.scss 832a958a9e234dfa
// Where:
// - [4, 12] are the timestamps in ms of [start, end] of the job, measured
// from the beginning of the build.
// - 1579224178 is the "restat" (ignored).
// - ui/assets/modal.scss is the name of the output file being built.
// - 832a958a9e234dfa is a hash of the compiler invocation.
// In most cases, each hash should be unique per ninja invocation (because
// two rules shouldn't generate the same output). However, in rare
// circumstances the same hash can show up more than once. Examples:
// - A GN action generates > 1 output per invocation (e.g., protos). In this
//   case all items will have the same [start, end] timestamps, and we want
//   to merge all the output names into one build step, because from the
//   build system's viewpoint that was a single compiler/tool invocation.
// - A subtle script that generates different outputs without taking a
//   --output=filename argument (e.g. via env vars or similar). Note that
//   this happens in the perfetto codebase itself (goto.google.com/nigew).
//   In this case we want to treat the two entries as two distinct jobs.
//
// In summary, the deduping logic is: if both the hash and the timestamps
// match, merge; otherwise, keep the entries distinct.
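// For instance, these two lines (hypothetical values) share both hash and
// timestamps, so they get merged into one "gen/foo.pb.h gen/foo.pb.cc"
// build step:
//   10  120  1579224178  gen/foo.pb.h   832a958a9e234dfa
//   10  120  1579224178  gen/foo.pb.cc  832a958a9e234dfa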
StringSplitter tok(&line, '\t');
auto t_start = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
auto t_end = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
tok.Next(); // Ignore restat.
const char* name = tok.Next() ? tok.cur_token() : nullptr;
auto cmdhash = base::CStringToUInt64(tok.Next() ? tok.cur_token() : "", 16);
if (!t_start || !t_end || !name || !cmdhash) {
ctx_->storage->IncrementStats(stats::ninja_parse_errors);
continue;
}
// If more hashes show up back-to-back with the same timestamps, merge them
// together as they identify multiple outputs for the same build rule.
// TODO(lalitm): this merging should really happen in NotifyEndOfFile
// because we want to merge across builds. However, that needs a
// non-trivial rework of this class, so it has not been implemented yet.
if (!jobs_.empty() && *cmdhash == jobs_.back().hash &&
*t_start == jobs_.back().start_ms && *t_end == jobs_.back().end_ms) {
jobs_.back().names.append(" ");
jobs_.back().names.append(name);
continue;
}
jobs_.emplace_back(*t_start, *t_end, *cmdhash, name);
}
log_.erase(log_.begin(), log_.begin() + static_cast<ssize_t>(valid_size));
return util::OkStatus();
}
// This is called after the last Parse() call. At this point all |jobs_| have
// been populated.
void NinjaLogParser::NotifyEndOfFile() {
std::sort(jobs_.begin(), jobs_.end(),
[](const Job& x, const Job& y) { return x.start_ms < y.start_ms; });
// Now we need to work out the job parallelism. There's no direct indication
// of that in the ninja logs, so it must be inferred from overlapping
// timestamps. In this context a "Worker" is an inferred sequence of jobs
// that happened concurrently with other sequences.
// Here we pack jobs according to the following heuristic, to make the graph
// easier for humans to read. Consider the initial situation:
//  1: [ job 1 ]
//  2: [    job 2    ]
//  3: [ job 3 ]
//     T=0           | T=6
// Assume that a new job, job 4, starts at T=6. It's very likely that job 4
// was started as a consequence of job 2's completion (otherwise it could
// have been started earlier, soon after job 1 or job 3), so it makes more
// sense to draw it on the 2nd worker's row, next to job 2.
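// With this heuristic, the packed result looks like:
//  1: [ job 1 ]
//  2: [    job 2    ][ job 4 ]
//  3: [ job 3 ]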
struct Worker {
int64_t busy_until;  // End time (ms) of the last job assigned to this worker.
TrackId track_id;    // Thread track this worker's slices are emitted on.
};
std::vector<Worker> workers;
for (const auto& job : jobs_) {
Worker* worker = nullptr;
for (Worker& cur : workers) {
// Pick the worker which has the greatest end time (busy_until) <= the
// job's start time.
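// E.g., in the diagram above, both worker 1 and worker 2 are free when
// job 4 starts at T=6, but worker 2 has the greatest busy_until (it just
// became free), so job 4 is assigned to it and drawn next to job 2.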
if (cur.busy_until <= job.start_ms) {
if (!worker || worker->busy_until < cur.busy_until)
worker = &cur;
}
}
if (worker) {
// Update the worker's end time with the newly assigned job.
worker->busy_until = job.end_ms;
} else {
// All workers are busy, allocate a new one.
static constexpr uint32_t kSyntheticNinjaPid = 1;
uint32_t worker_id = static_cast<uint32_t>(workers.size()) + 1;
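// Each inferred worker is modelled as a thread of a single synthetic
// "Build" process (pid 1), so every worker gets its own thread track
// grouped under one process.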
ctx_->process_tracker->SetProcessNameIfUnset(
ctx_->process_tracker->GetOrCreateProcess(kSyntheticNinjaPid),
ctx_->storage->InternString("Build"));
auto utid =
ctx_->process_tracker->UpdateThread(worker_id, kSyntheticNinjaPid);
base::StackString<32> name("Worker %u", worker_id);
StringId name_id = ctx_->storage->InternString(name.string_view());
ctx_->process_tracker->UpdateThreadNameByUtid(utid, name_id,
ThreadNamePriority::kOther);
TrackId track_id = ctx_->track_tracker->InternThreadTrack(utid);
workers.emplace_back(Worker{/*busy_until=*/job.end_ms, track_id});
worker = &workers.back();
}
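// Note: the ninja log stores timestamps as ms offsets from the start of
// the build (see the format comment in Parse()), so the emitted slice
// timestamps are relative to the build start, not wall-clock time.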
static constexpr int64_t kMsToNs = 1000 * 1000;
const int64_t start_ns = job.start_ms * kMsToNs;
const int64_t dur_ns = (job.end_ms - job.start_ms) * kMsToNs;
StringId name_id = ctx_->storage->InternString(base::StringView(job.names));
ctx_->slice_tracker->Scoped(start_ns, worker->track_id, StringId::Null(),
name_id, dur_ns);
}
}
} // namespace trace_processor
} // namespace perfetto