/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/trace_processor/importers/ninja/ninja_log_parser.h"
#include "perfetto/ext/base/string_splitter.h"
#include "perfetto/ext/base/string_utils.h"
#include "src/trace_processor/importers/common/process_tracker.h"
#include "src/trace_processor/importers/common/slice_tracker.h"
#include "src/trace_processor/importers/common/track_tracker.h"
#include "src/trace_processor/sorter/trace_sorter.h"
#include "src/trace_processor/storage/trace_storage.h"
namespace perfetto {
namespace trace_processor {
using base::StringSplitter;
NinjaLogParser::NinjaLogParser(TraceProcessorContext* ctx) : ctx_(ctx) {}
NinjaLogParser::~NinjaLogParser() = default;
util::Status NinjaLogParser::Parse(TraceBlobView blob) {
// A trace is read in chunks of arbitrary size (for http fetch() pipelining),
// not necessarily aligned on a line boundary.
// Here we push everything into a vector and, on each call, consume only
// the leading part until the last \n, keeping the rest for the next call.
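// For example, if one chunk ends with "...foo.o\tcafe\n12\t34", only the
// part up to the '\n' is parsed now; the trailing "12\t34" fragment stays
// in |log_| until the next chunk completes that line. (The values here are
// made up for illustration.)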
const char* src = reinterpret_cast<const char*>(blob.data());
log_.insert(log_.end(), src, src + blob.size());
// Find the last \n.
size_t valid_size = log_.size();
for (; valid_size > 0 && log_[valid_size - 1] != '\n'; --valid_size) {
}
for (StringSplitter line(log_.data(), valid_size, '\n'); line.Next();) {
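// The log must start with a header line like "# ninja log v5"; only
// version 5 of the format is supported.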
static const char kHeader[] = "# ninja log v";
if (!header_parsed_) {
if (!base::StartsWith(line.cur_token(), kHeader))
return util::ErrStatus("Failed to parse ninja log header");
header_parsed_ = true;
auto version = base::CStringToUInt32(line.cur_token() + strlen(kHeader));
if (!version || *version != 5)
return util::ErrStatus("Unsupported ninja log version");
continue;
}
// Each line in the ninja log looks like this:
// 4 12 1579224178 ui/assets/modal.scss 832a958a9e234dfa
// Where:
// - [4, 12] are the timestamps in ms of [start, end] of the job, measured
// from the beginning of the build.
// - 1579224178 is the "restat" (ignored).
// - ui/assets/modal.scss is the name of the output file being built.
// - 832a958a9e234dfa is a hash of the compiler invocation.
// In most cases, each hash should be unique per ninja invocation (because
// two rules shouldn't generate the same output). However, in rare
// circumstances the same hash can show up more than once. Examples:
// - A GN action generates > 1 output per invocation (e.g., protos). In this
//   case all items will have the same [start, end] timestamps, and we want
//   to merge all the output names into one build step, because from the
//   build system's viewpoint that was a single compiler/tool invocation.
// - A subtle script that generates different outputs without taking a
//   --output=filename argument (e.g. via env vars or similar). Note that
//   this happens in the perfetto codebase itself (goto.google.com/nigew).
//   In this case we want to treat the two entries as two distinct jobs.
//
// In summary, the deduping logic is: if both the hash and the timestamps
// match, merge; otherwise, keep the entries distinct.
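// For instance, these two lines (hypothetical values) share both hash and
// timestamps, so they get merged into one "gen/foo.pb.h gen/foo.pb.cc"
// build step:
//   10  120  1579224178  gen/foo.pb.h   832a958a9e234dfa
//   10  120  1579224178  gen/foo.pb.cc  832a958a9e234dfa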
StringSplitter tok(&line, '\t');
auto t_start = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
auto t_end = base::CStringToInt64(tok.Next() ? tok.cur_token() : "");
tok.Next(); // Ignore restat.
const char* name = tok.Next() ? tok.cur_token() : nullptr;
auto cmdhash = base::CStringToUInt64(tok.Next() ? tok.cur_token() : "", 16);
if (!t_start || !t_end || !name || !cmdhash) {
ctx_->storage->IncrementStats(stats::ninja_parse_errors);
continue;
}
// If more hashes show up back-to-back with the same timestamps, merge them
// together as they identify multiple outputs for the same build rule.
// TODO(lalitm): this merging should really happen in NotifyEndOfFile
// because we want to merge across builds. However, that needs a
// non-trivial rework of this class, so it has not been implemented yet.
if (!jobs_.empty() && *cmdhash == jobs_.back().hash &&
*t_start == jobs_.back().start_ms && *t_end == jobs_.back().end_ms) {
jobs_.back().names.append(" ");
jobs_.back().names.append(name);
continue;
}
jobs_.emplace_back(*t_start, *t_end, *cmdhash, name);
}
log_.erase(log_.begin(), log_.begin() + static_cast<ssize_t>(valid_size));
return util::OkStatus();
}
// This is called after the last Parse() call. At this point all |jobs_| have
// been populated.
void NinjaLogParser::NotifyEndOfFile() {
std::sort(jobs_.begin(), jobs_.end(),
[](const Job& x, const Job& y) { return x.start_ms < y.start_ms; });
// Now we need to work out the job parallelism. There's no direct indication
// of that in the ninja logs, so it must be inferred from overlapping
// timestamps. In this context a "Worker" is an inferred sequence of jobs
// that happened concurrently with other sequences.
// Here we pack jobs according to the following heuristic, to make the graph
// easier for humans to read. Consider the initial situation:
//  1: [ job 1 ]
//  2: [    job 2    ]
//  3: [ job 3 ]
//     T=0           | T=6
// Assume that a new job, job 4, starts at T=6. It's very likely that job 4
// was started as a consequence of job 2's completion (otherwise it could
// have been started earlier, soon after job 1 or job 3), so it makes more
// sense to draw it on the 2nd worker's row, next to job 2.
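// With this heuristic, the packed result looks like:
//  1: [ job 1 ]
//  2: [    job 2    ][ job 4 ]
//  3: [ job 3 ]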
struct Worker {
int64_t busy_until;  // End time (ms) of the last job assigned to this worker.
TrackId track_id;    // Thread track this worker's slices are emitted on.
};
std::vector<Worker> workers;
for (const auto& job : jobs_) {
Worker* worker = nullptr;
for (Worker& cur : workers) {
// Pick the worker which has the greatest end time (busy_until) <= the
// job's start time.
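// E.g., in the diagram above, both worker 1 and worker 2 are free when
// job 4 starts at T=6, but worker 2 has the greatest busy_until (it just
// became free), so job 4 is assigned to it and drawn next to job 2.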
if (cur.busy_until <= job.start_ms) {
if (!worker || worker->busy_until < cur.busy_until)
worker = &cur;
}
}
if (worker) {
// Update the worker's end time with the newly assigned job.
worker->busy_until = job.end_ms;
} else {
// All workers are busy, allocate a new one.
static constexpr uint32_t kSyntheticNinjaPid = 1;
uint32_t worker_id = static_cast<uint32_t>(workers.size()) + 1;
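// Each inferred worker is modelled as a thread of a single synthetic
// "Build" process (pid 1), so every worker gets its own thread track
// grouped under one process.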
ctx_->process_tracker->SetProcessNameIfUnset(
ctx_->process_tracker->GetOrCreateProcess(kSyntheticNinjaPid),
ctx_->storage->InternString("Build"));
auto utid =
ctx_->process_tracker->UpdateThread(worker_id, kSyntheticNinjaPid);
base::StackString<32> name("Worker %u", worker_id);
StringId name_id = ctx_->storage->InternString(name.string_view());
ctx_->process_tracker->UpdateThreadNameByUtid(utid, name_id,
ThreadNamePriority::kOther);
TrackId track_id = ctx_->track_tracker->InternThreadTrack(utid);
workers.emplace_back(Worker{/*busy_until=*/job.end_ms, track_id});
worker = &workers.back();
}
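// Note: the ninja log stores timestamps as ms offsets from the start of
// the build (see the format comment in Parse()), so the emitted slice
// timestamps are relative to the build start, not wall-clock time.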
static constexpr int64_t kMsToNs = 1000 * 1000;
const int64_t start_ns = job.start_ms * kMsToNs;
const int64_t dur_ns = (job.end_ms - job.start_ms) * kMsToNs;
StringId name_id = ctx_->storage->InternString(base::StringView(job.names));
ctx_->slice_tracker->Scoped(start_ns, worker->track_id, StringId::Null(),
name_id, dur_ns);
}
}
} // namespace trace_processor
} // namespace perfetto