blob: 8e81cb0405527c767712ffab0456dbaad4e7369d [file] [log] [blame]
/*
* Copyright (C) 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/protozero/filtering/filter_util.h"
#include <algorithm>
#include <deque>
#include <map>
#include <memory>
#include <set>
#include <google/protobuf/compiler/importer.h>
#include "perfetto/base/build_config.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/getopt.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/protozero/proto_utils.h"
#include "src/protozero/filtering/filter_bytecode_generator.h"
#include "src/protozero/filtering/filter_bytecode_parser.h"
namespace protozero {
namespace {
class MultiFileErrorCollectorImpl
: public google::protobuf::compiler::MultiFileErrorCollector {
public:
~MultiFileErrorCollectorImpl() override;
void AddError(const std::string&, int, int, const std::string&) override;
void AddWarning(const std::string&, int, int, const std::string&) override;
};
MultiFileErrorCollectorImpl::~MultiFileErrorCollectorImpl() = default;
void MultiFileErrorCollectorImpl::AddError(const std::string& filename,
int line,
int column,
const std::string& message) {
PERFETTO_ELOG("Error %s %d:%d: %s", filename.c_str(), line, column,
message.c_str());
}
void MultiFileErrorCollectorImpl::AddWarning(const std::string& filename,
int line,
int column,
const std::string& message) {
PERFETTO_ELOG("Warning %s %d:%d: %s", filename.c_str(), line, column,
message.c_str());
}
} // namespace
FilterUtil::FilterUtil() = default;
FilterUtil::~FilterUtil() = default;
bool FilterUtil::LoadMessageDefinition(const std::string& proto_file,
const std::string& root_message,
const std::string& proto_dir_path) {
// The protobuf compiler doesn't like backslashes and prints an error like:
// Error C:\it7mjanpw3\perfetto-a16500 -1:0: Backslashes, consecutive slashes,
// ".", or ".." are not allowed in the virtual path.
// Given that C:\foo\bar is a legit path on windows, fix it at this level
// because the problem is really the protobuf compiler being too picky.
static auto normalize_for_win = [](const std::string& path) {
#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
return perfetto::base::ReplaceAll(path, "\\", "/");
#else
return path;
#endif
};
google::protobuf::compiler::DiskSourceTree dst;
#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
// If the path is absolute, maps "C:/" -> "C:/" (without hardcoding 'C').
if (proto_file.size() > 3 && proto_file[1] == ':') {
char win_drive[4]{proto_file[0], ':', '/', '\0'};
dst.MapPath(win_drive, win_drive);
}
#endif
dst.MapPath("/", "/"); // We might still need this on Win under cygwin.
dst.MapPath("", normalize_for_win(proto_dir_path));
MultiFileErrorCollectorImpl mfe;
google::protobuf::compiler::Importer importer(&dst, &mfe);
const google::protobuf::FileDescriptor* root_file =
importer.Import(normalize_for_win(proto_file));
const google::protobuf::Descriptor* root_msg = nullptr;
if (!root_message.empty()) {
root_msg = importer.pool()->FindMessageTypeByName(root_message);
} else if (root_file->message_type_count() > 0) {
// The user didn't specfy the root type. Pick the first type in the file,
// most times it's the right guess.
root_msg = root_file->message_type(0);
if (root_msg)
PERFETTO_LOG(
"The guessed root message name is \"%s\". Pass -r com.MyName to "
"override",
root_msg->full_name().c_str());
}
if (!root_msg) {
PERFETTO_ELOG("Could not find the root message \"%s\" in %s",
root_message.c_str(), proto_file.c_str());
return false;
}
// |descriptors_by_full_name| is passed by argument rather than being a member
// field so that we don't risk leaving it out of sync (and depending on it in
// future without realizing) when performing the Dedupe() pass.
DescriptorsByNameMap descriptors_by_full_name;
ParseProtoDescriptor(root_msg, &descriptors_by_full_name);
return true;
}
// Generates a Message object for the given libprotobuf message descriptor.
// Recurses as needed into nested fields.
FilterUtil::Message* FilterUtil::ParseProtoDescriptor(
const google::protobuf::Descriptor* proto,
DescriptorsByNameMap* descriptors_by_full_name) {
auto descr_it = descriptors_by_full_name->find(proto->full_name());
if (descr_it != descriptors_by_full_name->end())
return descr_it->second;
descriptors_.emplace_back();
Message* msg = &descriptors_.back();
msg->full_name = proto->full_name();
(*descriptors_by_full_name)[msg->full_name] = msg;
for (int i = 0; i < proto->field_count(); ++i) {
const auto* proto_field = proto->field(i);
const uint32_t field_id = static_cast<uint32_t>(proto_field->number());
PERFETTO_CHECK(msg->fields.count(field_id) == 0);
auto& field = msg->fields[field_id];
field.name = proto_field->name();
field.type = proto_field->type_name();
if (proto_field->message_type()) {
msg->has_nested_fields = true;
// Recurse.
field.nested_type = ParseProtoDescriptor(proto_field->message_type(),
descriptors_by_full_name);
}
}
return msg;
}
void FilterUtil::Dedupe() {
std::map<std::string /*identity*/, Message*> index;
std::map<Message*, Message*> dupe_graph; // K,V: K shall be duped against V.
// As a first pass, generate an |identity| string for each leaf message. The
// identity is simply the comma-separated stringification of its field ids.
// If another message with the same identity exists, add an edge to the graph.
const size_t initial_count = descriptors_.size();
size_t field_count = 0;
for (auto& descr : descriptors_) {
if (descr.has_nested_fields)
continue; // Dedupe only leaf messages without nested fields.
std::string identity;
for (const auto& id_and_field : descr.fields)
identity.append(std::to_string(id_and_field.first) + ",");
auto it_and_inserted = index.emplace(identity, &descr);
if (!it_and_inserted.second) {
// insertion failed, a dupe exists already.
Message* dupe_against = it_and_inserted.first->second;
dupe_graph.emplace(&descr, dupe_against);
}
}
// Now apply de-duplications by re-directing the nested_type pointer to the
// equivalent descriptors that have the same set of allowed field ids.
std::set<Message*> referenced_descriptors;
referenced_descriptors.emplace(&descriptors_.front()); // The root.
for (auto& descr : descriptors_) {
for (auto& id_and_field : descr.fields) {
Message* target = id_and_field.second.nested_type;
if (!target)
continue; // Only try to dedupe nested types.
auto it = dupe_graph.find(target);
if (it == dupe_graph.end()) {
referenced_descriptors.emplace(target);
continue;
}
++field_count;
// Replace with the dupe.
id_and_field.second.nested_type = it->second;
} // for (nested_fields).
} // for (descriptors_).
// Remove unreferenced descriptors. We should much rather crash in the case of
// a logic bug rathern than trying to use them but don't emit them.
size_t removed_count = 0;
for (auto it = descriptors_.begin(); it != descriptors_.end();) {
if (referenced_descriptors.count(&*it)) {
++it;
} else {
++removed_count;
it = descriptors_.erase(it);
}
}
PERFETTO_LOG(
"Deduplication removed %zu duped descriptors out of %zu descriptors from "
"%zu fields",
removed_count, initial_count, field_count);
}
// Prints the list of messages and fields in a diff-friendly text format.
void FilterUtil::PrintAsText(std::optional<std::string> filter_bytecode) {
using perfetto::base::StripPrefix;
const std::string& root_name = descriptors_.front().full_name;
std::string root_prefix = root_name.substr(0, root_name.rfind('.'));
if (!root_prefix.empty())
root_prefix.append(".");
FilterBytecodeParser parser;
if (filter_bytecode) {
PERFETTO_CHECK(
parser.Load(filter_bytecode->data(), filter_bytecode->size()));
}
// <Filter msg_index, Message>
std::deque<std::pair<uint32_t, const Message*>> queue;
std::set<const Message*> seen_msgs{&descriptors_.front()};
queue.emplace_back(0u, &descriptors_.front());
while (!queue.empty()) {
auto index_and_descr = queue.front();
queue.pop_front();
uint32_t msg_index = index_and_descr.first;
const auto& descr = *index_and_descr.second;
for (const auto& id_and_field : descr.fields) {
const uint32_t field_id = id_and_field.first;
const auto& field = id_and_field.second;
FilterBytecodeParser::QueryResult result{};
if (filter_bytecode) {
result = parser.Query(msg_index, field_id);
if (!result.allowed)
continue;
}
const Message* nested_type = id_and_field.second.nested_type;
if (nested_type) {
PERFETTO_CHECK(!result.simple_field() || !filter_bytecode);
if (seen_msgs.find(nested_type) == seen_msgs.end()) {
seen_msgs.insert(nested_type);
queue.emplace_back(result.nested_msg_index, nested_type);
}
} else { // simple field
PERFETTO_CHECK(result.simple_field() || !filter_bytecode);
}
auto stripped_name = StripPrefix(descr.full_name, root_prefix);
std::string stripped_nested =
nested_type ? " " + StripPrefix(nested_type->full_name, root_prefix)
: "";
fprintf(print_stream_, "%-60s %3u %-8s %-32s%s\n", stripped_name.c_str(),
field_id, field.type.c_str(), field.name.c_str(),
stripped_nested.c_str());
}
}
}
std::string FilterUtil::GenerateFilterBytecode() {
protozero::FilterBytecodeGenerator bytecode_gen;
// Assign indexes to descriptors, simply by counting them in order;
std::map<Message*, uint32_t> descr_to_idx;
for (auto& descr : descriptors_)
descr_to_idx[&descr] = static_cast<uint32_t>(descr_to_idx.size());
for (auto& descr : descriptors_) {
for (auto it = descr.fields.begin(); it != descr.fields.end();) {
uint32_t field_id = it->first;
const Message::Field& field = it->second;
if (field.nested_type) {
// Append the index of the target submessage.
PERFETTO_CHECK(descr_to_idx.count(field.nested_type));
uint32_t nested_msg_index = descr_to_idx[field.nested_type];
bytecode_gen.AddNestedField(field_id, nested_msg_index);
++it;
continue;
}
// Simple field. Lookahead to see if we have a range of contiguous simple
// fields.
for (uint32_t range_len = 1;; ++range_len) {
++it;
if (it != descr.fields.end() && it->first == field_id + range_len &&
it->second.is_simple()) {
continue;
}
// At this point it points to either the end() of the vector or a
// non-contiguous or non-simple field (which will be picked up by the
// next iteration).
if (range_len == 1) {
bytecode_gen.AddSimpleField(field_id);
} else {
bytecode_gen.AddSimpleFieldRange(field_id, range_len);
}
break;
} // for (range_len)
} // for (descr.fields)
bytecode_gen.EndMessage();
} // for (descriptors)
return bytecode_gen.Serialize();
}
std::string FilterUtil::LookupField(const std::string& varint_encoded_path) {
const uint8_t* ptr =
reinterpret_cast<const uint8_t*>(varint_encoded_path.data());
const uint8_t* const end = ptr + varint_encoded_path.size();
std::vector<uint32_t> fields;
while (ptr < end) {
uint64_t varint;
const uint8_t* next = proto_utils::ParseVarInt(ptr, end, &varint);
PERFETTO_CHECK(next != ptr);
fields.emplace_back(static_cast<uint32_t>(varint));
ptr = next;
}
return LookupField(fields.data(), fields.size());
}
std::string FilterUtil::LookupField(const uint32_t* field_ids,
size_t num_fields) {
const Message* msg = descriptors_.empty() ? nullptr : &descriptors_.front();
std::string res;
for (size_t i = 0; i < num_fields; ++i) {
const uint32_t field_id = field_ids[i];
const Message::Field* field = nullptr;
if (msg) {
auto it = msg->fields.find(field_id);
field = it == msg->fields.end() ? nullptr : &it->second;
}
res.append(".");
if (field) {
res.append(field->name);
msg = field->nested_type;
} else {
res.append(std::to_string(field_id));
}
}
return res;
}
} // namespace protozero