#!/usr/bin/env python
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""A script to merge multiple source xml files into a single histograms.xml."""
import argparse
import os
import sys
import xml.dom.minidom
import expand_owners
import extract_histograms
import histogram_configuration_model
import histogram_paths
import populate_enums
def GetElementsByTagName(trees, tag, depth=2):
  """Collects every element named |tag| from each of the given DOM trees.

  Args:
    trees: A list of DOM trees to search.
    tag: The tag name of the elements to collect.
    depth: The depth handed to extract_histograms.IterElementsWithTag for
        every tree, i.e. the depth by which a match should be found.

  Returns:
    A list of the matching DOM nodes, grouped by tree in the order the trees
    were given.
  """
  find_elements = extract_histograms.IterElementsWithTag
  matches = []
  for tree in trees:
    matches.extend(find_elements(tree, tag, depth))
  return matches
def GetEnumsNodes(doc, trees):
  """Returns all <enums> nodes found in |trees|.

  When the trees also contain ukm events (<event> nodes under a
  <ukm-configuration>), each <enums> node is first passed to
  populate_enums.PopulateEnumsWithUkmEvents so that the "UkmEventNameHash"
  enum gets one value per event: the event name hash truncated to 31 bits,
  labelled with the event name.

  Args:
    doc: The document to create the node in.
    trees: A list of DOM trees.

  Returns:
    A list of <enums> DOM nodes.
  """
  enums_nodes = GetElementsByTagName(trees, 'enums')
  ukm_configurations = GetElementsByTagName(trees, 'ukm-configuration')
  ukm_event_nodes = GetElementsByTagName(ukm_configurations, 'event')

  # Some MergeFiles callers never pass ukm events; in that case there is
  # nothing to populate and the <enums> nodes are returned untouched.
  if ukm_event_nodes:
    for enums_node in enums_nodes:
      populate_enums.PopulateEnumsWithUkmEvents(doc, enums_node,
                                                ukm_event_nodes)
  return enums_nodes
def CombineHistogramsSorted(doc, trees):
  """Builds the combined, name-sorted histogram sections for a merged doc.

  <variants>, <histogram> and <histogram_suffixes> nodes are each collected
  from |trees| and sorted case-insensitively by their `name` attribute. The
  sorted <variants> nodes followed by the sorted <histogram> nodes go into a
  new <histograms> node; the sorted <histogram_suffixes> nodes go into a new
  <histogram_suffixes_list> node.

  Args:
    doc: The document to create the nodes in.
    trees: A list of DOM trees.

  Returns:
    A two-element list: the combined <histograms> node followed by the
    combined <histogram_suffixes_list> node.
  """

  def _LowerCaseName(node):
    return node.getAttribute('name').lower()

  def _SortedNodesWithTag(tag):
    return sorted(GetElementsByTagName(trees, tag, depth=3),
                  key=_LowerCaseName)

  # The children are attached with minidom's private `_append_child` rather
  # than the public `appendChild`: the safe version takes far longer (on the
  # order of 10000x) to append all children. The fast path is acceptable
  # because the appended nodes are clean, and because it only skips checks
  # without changing behavior; it is documented as usable when performance
  # matters. See
  # https://github.com/python/cpython/blob/2.7/Lib/xml/dom/minidom.py#L276.
  fast_append = xml.dom.minidom._append_child

  # Combined <histograms> section: all <variants> first, then <histogram>s.
  histograms_parent = doc.createElement('histograms')
  variants_in_order = _SortedNodesWithTag('variants')
  histograms_in_order = _SortedNodesWithTag('histogram')
  for node in variants_in_order + histograms_in_order:
    fast_append(histograms_parent, node)

  # Combined <histogram_suffixes_list> section.
  suffixes_parent = doc.createElement('histogram_suffixes_list')
  for node in _SortedNodesWithTag('histogram_suffixes'):
    fast_append(suffixes_parent, node)

  return [histograms_parent, suffixes_parent]
def MakeNodeWithChildren(doc, tag, children):
  """Creates a new element and attaches the given nodes beneath it.

  Args:
    doc: The document used to create the element.
    tag: The tag name of the element to create.
    children: A list of DOM nodes to append, in order, as children.

  Returns:
    The newly created DOM node.
  """
  parent = doc.createElement(tag)
  attach = parent.appendChild
  for child_node in children:
    attach(child_node)
  return parent
def MergeTrees(trees, should_expand_owners):
  """Merges a list of histograms.xml DOM trees into one document.

  Args:
    trees: A list of histograms.xml DOM trees.
    should_expand_owners: Whether to expand owners for histograms.

  Returns:
    A merged DOM tree rooted at <histogram-configuration>.
  """
  merged_doc = xml.dom.minidom.Document()

  # The merged document can end up with multiple <enums> and similar
  # sections, but scripts ignore these anyway. The <histogram> and
  # <histogram_suffixes> nodes are sorted by name and combined.
  top_level_children = (GetEnumsNodes(merged_doc, trees) +
                        CombineHistogramsSorted(merged_doc, trees))
  root = MakeNodeWithChildren(merged_doc, 'histogram-configuration',
                              top_level_children)
  merged_doc.appendChild(root)

  # Pretty-printing the merged document regresses after the unsafe
  # appendChild is used, possibly because that fast path does not build
  # indexes for later lookup. Serializing the document to XML and parsing it
  # back forces those indexes to be built.
  serialized = merged_doc.toxml().encode('utf-8')
  reparsed_doc = xml.dom.minidom.parseString(serialized)

  # Fancy operations only run once the document is stable, which helps
  # runtime performance.
  if not should_expand_owners:
    return reparsed_doc
  for histograms_node in reparsed_doc.getElementsByTagName('histograms'):
    expand_owners.ExpandHistogramsOWNERS(histograms_node)
  return reparsed_doc
def _GetComponentFromMetadataFile(filename):
  """Extracts a component string from the metadata file.

  The file is scanned line by line for the first entry of the form
  `component: "name"` (surrounding whitespace allowed) whose quoted name is
  non-empty. Only the text between the opening and the closing double quote
  is returned, so anything after the closing quote (e.g. a trailing comment)
  is ignored. Lines whose value is not double-quoted, or whose quote is never
  closed, are skipped instead of yielding a mangled value.

  Args:
    filename: The filename for the metadata file.

  Returns:
    The component name as a string, or None if the file has no usable
    component line.
  """
  prefix = 'component:'
  # Read as UTF-8 explicitly so the result does not depend on the platform's
  # default encoding.
  with open(filename, 'r', encoding='utf-8') as f:
    for line in f:
      line = line.strip()
      if not line.startswith(prefix):
        continue
      value = line[len(prefix):].lstrip()
      if not value.startswith('"'):
        # Unquoted value: there is no reliable way to delimit the name.
        continue
      component, closing_quote, _ = value[1:].partition('"')
      if closing_quote and component:
        return component
  return None
def _AddComponentFromMetadataFile(tree, filename):
  """Tags the histograms in |tree| with the metadata file's component.

  If the metadata file yields a component and the tree has at least one
  <histograms> element, every <histogram> found under the first <histograms>
  element is passed to expand_owners.AddHistogramComponent together with the
  component. Otherwise the tree is left as is.

  Args:
    tree: A histogram.xml DOM tree.
    filename: The name of the metadata file.

  Returns:
    The same tree, with the component (optionally) added.
  """
  component_name = _GetComponentFromMetadataFile(filename)
  if not component_name:
    return tree

  histograms_sections = tree.getElementsByTagName('histograms')
  if not histograms_sections:
    return tree

  find_elements = extract_histograms.IterElementsWithTag
  for histogram_node in find_elements(histograms_sections[0], 'histogram'):
    expand_owners.AddHistogramComponent(histogram_node, component_name)
  return tree
def _BuildDOMTreeWithComponentMetadata(filename_or_file):
  """Parses a histograms.xml source into a DOM tree.

  When the source is given as a string filename and a DIR_METADATA file
  exists in the same directory, the component extracted from that file is
  added to the tree. File handles are parsed without any metadata lookup.

  Args:
    filename_or_file: The string filename or the file handle for
        histograms.xml.

  Returns:
    The histograms.xml DOM tree with (optional) component metadata.
  """
  dom_tree = xml.dom.minidom.parse(filename_or_file)

  # Only a filename tells us which directory to look in for metadata.
  if not isinstance(filename_or_file, str):
    return dom_tree

  containing_dir = os.path.dirname(filename_or_file)
  metadata_path = os.path.join(containing_dir, 'DIR_METADATA')
  if not os.path.exists(metadata_path):
    return dom_tree
  return _AddComponentFromMetadataFile(dom_tree, metadata_path)
def MergeFiles(filenames=None, files=None, should_expand_owners=False):
  """Merges a list of histograms.xml files.

  Args:
    filenames: A list of histograms.xml filenames. None (the default) means
        no filenames.
    files: A list of histograms.xml file-like objects. None (the default)
        means no file objects.
    should_expand_owners: Whether we want to expand owners. By default, it's
        false because most of the callers don't care about the owners for
        each metadata.

  Returns:
    A merged DOM tree.
  """
  # None is used as the default instead of a shared mutable `[]`, so the
  # defaults can never be affected by (or leak state between) calls.
  if filenames is None:
    filenames = []
  if files is None:
    files = []

  # minidom.parse() takes both files and filenames, so they can be handled
  # uniformly; file objects come first, then filenames.
  all_files = files + filenames
  trees = [_BuildDOMTreeWithComponentMetadata(f) for f in all_files]
  return MergeTrees(trees, should_expand_owners)
def PrettyPrintMergedFiles(filenames=None, files=None):
  """Merges histograms.xml sources, expands owners and pretty-prints them.

  Args:
    filenames: A list of histograms.xml filenames. None (the default) means
        no filenames.
    files: A list of histograms.xml file-like objects. None (the default)
        means no file objects.

  Returns:
    The result of histogram_configuration_model.PrettifyTree on the merged
    tree (written out as text by main()).
  """
  # None replaces the shared mutable `[]` defaults; it is normalised to a
  # fresh list here so MergeFiles always receives real lists.
  merged_tree = MergeFiles(
      filenames=[] if filenames is None else filenames,
      files=[] if files is None else files,
      should_expand_owners=True)
  return histogram_configuration_model.PrettifyTree(merged_tree)
def main():
  """Writes the merged, pretty-printed histograms XML to the --output path."""
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument('--output', required=True)
  options = arg_parser.parse_args()

  with open(options.output, 'w', encoding='utf-8',
            newline='\n') as output_file:
    # This is run by
    # https://source.chromium.org/chromium/chromium/src/+/main:tools/metrics/BUILD.gn;drc=573e48309695102dec2da1e8f806c18c3200d414;l=5
    # to send the merged histograms.xml to the server side. |UKM_XML| is
    # included not to merge ukm.xml itself but to populate the
    # `UkmEventNameHash` enum values.
    xml_paths = histogram_paths.ALL_XMLS + [histogram_paths.UKM_XML]
    output_file.write(PrettyPrintMergedFiles(xml_paths))


if __name__ == '__main__':
  main()