blob: ae0408e6946aea2142fd0065463c9b01f98f56d5 [file] [log] [blame]
# Copyright 2013 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Extract histogram names from the description XML file.
For more information on the format of the XML file, which is self-documenting,
see histograms.xml; however, here is a simple example to get you started. The
XML below will generate the following five histograms:
HistogramTime
HistogramEnum
HistogramEnum_Chrome
HistogramEnum_IE
HistogramEnum_Firefox
<histogram-configuration>
<histograms>
<histogram name="HistogramTime" units="milliseconds">
<owner>person@chromium.org</owner>
<owner>some-team@chromium.org</owner>
<summary>A brief description.</summary>
</histogram>
<histogram name="HistogramEnum" enum="MyEnumType">
<owner>person@chromium.org</owner>
<summary>This histogram sports an enum value type.</summary>
</histogram>
</histograms>
<enums>
<enum name="MyEnumType">
<summary>This is an example enum type, where the values mean little.</summary>
<int value="1" label="FIRST_VALUE">This is the first value.</int>
<int value="2" label="SECOND_VALUE">This is the second value.</int>
</enum>
</enums>
<histogram_suffixes_list>
<histogram_suffixes name="BrowserType" separator="_">
<suffix name="Chrome"/>
<suffix name="IE"/>
<suffix name="Firefox"/>
<affected-histogram name="HistogramEnum"/>
</histogram_suffixes>
</histogram_suffixes_list>
</histogram-configuration>
"""
import bisect
import copy
import datetime
import itertools
try:
import HTMLParser
html = HTMLParser.HTMLParser()
except ImportError: # For Py3 compatibility
import html
import logging
import re
import xml.dom.minidom
import histogram_configuration_model
# Permissive whole-string pattern for an e-mail address: one or more word
# characters, '-', '+', '%' or '.' on each side of an '@'. _ExtractOwners()
# compiles it to decide whether an <owner> entry is an address.
BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'

# Placeholder text for an <owner> tag. _ExtractOwners() treats an entry
# containing it as "has an owner" but never adds it to the owners list.
OWNER_PLACEHOLDER = (
    'Please list the metric\'s owners. Add more owner tags as needed.')

# Number of times _UpdateHistogramsWithSuffixes() re-queues a
# <histogram_suffixes> node whose affected histogram does not exist yet before
# it reports the missing dependency as an error.
MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH = 5

# Obsolete reason that _ProcessBaseHistogramAttribute() gives to entries
# marked base="true" that have no obsolete reason of their own.
DEFAULT_BASE_HISTOGRAM_OBSOLETE_REASON = (
    'Base histogram. Use suffixes of this histogram instead.')

# strptime() format used to validate a dated expires_after attribute.
EXPIRY_DATE_PATTERN = "%Y-%m-%d"

# Milestone form of expires_after: 'M' followed by two or three digits and
# nothing else.
EXPIRY_MILESTONE_RE = re.compile(r'M[0-9]{2,3}\Z')

# Module-level alias for the DOM element node type constant.
_ELEMENT_NODE = xml.dom.minidom.Node.ELEMENT_NODE
class Error(Exception):
  """Raised by this module when histogram definitions cannot be processed."""
def IterElementsWithTag(root, tag, depth=-1):
  """Iterates over DOM tree and yields elements matching tag name.

  It's meant to be a replacement for `getElementsByTagName` (which searches
  recursively) that does not look inside matching elements, because nested
  tags are not supported in histograms files.

  Note: the search stops descending as soon as a level contains at least one
  element with the given tag; only the matches on that level are yielded.

  Args:
    root: XML dom tree (any minidom node).
    tag: Element's tag name.
    depth: How many levels below |root| may be searched. 1 restricts the
      search to direct children; a negative value (the default) means no
      limit. With 0, |root| itself is yielded if it matches.

  Yields:
    xml.dom.minidom.Node: Element matching criteria.
  """
  element_node = xml.dom.minidom.Node.ELEMENT_NODE

  if depth == 0 and root.nodeType == element_node and root.tagName == tag:
    yield root
    return

  had_tag = False
  for child in root.childNodes:
    if child.nodeType == element_node and child.tagName == tag:
      had_tag = True
      yield child

  depth -= 1

  # Only go one level deeper when this level had no match and the depth budget
  # is not exhausted.
  if not had_tag and depth != 0:
    for child in root.childNodes:
      yield from IterElementsWithTag(child, tag, depth)
def _GetTextFromChildNodes(node):
"""Returns a string concatenation of the text of the given node's children.
Comments are ignored, consecutive lines of text are joined with a single
space, and paragraphs are maintained so that long text is more readable on
dashboards.
Args:
node: The DOM Element whose children's text is to be extracted, processed,
and returned.
"""
paragraph_break = '\n\n'
text_parts = []
for child in node.childNodes:
if child.nodeType != xml.dom.minidom.Node.COMMENT_NODE:
child_text = child.toxml()
if not child_text:
continue
# If the given node has the below XML representation, then the text
# added to the list is 'Some words.\n\nWords.'
# <tag>
# Some
# words.
#
# <!--Child comment node.-->
#
# Words.
# </tag>
# In the case of the first child text node, raw_paragraphs would store
# ['\n Some\n words.', ' '], and in the case of the second,
# raw_paragraphs would store ['', ' Words.\n'].
raw_paragraphs = child_text.split(paragraph_break)
# In the case of the first child text node, processed_paragraphs would
# store ['Some words.', ''], and in the case of the second,
# processed_paragraphs would store ['Words.'].
processed_paragraphs = [NormalizeString(text)
for text in raw_paragraphs
if text]
text_parts.append(paragraph_break.join(processed_paragraphs))
return ''.join(text_parts).strip()
def NormalizeString(text):
  r"""Collapses whitespace runs to one space and unescapes HTML entities.

  Leading and trailing whitespace is dropped. HTML escapes such as &quot; or
  &gt; are replaced by the characters they stand for.

  Args:
    text: The string to normalize, e.g. '\n\n a \n b&gt;c '.

  Returns:
    The normalized string, e.g. 'a b>c'.
  """
  words = text.split()
  # Unescape only after collapsing, so whitespace produced by an entity is
  # kept as-is.
  return html.unescape(' '.join(words))
def _NormalizeAllAttributeValues(node):
  """Normalizes every attribute value in the tree rooted at |node|, in place.

  Each attribute value of each element is replaced by the result of
  NormalizeString().

  Args:
    node: The minidom node to be normalized.

  Returns:
    The same minidom node, after normalization.
  """
  if node.nodeType == _ELEMENT_NODE:
    for attribute in node.attributes.values():
      attribute.value = NormalizeString(attribute.value)

  for child in node.childNodes:
    _NormalizeAllAttributeValues(child)

  return node
def _ExpandHistogramNameWithSuffixes(suffix_name, histogram_name,
histogram_suffixes_node):
"""Creates a new histogram name based on a histogram suffix.
Args:
suffix_name: The suffix string to apply to the histogram name. May be empty.
histogram_name: The name of the histogram. May be of the form
Group.BaseName or BaseName.
histogram_suffixes_node: The histogram_suffixes XML node.
Returns:
A string with the expanded histogram name.
Raises:
Error: if the expansion can't be done.
"""
if histogram_suffixes_node.hasAttribute('separator'):
separator = histogram_suffixes_node.getAttribute('separator')
else:
separator = '_'
if histogram_suffixes_node.hasAttribute('ordering'):
ordering = histogram_suffixes_node.getAttribute('ordering')
else:
ordering = 'suffix'
parts = ordering.split(',')
ordering = parts[0]
if len(parts) > 1:
placement = int(parts[1])
else:
placement = 1
if ordering not in ['prefix', 'suffix']:
logging.error('ordering needs to be prefix or suffix, value is %s',
ordering)
raise Error()
if not suffix_name:
return histogram_name
if ordering == 'suffix':
return histogram_name + separator + suffix_name
# For prefixes, the suffix_name is inserted between the "cluster" and the
# "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
sections = histogram_name.split('.')
if len(sections) <= placement:
logging.error(
'Prefix histogram_suffixes expansions require histogram names which '
'include a dot separator. Histogram name is %s, histogram_suffixes is '
'%s, and placment is %d', histogram_name,
histogram_suffixes_node.getAttribute('name'), placement)
raise Error()
cluster = '.'.join(sections[0:placement]) + '.'
remainder = '.'.join(sections[placement:])
return cluster + suffix_name + separator + remainder
def ExtractEnumsFromXmlTree(tree):
  """Extracts all <enum> nodes in the tree into a dictionary.

  Args:
    tree: XML dom tree containing the <enum> nodes.

  Returns:
    A tuple (enums, have_errors). |enums| maps each enum name to a dict with
    the keys 'name', 'values' (int value -> {'label', 'summary'}) and, when
    the enum has a <summary>, 'summary'. |have_errors| is True if a problem
    was logged: enums out of alphabetical order, duplicate enums, values or
    labels, int values out of numerical order, or a non-obsolete enum without
    any <int>.
  """
  enums = {}
  have_errors = False
  previous_name = None

  for enum_node in IterElementsWithTag(tree, 'enum'):
    name = enum_node.getAttribute('name')
    out_of_order = (previous_name is not None
                    and name.lower() < previous_name.lower())
    if out_of_order:
      logging.error('Enums %s and %s are not in alphabetical order',
                    previous_name, name)
      have_errors = True
    previous_name = name

    if name in enums:
      logging.error('Duplicate enum %s', name)
      have_errors = True
      continue

    int_nodes = list(IterElementsWithTag(enum_node, 'int'))
    obsolete_nodes = list(IterElementsWithTag(enum_node, 'obsolete', 1))
    if not (int_nodes or obsolete_nodes):
      logging.error('Non-obsolete enum %s should have at least one <int>', name)
      have_errors = True
      continue

    # First pass: collect the values, rejecting duplicate values and labels.
    values = {}
    seen_labels = set()
    for int_node in int_nodes:
      int_value = int(int_node.getAttribute('value'))
      if int_value in values:
        logging.error('Duplicate enum value %d for enum %s', int_value, name)
        have_errors = True
        continue
      label = int_node.getAttribute('label')
      if label in seen_labels:
        logging.error('Duplicate enum label "%s" for enum %s', label, name)
        have_errors = True
        continue
      seen_labels.add(label)
      values[int_value] = {
          'label': label,
          'summary': _GetTextFromChildNodes(int_node),
      }

    # Second pass: check the document order of the values and, for each value
    # that is out of place, suggest where it belongs.
    sorted_values = sorted(values)
    highest_so_far = None
    for int_node in int_nodes:
      int_value = int(int_node.getAttribute('value'))
      if highest_so_far is None or int_value >= highest_so_far:
        highest_so_far = int_value
        continue

      logging.error('Enum %s int values %d and %d are not in numerical order',
                    name, highest_so_far, int_value)
      have_errors = True
      insert_at = bisect.bisect_left(sorted_values, int_value)
      if insert_at:
        predecessor = sorted_values[insert_at - 1]
        logging.warning('Insert value %d after %d ("%s")', int_value,
                        predecessor, values[predecessor]['label'])
      else:
        logging.warning('Insert value %d at the beginning', int_value)

    enum_dict = {'name': name, 'values': values}
    # Only the first <summary> is used.
    first_summary = next(IterElementsWithTag(enum_node, 'summary'), None)
    if first_summary is not None:
      enum_dict['summary'] = _GetTextFromChildNodes(first_summary)

    enums[name] = enum_dict

  return enums, have_errors
def _ExtractOwners(node):
  """Extracts owners information from the given node, if exists.

  Only <owner> children of |node| are considered, and only the first child
  node of each <owner> is read.

  Args:
    node: A DOM Element.

  Returns:
    A tuple of owner-related info, e.g. (['alice@chromium.org'], True)

    The first element is a list of the owners' email addresses, excluding the
    owner placeholder string. The second element is a boolean indicating
    whether the node has an owner. A histogram whose owner is the owner
    placeholder string has an owner.
  """
  email_re = re.compile(BASIC_EMAIL_REGEXP)
  emails = []
  found_owner = False

  for owner_node in IterElementsWithTag(node, 'owner', 1):
    first_child = owner_node.firstChild
    text = ''
    if first_child and first_child.nodeValue:
      text = first_child.nodeValue
    if not text:
      continue

    if email_re.match(text):
      found_owner = True
      emails.append(text)
    elif OWNER_PLACEHOLDER in text:
      found_owner = True

  return emails, found_owner
def _ExtractImprovementDirection(histogram_node):
"""Extracts improvement direction from the given histogram element, if any.
Args:
histogram_node: A DOM Element corresponding to a histogram.
Returns:
A tuple, where the first element is the improvement direction, if any;
the second element is an error message if the given direction is invalid.
"""
direction = None
error = None
improvement_nodes = histogram_node.getElementsByTagName('improvement')
if not improvement_nodes:
return None, None
if len(improvement_nodes) > 1:
histogram_name = histogram_node.getAttribute('name')
error = f'Histogram "{histogram_name}" has multiple <improvement> tags.'
return None, error
improvement_node = improvement_nodes[0]
direction = improvement_node.getAttribute('direction')
if (direction not in
histogram_configuration_model.IMPROVEMENT_DIRECTION_VALID_VALUES):
histogram_name = histogram_node.getAttribute('name')
error = (
f'Histogram "{histogram_name}" has an invalid direction "{direction}" '
f'in its <improvement> tag.')
return None, error
return direction, None
def _ExtractComponents(histogram):
"""Extracts component information from the given histogram element.
Components are present when a histogram has a component tag, e.g.
<component>UI&gt;Browser</component>. Components may also be present when an
OWNERS file is given as a histogram owner, e.g. <owner>src/dir/OWNERS</owner>;
in this case the component is extracted from adjacent DIR_METADATA files.
See _ExtractComponentViaDirmd() in the following file for details:
chromium/src/tools/metrics/histograms/expand_owners.py.
Args:
histogram: A DOM Element corresponding to a histogram.
Returns:
A list of the components associated with the histogram, e.g.
['UI>Browser>Spellcheck'].
"""
component_nodes = histogram.getElementsByTagName('component')
return [
_GetTextFromChildNodes(component_node)
for component_node in component_nodes
]
def _ValidateDateString(date_str):
  """Checks if |date_str| can be parsed with EXPIRY_DATE_PATTERN.

  Args:
    date_str: string

  Returns:
    True iff strptime() accepts |date_str| as a 'YYYY-MM-DD' date.
  """
  try:
    datetime.datetime.strptime(date_str, EXPIRY_DATE_PATTERN)
  except ValueError:
    return False
  else:
    return True
def _ValidateMilestoneString(milestone_str):
  """Returns True iff |milestone_str| matches EXPIRY_MILESTONE_RE ('M*')."""
  return bool(EXPIRY_MILESTONE_RE.match(milestone_str))
def _ProcessBaseHistogramAttribute(node, histogram_entry):
if node.hasAttribute('base'):
is_base = node.getAttribute('base').lower() == 'true'
histogram_entry['base'] = is_base
if is_base and 'obsolete' not in histogram_entry:
histogram_entry['obsolete'] = DEFAULT_BASE_HISTOGRAM_OBSOLETE_REASON
# The following code represents several concepts as JSON objects
#
# Token: an analog of <token> tag, represented as a JSON object like:
# {
# 'key': 'token_key',
# 'variants': [a list of Variant objects]
# }
#
# Variant: an analog of <variant> tag, represented as a JSON object like:
# {
# 'name': 'variant_name',
# 'summary': 'variant_summary',
# 'obsolete': 'Obsolete text.',
# 'owners': ['me@chromium.org', 'you@chromium.org']
# }
#
# Variants: an analog of <variants> tag, represented as a JSON object like:
# {
# 'name': 'variants_name',
# 'variants': [a list of Variant objects]
# }
def _ExtractTokens(histogram, variants_dict):
  """Extracts tokens and variants from the given histogram element.

  A <token> is skipped (and an error is reported) when its key was already
  used by an earlier token of the same histogram, or when '{key}' does not
  appear in the histogram name.

  Args:
    histogram: A DOM Element corresponding to a histogram.
    variants_dict: A dictionary of variants extracted from the tree.

  Returns:
    A tuple where the first element is a list of extracted Tokens, and the
    second indicates if any errors were detected while extracting them.
  """
  tokens = []
  # Keys of the tokens accepted so far. |tokens| holds dictionaries, so the
  # duplicate check needs its own collection of plain key strings.
  seen_keys = set()
  have_error = False
  histogram_name = histogram.getAttribute('name')

  for token_node in IterElementsWithTag(histogram, 'token', 1):
    token_key = token_node.getAttribute('key')
    if token_key in seen_keys:
      logging.error(
          'Histogram %s contains duplicate token key %s, please ensure token '
          'keys are unique.', histogram_name, token_key)
      have_error = True
      continue

    if '{' + token_key + '}' not in histogram_name:
      logging.error(
          'Histogram %s includes a token tag but the token key is not present '
          'in histogram name. Please insert the token key into the histogram '
          'name in order for the token to be added.', histogram_name)
      have_error = True
      continue

    # If the 'variants' attribute is set for the <token>, start from a copy of
    # the matching list in |variants_dict| so that the shared list is never
    # modified. Otherwise start from an empty list.
    variants = []
    if token_node.hasAttribute('variants'):
      variants_name = token_node.getAttribute('variants')
      variant_list = variants_dict.get(variants_name)
      if variant_list:
        variants = variant_list[:]
      else:
        logging.error(
            'The variants attribute %s of token key %s of histogram %s does '
            'not have a corresponding <variants> tag.', variants_name,
            token_key, histogram_name)
        have_error = True

    # Inline and out-of-line variants can be combined.
    variants.extend(_ExtractVariantNodes(token_node))

    seen_keys.add(token_key)
    tokens.append({'key': token_key, 'variants': variants})

  return tokens, have_error
def _ExtractVariantNodes(node):
  """Extracts the variants of a given node into a list of variant dictionaries.

  Each <variant> child becomes a dict with 'name' and 'summary' (the summary
  falls back to the name when the attribute is absent), plus 'obsolete' when
  the variant has an obsolete reason and 'owners' when it has an owner.

  Args:
    node: A DOM element whose direct <variant> children are extracted, e.g. a
      <token> or <variants> node.

  Returns:
    A list of Variants.
  """
  variants = []

  for variant_node in IterElementsWithTag(node, 'variant', 1):
    name = variant_node.getAttribute('name')
    if variant_node.hasAttribute('summary'):
      summary = variant_node.getAttribute('summary')
    else:
      summary = name
    variant = {'name': name, 'summary': summary}

    obsolete_reason = _GetObsoleteReason(variant_node)
    if obsolete_reason:
      variant['obsolete'] = obsolete_reason

    owners, has_owners = _ExtractOwners(variant_node)
    if has_owners:
      variant['owners'] = owners

    variants.append(variant)

  return variants
def _ExtractHistogramsFromXmlTree(tree, enums):
  """Extracts all <histogram> nodes in the tree into a dictionary.

  Args:
    tree: XML dom tree containing the <histogram> (and <variants>) nodes.
    enums: A dictionary of enums, keyed by enum name, used to resolve the
      'enum' attribute of histograms.

  Returns:
    A tuple (histograms, have_errors). |histograms| maps histogram names to
    description dictionaries; |have_errors| is True if any validation problem
    was logged.
  """
  # Process the histograms. The descriptions can include HTML tags.
  histograms = {}
  variants_dict, have_errors = _ExtractVariantsFromXmlTree(tree)

  previous_name = None
  for histogram in IterElementsWithTag(tree, 'histogram'):
    name = histogram.getAttribute('name')
    if previous_name is not None and name.lower() < previous_name.lower():
      logging.error('Histograms %s and %s are not in alphabetical order',
                    previous_name, name)
      have_errors = True
    previous_name = name

    if name in histograms:
      logging.error('Duplicate histogram definition %s', name)
      have_errors = True
      continue

    entry = {}
    histograms[name] = entry

    # Expiry: mandatory, and one of "never", a milestone or a date.
    if not histogram.hasAttribute('expires_after'):
      logging.error(
          'Your histogram %s must have an expiry date. If you are marking a '
          'histogram as obsolete, please set the expiry date to the current '
          'date.', name)
      have_errors = True
    else:
      expiry = histogram.getAttribute('expires_after')
      expiry_is_valid = (expiry == "never" or _ValidateMilestoneString(expiry)
                         or _ValidateDateString(expiry))
      if expiry_is_valid:
        entry['expires_after'] = expiry
      else:
        logging.error(
            'Expiry of histogram %s does not match expected date format ("%s"),'
            ' milestone format (M*), or "never": found %s.', name,
            EXPIRY_DATE_PATTERN, expiry)
        have_errors = True

    # <owner> tags.
    owners, has_owner = _ExtractOwners(histogram)
    if owners:
      entry['owners'] = owners

    # Optional <improvement> tag.
    direction, direction_error = _ExtractImprovementDirection(histogram)
    if direction:
      entry['improvement'] = direction
    if direction_error:
      logging.error(direction_error)
      have_errors = True

    # <component> tags.
    components = _ExtractComponents(histogram)
    if components:
      entry['components'] = components

    # <summary> tag; only the first one is used.
    summary_nodes = list(IterElementsWithTag(histogram, 'summary'))
    entry['summary'] = (_GetTextFromChildNodes(summary_nodes[0])
                        if summary_nodes else 'TBD')

    # <obsolete> tag.
    obsolete_nodes = list(IterElementsWithTag(histogram, 'obsolete', 1))
    if obsolete_nodes:
      entry['obsolete'] = _GetTextFromChildNodes(obsolete_nodes[0])
    else:
      # Non-obsolete histograms need a non-empty <summary> and <owner>s.
      if not summary_nodes or not entry['summary']:
        logging.error('histogram %s should provide a <summary>', name)
        have_errors = True
      if not has_owner:
        logging.error('histogram %s should specify <owner>s', name)
        have_errors = True

    # Exactly one of units and enum must be given.
    has_units = histogram.hasAttribute('units')
    has_enum = histogram.hasAttribute('enum')
    if not (has_units or has_enum):
      logging.error('histogram %s should have either units or enum', name)
      have_errors = True
    if has_units and has_enum:
      logging.error('histogram %s should not have both units and enum', name)
      have_errors = True

    if has_units:
      entry['units'] = histogram.getAttribute('units')

    if has_enum:
      enum_name = histogram.getAttribute('enum')
      if enum_name in enums:
        entry['enum'] = enums[enum_name]
      else:
        logging.error('Unknown enum %s in histogram %s', enum_name, name)
        have_errors = True

    # <token> tags.
    tokens, have_token_errors = _ExtractTokens(histogram, variants_dict)
    have_errors = have_errors or have_token_errors
    if tokens:
      entry['tokens'] = tokens

    _ProcessBaseHistogramAttribute(histogram, entry)

  return histograms, have_errors
def _ExtractVariantsFromXmlTree(tree):
  """Extracts all <variants> nodes in the tree into a dictionary.

  When a <variants> name occurs more than once, the first definition is kept
  and an error is logged for each later one.

  Args:
    tree: A DOM Element containing histograms and variants nodes.

  Returns:
    A tuple where the first element is a dictionary of extracted Variants, where
    the key is the variants name and the value is a list of Variant objects.
    The second element indicates if any errors were detected while
    extracting them.
  """
  variants_by_name = {}
  found_duplicate = False

  for variants_node in IterElementsWithTag(tree, 'variants'):
    name = variants_node.getAttribute('name')
    if name not in variants_by_name:
      variants_by_name[name] = _ExtractVariantNodes(variants_node)
    else:
      logging.error('Duplicate variants definition %s', name)
      found_duplicate = True

  return variants_by_name, found_duplicate
def _GetObsoleteReason(node):
"""If the node's histogram is obsolete, returns a string explanation.
Otherwise, returns None.
Args:
node: A DOM Element associated with a histogram.
"""
for child in node.childNodes:
if child.localName == 'obsolete':
# There can be at most 1 obsolete element per node.
return _GetTextFromChildNodes(child)
return None
def _UpdateHistogramsWithSuffixes(tree, histograms):
  """Processes <histogram_suffixes> tags and combines with affected histograms.

  The histograms dictionary will be updated in-place by adding new histograms
  created by combining histograms themselves with histogram_suffixes targeting
  these histograms.

  Args:
    tree: XML dom tree.
    histograms: a dictionary of histograms previously extracted from the tree;

  Returns:
    True if any errors were found.
  """
  have_errors = False

  histogram_suffix_tag = 'histogram_suffixes'
  suffix_tag = 'suffix'
  with_tag = 'with-suffix'

  # Verify order of histogram_suffixes fields first.
  last_name = None
  for histogram_suffixes in IterElementsWithTag(
      tree, histogram_suffix_tag, depth=1):
    name = histogram_suffixes.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('histogram_suffixes %s and %s are not in alphabetical '
                    'order', last_name, name)
      have_errors = True
    last_name = name

  # histogram_suffixes can depend on other histogram_suffixes, so we need to be
  # careful. histogram_suffixes whose dependencies have not yet been processed
  # get relegated to a queue that is processed after the first pass.
  reprocess_queue = []

  def GenerateHistogramSuffixes():
    for f in IterElementsWithTag(tree, histogram_suffix_tag):
      yield 0, f
    # The queue may grow while it is being iterated; entries appended during
    # the iteration are picked up as well.
    for r, f in reprocess_queue:
      yield r, f

  for reprocess_count, histogram_suffixes in GenerateHistogramSuffixes():
    # Check dependencies first: every affected histogram must already exist.
    affected_histograms = list(IterElementsWithTag(
        histogram_suffixes, 'affected-histogram', 1))
    missing_dependency = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if histogram_name not in histograms:
        # Base histogram is missing.
        missing_dependency = histogram_name
        break

    if missing_dependency is not None:
      if reprocess_count < MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH:
        reprocess_queue.append((reprocess_count + 1, histogram_suffixes))
      else:
        logging.error('histogram_suffixes %s is missing its dependency %s',
                      histogram_suffixes.getAttribute('name'),
                      missing_dependency)
        have_errors = True
      continue

    # If the suffix group has an obsolete tag, all suffixes it generates inherit
    # its reason.
    group_obsolete_reason = _GetObsoleteReason(histogram_suffixes)

    name = histogram_suffixes.getAttribute('name')
    suffix_nodes = list(IterElementsWithTag(histogram_suffixes, suffix_tag, 1))
    # Every <suffix> must carry a label.
    for suffix in suffix_nodes:
      if not suffix.hasAttribute('label'):
        logging.error('suffix %s in histogram_suffixes %s should have a label',
                      suffix.getAttribute('name'), name)
        have_errors = True

    # Find owners list under current histogram_suffixes tag.
    owners, _ = _ExtractOwners(histogram_suffixes)

    last_histogram_name = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if (last_histogram_name is not None and
          histogram_name.lower() < last_histogram_name.lower()):
        logging.error('Affected histograms %s and %s of histogram_suffixes %s '
                      'are not in alphabetical order', last_histogram_name,
                      histogram_name, name)
        have_errors = True
      last_histogram_name = histogram_name

      # <with-suffix> children restrict the expansion to the listed suffixes;
      # without them every <suffix> of the group applies.
      with_suffixes = list(IterElementsWithTag(affected_histogram, with_tag, 1))
      suffixes_to_add = with_suffixes if with_suffixes else suffix_nodes

      for suffix in suffixes_to_add:
        suffix_name = suffix.getAttribute('name')
        try:
          new_histogram_name = _ExpandHistogramNameWithSuffixes(
              suffix_name, histogram_name, histogram_suffixes)
        except Error:
          have_errors = True
          continue

        if new_histogram_name != histogram_name:
          new_histogram = copy.deepcopy(histograms[histogram_name])
          # Do not copy forward base histogram state to suffixed
          # histograms. Any suffixed histograms that wish to remain base
          # histograms must explicitly re-declare themselves as base
          # histograms. The default obsolete reason is only ever added for
          # base histograms, so it is dropped together with the flag.
          if new_histogram.get('base', False):
            del new_histogram['base']
            if (new_histogram.get('obsolete', '') ==
                DEFAULT_BASE_HISTOGRAM_OBSOLETE_REASON):
              del new_histogram['obsolete']
          histograms[new_histogram_name] = new_histogram

        histogram_entry = histograms[new_histogram_name]

        # If no owners are added for this histogram-suffixes, it inherits the
        # owners of its parents.
        if owners:
          histogram_entry['owners'] = owners

        # The most specific obsolete reason wins: the suffix itself, then the
        # affected histogram, then the whole suffix group.
        obsolete_reason = _GetObsoleteReason(suffix)
        if not obsolete_reason:
          obsolete_reason = _GetObsoleteReason(affected_histogram)
        if not obsolete_reason:
          obsolete_reason = group_obsolete_reason

        # If the suffix has an obsolete tag, all histograms it generates
        # inherit it.
        if obsolete_reason:
          histogram_entry['obsolete'] = obsolete_reason

        _ProcessBaseHistogramAttribute(suffix, histogram_entry)

  return have_errors
class TokenAssignment:
  """One choice of Variant for each Token of a histogram pattern.

  Attributes:
    pairings: A token_name to Variant map.
  """

  def __init__(self, pairings):
    self.pairings = pairings
def _GetTokenAssignments(tokens):
"""Get all possible TokenAssignments for the listed tokens.
Args:
tokens: The list of Tokens to create assignments for.
Returns:
A list of TokenAssignments.
"""
token_keys = [token['key'] for token in tokens]
token_variants = [token['variants'] for token in tokens]
return [
TokenAssignment(pairings=dict(zip(token_keys, selected_variants)))
for selected_variants in itertools.product(*token_variants)
]
def _GenerateNewHistogramsFromTokens(histogram_name, histograms_dict,
                                     new_histograms_dict):
  """For a histogram with tokens, generates new histograms and adds to dict.

  One histogram is generated for every combination of token variants. Its name
  and summary are the pattern's name and summary with each '{token}' replaced
  by the chosen variant's name and summary respectively.

  Args:
    histogram_name: The name of the histogram.
    histograms_dict: The dictionary of all histograms extracted from the tree.
    new_histograms_dict: The dictionary of histograms to add newly generated
      histograms to.

  Returns:
    A boolean that is True if a generated histogram name already exists in the
    |new_histograms_dict|.
  """
  found_duplicate = False
  pattern_node = histograms_dict[histogram_name]
  summary_pattern = pattern_node['summary']

  # Each assignment is one of the cross-product combinations of token variants
  # and corresponds to one generated histogram.
  for assignment in _GetTokenAssignments(pattern_node['tokens']):
    # Substitution values for the histogram name and the summary.
    names_by_token = {}
    summaries_by_token = {}
    obsolete_reason = ''
    owners = []

    for token_key, variant in assignment.pairings.items():
      names_by_token[token_key] = variant['name']
      summaries_by_token[token_key] = variant['summary']

      # A variant's obsolete reason overwrites the one of the original
      # histogram; with several obsolete variants the last one wins.
      if 'obsolete' in variant:
        obsolete_reason = variant['obsolete']

      # Owners of all chosen variants are collected and replace the owners of
      # the original histogram.
      if 'owners' in variant:
        owners.extend(variant['owners'])

    generated_name = histogram_name.format(**names_by_token)
    generated_summary = summary_pattern.format(**summaries_by_token)

    if generated_name in new_histograms_dict:
      logging.error(
          "Duplicate histogram name %s generated. Please remove identical "
          "variants in different tokens in %s." %
          (generated_name, histogram_name))
      found_duplicate = True
      continue

    generated_node = dict(pattern_node, summary=generated_summary)
    # Do not copy the <token> nodes to the generated histograms.
    del generated_node['tokens']

    if obsolete_reason:
      generated_node['obsolete'] = obsolete_reason
    if owners:
      generated_node['owners'] = owners

    new_histograms_dict[generated_name] = generated_node

  return found_duplicate
def _UpdateHistogramsWithTokens(histograms_dict):
"""Processes histograms and combines with variants of tokens.
Args:
histograms_dict: A dictionary of all the histograms extracted from the tree.
Returns:
A tuple where the first element is the replacement histograms dictionary,
containing the original histograms without tokens and histograms
whose tokens are replaced by newly variant combinations.
The second element is a boolean is there is error.
"""
have_error = False
# Create new dict instead of modify in place because newly generated
# histograms will be added when iterating through |histograms_dict|.
new_histograms_dict = {}
for histogram_name, histogram_node in histograms_dict.items():
if 'tokens' in histogram_node:
have_error = have_error or _GenerateNewHistogramsFromTokens(
histogram_name, histograms_dict, new_histograms_dict)
# For histograms without tokens, copy to new histograms dict.
else:
new_histograms_dict[histogram_name] = histogram_node
return new_histograms_dict, have_error
def _GetTagSubTree(tree, tag, depth):
  """Returns the sub tree rooted at the single element named |tag|.

  When no element with the tag name is found, or more than one is found, the
  original tree is returned.

  Args:
    tree: XML dom tree.
    tag: Element's tag name.
    depth: Defines how deep in the tree function should search for a match.

  Returns:
    xml.dom.minidom.Node: Sub tree (matching criteria) or original one.
  """
  matches = list(IterElementsWithTag(tree, tag, depth))
  return matches[0] if len(matches) == 1 else tree
def ExtractHistogramsFromDom(tree):
  """Computes the histogram names and descriptions from the XML representation.

  The attribute values of |tree| are normalized in place before extraction.

  Args:
    tree: A DOM tree of XML content.

  Returns:
    a tuple of (histograms, status) where histograms is a dictionary mapping
    histogram names to dictionaries containing histogram descriptions and status
    is a boolean indicating if errors were encountered in processing.
  """
  _NormalizeAllAttributeValues(tree)

  # Narrow each extraction step down to its own section of the document when
  # that section can be identified unambiguously.
  enums_tree, histograms_tree, histogram_suffixes_tree = (
      _GetTagSubTree(tree, section_tag, 2)
      for section_tag in ('enums', 'histograms', 'histogram_suffixes_list'))

  enums, enum_errors = ExtractEnumsFromXmlTree(enums_tree)
  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(
      histograms_tree, enums)
  # Tokens are expanded before suffixes, so suffixes can target the generated
  # histograms.
  histograms, update_token_errors = _UpdateHistogramsWithTokens(histograms)
  update_suffix_errors = _UpdateHistogramsWithSuffixes(histogram_suffixes_tree,
                                                       histograms)

  had_errors = any((enum_errors, histogram_errors, update_suffix_errors,
                    update_token_errors))
  return histograms, had_errors
def ExtractHistograms(filename):
  """Loads histogram definitions from a disk file.

  Args:
    filename: a file path to load data from.

  Returns:
    a dictionary of histogram descriptions.

  Raises:
    Error: if the file is not well-formatted.
  """
  # Read the file as bytes so that the XML parser decodes it itself. Text mode
  # without an explicit encoding would decode with the platform's default
  # encoding, which corrupts non-ASCII content wherever that default is not
  # UTF-8.
  with open(filename, 'rb') as f:
    tree = xml.dom.minidom.parse(f)

  histograms, had_errors = ExtractHistogramsFromDom(tree)
  if had_errors:
    logging.error('Error parsing %s', filename)
    raise Error('Error parsing %s' % filename)
  return histograms
def ExtractNames(histograms):
  """Returns the sorted list of all histogram names in |histograms|."""
  return sorted(histograms)
def ExtractObsoleteNames(histograms):
  """Returns the sorted names of histograms with a non-empty obsolete reason."""
  return sorted(
      name for name, entry in histograms.items() if entry.get("obsolete"))