tools/metrics/histograms/extract_histograms.py - cobalt - Git at Google

 # Copyright 2013 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """Extract histogram names from the description XML file.

 For more information on the format of the XML file, which is self-documenting,
 see histograms.xml; however, here is a simple example to get you started. The
 XML below will generate the following five histograms:

     HistogramTime
     HistogramEnum
     HistogramEnum_Chrome
     HistogramEnum_IE
     HistogramEnum_Firefox

 <histogram-configuration>

 <histograms>

 <histogram name="HistogramTime" units="milliseconds">
   <owner>person@chromium.org</owner>
   <owner>some-team@chromium.org</owner>
   <summary>A brief description.</summary>
 </histogram>

 <histogram name="HistogramEnum" enum="MyEnumType">
   <owner>person@chromium.org</owner>
   <summary>This histogram sports an enum value type.</summary>
 </histogram>

 </histograms>

 <enums>

 <enum name="MyEnumType">
   <summary>This is an example enum type, where the values mean little.</summary>
   <int value="1" label="FIRST_VALUE">This is the first value.</int>
   <int value="2" label="SECOND_VALUE">This is the second value.</int>
 </enum>

 </enums>

 <histogram_suffixes_list>

 <histogram_suffixes name="BrowserType" separator="_">
   <suffix name="Chrome"/>
   <suffix name="IE"/>
   <suffix name="Firefox"/>
   <affected-histogram name="HistogramEnum"/>
 </histogram_suffixes>

 </histogram_suffixes_list>

 </histogram-configuration>
 """

 import bisect
 import copy
 import datetime
 import itertools

 try:
   import HTMLParser
   html = HTMLParser.HTMLParser()
 except ImportError:  # For Py3 compatibility
   import html

 import logging
 import re
 import xml.dom.minidom

 import histogram_configuration_model

 # Loose pattern used by _ExtractOwners to decide whether the text of an <owner>
 # tag is an email address.
 BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'

 # Placeholder <owner> text. _ExtractOwners treats an owner containing this text
 # as "has an owner" but does not include it in the returned address list.
 OWNER_PLACEHOLDER = (
     'Please list the metric\'s owners. Add more owner tags as needed.')

 # Number of times a <histogram_suffixes> node whose base histogram is missing
 # is re-queued by _UpdateHistogramsWithSuffixes before an error is logged.
 MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH = 5

 # Obsolete reason given to histograms marked base="true" that do not already
 # have an obsolete reason of their own.
 DEFAULT_BASE_HISTOGRAM_OBSOLETE_REASON = (
     'Base histogram. Use suffixes of this histogram instead.')

 # strptime format accepted for the date form of the expires_after attribute.
 EXPIRY_DATE_PATTERN = "%Y-%m-%d"
 # Milestone form of expires_after: 'M' followed by two or three digits, with
 # nothing after them.
 EXPIRY_MILESTONE_RE = re.compile(r'M[0-9]{2,3}\Z')


 # Shorthand for the minidom node type constant identifying element nodes.
 _ELEMENT_NODE = xml.dom.minidom.Node.ELEMENT_NODE


 class Error(Exception):
   """Exception raised by this module when histogram XML cannot be expanded."""


 def IterElementsWithTag(root, tag, depth=-1):
   """Yields the shallowest elements named |tag| found under |root|.

   It's meant to be a replacement for `getElementsByTagName`. That method
   returns matches from every level of the tree, whereas this generator stops
   descending below a node as soon as that node has at least one direct child
   with the requested tag (nested tags are not supported in histograms files).

   Args:
     root: XML dom node to search under.
     tag: Element's tag name.
     depth: Defines how deep in the tree the function should search for a
       match. A positive value N inspects at most N levels below |root| (1
       means direct children only). A negative value, the default, places no
       limit on the depth.

   Yields:
     xml.dom.minidom.Node: Element matching criteria.
   """
   # Only reachable when a caller passes depth=0 explicitly; the recursive
   # calls below never pass 0.
   if depth == 0 and root.nodeType == _ELEMENT_NODE and root.tagName == tag:
     yield root
     return

   had_tag = False
   for child in root.childNodes:
     if child.nodeType == _ELEMENT_NODE and child.tagName == tag:
       had_tag = True
       yield child

   depth -= 1

   # Descend only when this level had no match and the depth budget is not
   # used up. A negative depth never reaches zero, so it is unlimited.
   if not had_tag and depth != 0:
     for child in root.childNodes:
       yield from IterElementsWithTag(child, tag, depth)


 def _GetTextFromChildNodes(node):
   """Builds the normalized text content of |node| from its children.

   Comment children are skipped. Every other child is serialized with toxml(),
   split into paragraphs on blank lines, and each non-empty paragraph is passed
   through NormalizeString so that consecutive lines are joined with a single
   space. Paragraph breaks are kept so that long text stays readable on
   dashboards.

   For example, given
     <tag>
       Some
       words.

       <!--Child comment node.-->

       Words.
     </tag>
   the returned text is 'Some words.\n\nWords.'.

   Args:
     node: The DOM Element whose children's text is to be extracted.

   Returns:
     The concatenated text of all non-comment children, stripped of leading and
     trailing whitespace.
   """
   separator = '\n\n'
   comment_type = xml.dom.minidom.Node.COMMENT_NODE
   pieces = []

   for child in node.childNodes:
     if child.nodeType == comment_type:
       continue
     serialized = child.toxml()
     if not serialized:
       continue

     # Empty chunks are dropped before normalization. A whitespace-only chunk
     # survives as '' and therefore still contributes a paragraph separator.
     chunks = (NormalizeString(chunk)
               for chunk in serialized.split(separator)
               if chunk)
     pieces.append(separator.join(chunks))

   return ''.join(pieces).strip()


 def NormalizeString(text):
   r"""Collapses whitespace and decodes HTML escapes in |text|.

   Leading and trailing whitespace is dropped and every internal run of
   whitespace becomes one space. HTML escaped characters such as &quot; or
   &gt; are then unescaped.

   Args:
     text: The string to normalize, e.g. '\n\n a \n b&gt;c  '.

   Returns:
     The normalized string, e.g. 'a b>c'.
   """
   words = text.split()
   collapsed = ' '.join(words)
   return html.unescape(collapsed)


 def _NormalizeAllAttributeValues(node):
   """Normalizes every attribute value in the tree rooted at |node|, in place.

   Each attribute value of each element node is replaced by the result of
   NormalizeString. Non-element nodes are only traversed.

   Args:
     node: The minidom node to be normalized.

   Returns:
     The same |node| that was passed in.
   """
   if node.nodeType == _ELEMENT_NODE:
     attribute_map = node.attributes
     for attribute_name in attribute_map.keys():
       attribute = attribute_map[attribute_name]
       attribute.value = NormalizeString(attribute.value)

   for descendant in node.childNodes:
     _NormalizeAllAttributeValues(descendant)

   return node


 def _ExpandHistogramNameWithSuffixes(suffix_name, histogram_name,
                                      histogram_suffixes_node):
   """Creates a new histogram name based on a histogram suffix.

   Two optional attributes of the <histogram_suffixes> node control the result:
     separator: The string placed between the suffix and the histogram name.
       Defaults to '_'.
     ordering: 'suffix' (the default), 'prefix', or 'prefix,N'. With 'suffix'
       the suffix is appended to the name. With 'prefix' the suffix is inserted
       after the first N dot-separated sections of the name (N defaults to 1).

   Args:
     suffix_name: The suffix string to apply to the histogram name. May be empty.
     histogram_name: The name of the histogram. May be of the form
       Group.BaseName or BaseName.
     histogram_suffixes_node: The histogram_suffixes XML node.

   Returns:
     A string with the expanded histogram name. When |suffix_name| is empty the
     histogram name is returned unchanged.

   Raises:
     Error: if the ordering attribute is malformed or the expansion can't be
       done.
   """
   if histogram_suffixes_node.hasAttribute('separator'):
     separator = histogram_suffixes_node.getAttribute('separator')
   else:
     separator = '_'

   if histogram_suffixes_node.hasAttribute('ordering'):
     ordering = histogram_suffixes_node.getAttribute('ordering')
   else:
     ordering = 'suffix'
   parts = ordering.split(',')
   ordering = parts[0]
   placement = 1
   if len(parts) > 1:
     # Report a malformed placement as Error, which is what callers catch,
     # instead of letting int() raise ValueError.
     try:
       placement = int(parts[1])
     except ValueError:
       logging.error('ordering placement needs to be an integer, value is %s',
                     parts[1])
       raise Error() from None
   if ordering not in ['prefix', 'suffix']:
     logging.error('ordering needs to be prefix or suffix, value is %s',
                   ordering)
     raise Error()

   if not suffix_name:
     return histogram_name

   if ordering == 'suffix':
     return histogram_name + separator + suffix_name

   # For prefixes, the suffix_name is inserted between the "cluster" and the
   # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
   sections = histogram_name.split('.')
   if len(sections) <= placement:
     logging.error(
         'Prefix histogram_suffixes expansions require histogram names which '
         'include a dot separator. Histogram name is %s, histogram_suffixes is '
         '%s, and placement is %d', histogram_name,
         histogram_suffixes_node.getAttribute('name'), placement)
     raise Error()

   cluster = '.'.join(sections[0:placement]) + '.'
   remainder = '.'.join(sections[placement:])
   return cluster + suffix_name + separator + remainder


 def ExtractEnumsFromXmlTree(tree):
   """Collects every <enum> node under |tree| into a dictionary.

   Validation problems are logged rather than raised: enums out of alphabetical
   order, duplicate enums, non-obsolete enums without any <int>, duplicate
   values or labels within an enum, and values out of numerical order.

   Args:
     tree: XML dom tree to search for <enum> nodes.

   Returns:
     A tuple (enums, have_errors). |enums| maps an enum name to a dict with the
     keys 'name', 'values' (int value -> {'label': ..., 'summary': ...}) and,
     when the enum has a <summary>, 'summary'. |have_errors| is True if any
     problem was logged as an error.
   """
   extracted = {}
   found_errors = False
   previous_name = None

   for enum_node in IterElementsWithTag(tree, 'enum'):
     enum_name = enum_node.getAttribute('name')
     if (previous_name is not None and
         enum_name.lower() < previous_name.lower()):
       logging.error('Enums %s and %s are not in alphabetical order',
                     previous_name, enum_name)
       found_errors = True
     previous_name = enum_name

     if enum_name in extracted:
       logging.error('Duplicate enum %s', enum_name)
       found_errors = True
       continue

     int_nodes = list(IterElementsWithTag(enum_node, 'int'))
     obsolete_nodes = list(IterElementsWithTag(enum_node, 'obsolete', 1))
     if not (int_nodes or obsolete_nodes):
       logging.error('Non-obsolete enum %s should have at least one <int>',
                     enum_name)
       found_errors = True
       continue

     # First pass: record each value once, rejecting repeated values/labels.
     values_by_int = {}
     seen_labels = set()
     for int_node in int_nodes:
       value = int(int_node.getAttribute('value'))
       if value in values_by_int:
         logging.error('Duplicate enum value %d for enum %s', value, enum_name)
         found_errors = True
         continue
       label = int_node.getAttribute('label')
       if label in seen_labels:
         logging.error('Duplicate enum label "%s" for enum %s', label,
                       enum_name)
         found_errors = True
         continue
       seen_labels.add(label)
       values_by_int[value] = {
           'label': label,
           'summary': _GetTextFromChildNodes(int_node),
       }

     entry = {'name': enum_name, 'values': values_by_int}
     sorted_values = sorted(values_by_int)

     # Second pass: check document order and suggest where a misplaced value
     # belongs. A misplaced value does not become the new comparison baseline.
     previous_value = None
     for int_node in int_nodes:
       value = int(int_node.getAttribute('value'))
       if previous_value is None or value >= previous_value:
         previous_value = value
         continue

       logging.error('Enum %s int values %d and %d are not in numerical order',
                     enum_name, previous_value, value)
       found_errors = True
       insert_at = bisect.bisect_left(sorted_values, value)
       if insert_at == 0:
         logging.warning('Insert value %d at the beginning', value)
       else:
         neighbor = sorted_values[insert_at - 1]
         logging.warning('Insert value %d after %d ("%s")', value, neighbor,
                         values_by_int[neighbor]['label'])

     # Only the first <summary> found is used.
     summary_node = next(IterElementsWithTag(enum_node, 'summary'), None)
     if summary_node is not None:
       entry['summary'] = _GetTextFromChildNodes(summary_node)

     extracted[enum_name] = entry

   return extracted, found_errors


 def _ExtractOwners(node):
   """Reads the <owner> children of |node|.

   Only the first child node of each <owner> is inspected. Its text counts as
   an owner when it matches BASIC_EMAIL_REGEXP or contains OWNER_PLACEHOLDER.

   Args:
     node: A DOM Element.

   Returns:
     A tuple of owner-related info, e.g. (['alice@chromium.org'], True)

     The first element is the list of owner email addresses; the placeholder
     string is never included. The second element is a boolean indicating
     whether the node has an owner; an owner given as the placeholder string
     counts as having an owner.
   """
   matcher = re.compile(BASIC_EMAIL_REGEXP)
   addresses = []
   owner_found = False

   for owner_node in IterElementsWithTag(node, 'owner', 1):
     first_child = owner_node.firstChild
     text = ''
     if first_child and first_child.nodeValue:
       text = first_child.nodeValue
     if not text:
       continue

     if matcher.match(text):
       owner_found = True
       addresses.append(text)
     elif OWNER_PLACEHOLDER in text:
       owner_found = True

   return addresses, owner_found


 def _ExtractImprovementDirection(histogram_node):
   """Reads the direction of the histogram's <improvement> tag, if present.

   Args:
     histogram_node: A DOM Element corresponding to a histogram.

   Returns:
     A (direction, error) tuple. With no <improvement> tag both are None. With
     exactly one tag carrying a valid direction, that direction is returned and
     the error is None. With several tags, or an invalid direction, the
     direction is None and the error is a message describing the problem.
   """
   improvement_nodes = histogram_node.getElementsByTagName('improvement')
   count = len(improvement_nodes)
   if count == 0:
     return None, None

   histogram_name = histogram_node.getAttribute('name')
   if count > 1:
     return (None,
             f'Histogram "{histogram_name}" has multiple <improvement> tags.')

   direction = improvement_nodes[0].getAttribute('direction')
   valid_directions = (
       histogram_configuration_model.IMPROVEMENT_DIRECTION_VALID_VALUES)
   if direction in valid_directions:
     return direction, None

   message = (
       f'Histogram "{histogram_name}" has an invalid direction "{direction}" '
       f'in its <improvement> tag.')
   return None, message


 def _ExtractComponents(histogram):
   """Collects the text of every <component> tag under the histogram element.

   Components are present when a histogram has a component tag, e.g.
   <component>UI&gt;Browser</component>. Components may also be present when an
   OWNERS file is given as a histogram owner, e.g. <owner>src/dir/OWNERS</owner>;
   in this case the component is extracted from adjacent DIR_METADATA files.
   See _ExtractComponentViaDirmd() in the following file for details:
   chromium/src/tools/metrics/histograms/expand_owners.py.

   Args:
     histogram: A DOM Element corresponding to a histogram.

   Returns:
     A list of the components associated with the histogram, e.g.
     ['UI>Browser>Spellcheck']. The list is empty when there is no
     <component> tag.
   """
   components = []
   for component_node in histogram.getElementsByTagName('component'):
     components.append(_GetTextFromChildNodes(component_node))
   return components


 def _ValidateDateString(date_str):
   """Checks whether |date_str| is a date in the 'YYYY-MM-DD' form.

   The check is delegated to datetime.strptime with EXPIRY_DATE_PATTERN.

   Args:
     date_str: string

   Returns:
     True iff |date_str| can be parsed with EXPIRY_DATE_PATTERN.
   """
   try:
     datetime.datetime.strptime(date_str, EXPIRY_DATE_PATTERN)
   except ValueError:
     return False
   else:
     return True

 def _ValidateMilestoneString(milestone_str):
   """Returns True iff |milestone_str| matches EXPIRY_MILESTONE_RE ('M*')."""
   return bool(EXPIRY_MILESTONE_RE.match(milestone_str))

 def _ProcessBaseHistogramAttribute(node, histogram_entry):
   """Copies the 'base' attribute of |node| into |histogram_entry|.

   Does nothing when |node| has no 'base' attribute. Otherwise 'base' is stored
   as a boolean (True iff the attribute equals 'true', ignoring case). A base
   entry without an obsolete reason also receives
   DEFAULT_BASE_HISTOGRAM_OBSOLETE_REASON.

   Args:
     node: A DOM Element that may carry a 'base' attribute.
     histogram_entry: The histogram dictionary to update in place.
   """
   if not node.hasAttribute('base'):
     return

   marked_base = node.getAttribute('base').lower() == 'true'
   histogram_entry['base'] = marked_base
   if marked_base:
     histogram_entry.setdefault('obsolete',
                                DEFAULT_BASE_HISTOGRAM_OBSOLETE_REASON)

 # The following code represents several concepts as JSON objects
 #
 # Token: an analog of <token> tag, represented as a JSON object like:
 # {
 #   'key': 'token_key',
 #   'variants': [a list of Variant objects]
 # }
 #
 # Variant: an analog of <variant> tag, represented as a JSON object like:
 # {
 #   'name': 'variant_name',
 #   'summary': 'variant_summary',
 #   'obsolete': 'Obsolete text.',
 #   'owners': ['me@chromium.org', 'you@chromium.org']
 # }
 #
 # Variants: an analog of <variants> tag, represented as a JSON object like:
 # {
 #   'name': 'variants_name',
 #   'variants': [a list of Variant objects]
 # }


 def _ExtractTokens(histogram, variants_dict):
   """Extracts tokens and variants from the given histogram element.

   Each direct <token> child becomes a Token dict {'key': ..., 'variants': [...]}.
   A token is skipped, and an error is logged, when its key repeats the key of
   an earlier accepted token or when '{key}' does not occur in the histogram
   name.

   Args:
     histogram: A DOM Element corresponding to a histogram.
     variants_dict: A dictionary of variants extracted from the tree.

   Returns:
     A tuple where the first element is a list of extracted Tokens, and the
         second indicates if any errors were detected while extracting them.
   """
   tokens = []
   # Keys of the tokens accepted so far. |tokens| holds Token dicts, so a
   # membership test against it could never match a key string.
   seen_token_keys = set()
   have_error = False
   histogram_name = histogram.getAttribute('name')

   for token_node in IterElementsWithTag(histogram, 'token', 1):
     token_key = token_node.getAttribute('key')
     if token_key in seen_token_keys:
       logging.error(
           'Histogram %s contains duplicate token key %s, please ensure token '
           'keys are unique.', histogram_name, token_key)
       have_error = True
       continue

     token_key_format = '{' + token_key + '}'
     if token_key_format not in histogram_name:
       logging.error(
           'Histogram %s includes a token tag but the token key is not present '
           'in histogram name. Please insert the token key into the histogram '
           'name in order for the token to be added.', histogram_name)
       have_error = True
       continue

     token = dict(key=token_key)
     token['variants'] = []

     # If 'variants' attribute is set for the <token>, get the list of Variant
     # objects from the |variants_dict|. Else, extract the <variant> children
     # nodes of the |token_node| as a list of Variant objects.
     if token_node.hasAttribute('variants'):
       variants_name = token_node.getAttribute('variants')
       variant_list = variants_dict.get(variants_name)
       if variant_list:
         # Copy so that extending the token's list below does not modify the
         # shared entry in |variants_dict|.
         token['variants'] = variant_list[:]
       else:
         logging.error(
             'The variants attribute %s of token key %s of histogram %s does '
             'not have a corresponding <variants> tag.', variants_name,
             token_key, histogram_name)
         have_error = True
     # Inline and out-of-line variants can be combined.
     token['variants'].extend(_ExtractVariantNodes(token_node))

     tokens.append(token)
     seen_token_keys.add(token_key)

   return tokens, have_error


 def _ExtractVariantNodes(node):
   """Converts the direct <variant> children of |node| into Variant dicts.

   Every Variant has 'name' and 'summary' (the name is used as the summary when
   the node has no summary attribute). 'obsolete' is added when the variant has
   an obsolete reason, and 'owners' when it has an owner.

   Args:
     node: A DOM element whose <variant> children are read, e.g. a <token>
       node.

   Returns:
     A list of Variants.
   """
   variants = []
   for variant_node in IterElementsWithTag(node, 'variant', 1):
     variant_name = variant_node.getAttribute('name')
     if variant_node.hasAttribute('summary'):
       variant_summary = variant_node.getAttribute('summary')
     else:
       variant_summary = variant_name
     entry = {'name': variant_name, 'summary': variant_summary}

     reason = _GetObsoleteReason(variant_node)
     if reason:
       entry['obsolete'] = reason

     owner_emails, has_owner = _ExtractOwners(variant_node)
     if has_owner:
       entry['owners'] = owner_emails

     variants.append(entry)

   return variants


 def _ExtractHistogramsFromXmlTree(tree, enums):
   """Extracts all <histogram> nodes in the tree into a dictionary.

   Validation problems are logged as errors and reported through the returned
   flag; they do not raise.

   Args:
     tree: XML dom tree containing <histogram> (and possibly <variants>) nodes.
     enums: A dictionary mapping enum names to enum entries; a histogram's
       'enum' attribute is looked up in it.

   Returns:
     A tuple (histograms, have_errors). |histograms| maps each histogram name
     to an entry dict that may contain the keys 'expires_after', 'owners',
     'improvement', 'components', 'summary', 'obsolete', 'units', 'enum',
     'tokens' and 'base'. |have_errors| is True if any error was logged here or
     while extracting variants and tokens.
   """

   # Process the histograms. The descriptions can include HTML tags.
   histograms = {}
   have_errors = False
   variants_dict, variants_errors = _ExtractVariantsFromXmlTree(tree)
   have_errors = have_errors or variants_errors

   last_name = None
   for histogram in IterElementsWithTag(tree, 'histogram'):
     name = histogram.getAttribute('name')
     # Names are compared case-insensitively against the previous histogram.
     if last_name is not None and name.lower() < last_name.lower():
       logging.error('Histograms %s and %s are not in alphabetical order',
                     last_name, name)
       have_errors = True
     last_name = name
     if name in histograms:
       logging.error('Duplicate histogram definition %s', name)
       have_errors = True
       continue
     histograms[name] = histogram_entry = {}

     # Handle expiry attribute.
     if histogram.hasAttribute('expires_after'):
       expiry_str = histogram.getAttribute('expires_after')
       if (expiry_str == "never" or _ValidateMilestoneString(expiry_str) or
           _ValidateDateString(expiry_str)):
         histogram_entry['expires_after'] = expiry_str
       else:
         logging.error(
             'Expiry of histogram %s does not match expected date format ("%s"),'
             ' milestone format (M*), or "never": found %s.', name,
             EXPIRY_DATE_PATTERN, expiry_str)
         have_errors = True
     else:
       logging.error(
           'Your histogram %s must have an expiry date. If you are marking a '
           'histogram as obsolete, please set the expiry date to the current '
           'date.', name)
       have_errors = True

     # Find <owner> tag.
     owners, has_owner = _ExtractOwners(histogram)
     if owners:
       histogram_entry['owners'] = owners

     # Find the <improvement> tag, if any.
     improvement_direction, improvement_error = _ExtractImprovementDirection(
         histogram)
     if improvement_direction:
       histogram_entry['improvement'] = improvement_direction
     if improvement_error:
       logging.error(improvement_error)
       have_errors = True

     # Find <component> tag.
     components = _ExtractComponents(histogram)
     if components:
       histogram_entry['components'] = components

     # Find <summary> tag.
     summary_nodes = list(IterElementsWithTag(histogram, 'summary'))

     if summary_nodes:
       histogram_entry['summary'] = _GetTextFromChildNodes(summary_nodes[0])
     else:
       # Placeholder summary; a missing summary is still reported below for
       # non-obsolete histograms.
       histogram_entry['summary'] = 'TBD'

     # Find <obsolete> tag.
     obsolete_nodes = list(IterElementsWithTag(histogram, 'obsolete', 1))
     if obsolete_nodes:
       reason = _GetTextFromChildNodes(obsolete_nodes[0])
       histogram_entry['obsolete'] = reason

     # Non-obsolete histograms should provide a non-empty <summary>.
     if not obsolete_nodes and (not summary_nodes or
                                not histogram_entry['summary']):
       logging.error('histogram %s should provide a <summary>', name)
       have_errors = True

     # Non-obsolete histograms should specify <owner>s.
     if not obsolete_nodes and not has_owner:
       logging.error('histogram %s should specify <owner>s', name)
       have_errors = True

     # Histograms should have either units or enum.
     if (not histogram.hasAttribute('units') and
         not histogram.hasAttribute('enum')):
       logging.error('histogram %s should have either units or enum', name)
       have_errors = True

     # Histograms should not have both units and enum.
     if (histogram.hasAttribute('units') and
         histogram.hasAttribute('enum')):
       logging.error('histogram %s should not have both units and enum', name)
       have_errors = True

     # Handle units.
     if histogram.hasAttribute('units'):
       histogram_entry['units'] = histogram.getAttribute('units')

     # Handle enum types.
     if histogram.hasAttribute('enum'):
       enum_name = histogram.getAttribute('enum')
       if enum_name not in enums:
         logging.error('Unknown enum %s in histogram %s', enum_name, name)
         have_errors = True
       else:
         # The enum entry itself is stored, not a copy of it.
         histogram_entry['enum'] = enums[enum_name]

     # Find <token> tag.
     tokens, have_token_errors = _ExtractTokens(histogram, variants_dict)
     have_errors = have_errors or have_token_errors
     if tokens:
       histogram_entry['tokens'] = tokens

     # Runs last so that an explicit <obsolete> reason recorded above takes
     # precedence over the default base-histogram reason.
     _ProcessBaseHistogramAttribute(histogram, histogram_entry)

   return histograms, have_errors


 def _ExtractVariantsFromXmlTree(tree):
   """Collects every <variants> node under |tree| into a dictionary.

   When two <variants> nodes share a name, the first one wins and an error is
   logged for each later one.

   Args:
     tree: A DOM Element containing histograms and variants nodes.

   Returns:
     A tuple where the first element is a dictionary of extracted Variants, where
         the key is the variants name and the value is a list of Variant objects.
         The second element indicates if any errors were detected while
         extracting them.
   """
   extracted = {}
   found_duplicate = False

   for variants_node in IterElementsWithTag(tree, 'variants'):
     key = variants_node.getAttribute('name')
     if key not in extracted:
       extracted[key] = _ExtractVariantNodes(variants_node)
       continue
     logging.error('Duplicate variants definition %s', key)
     found_duplicate = True

   return extracted, found_duplicate


 def _GetObsoleteReason(node):
   """Returns the text of the node's <obsolete> child, or None without one.

   Only direct children are considered, and the first <obsolete> child found is
   used (there can be at most 1 obsolete element per node).

   Args:
     node: A DOM Element associated with a histogram.
   """
   obsolete_child = next(
       (child for child in node.childNodes if child.localName == 'obsolete'),
       None)
   if obsolete_child is None:
     return None
   return _GetTextFromChildNodes(obsolete_child)


 def _UpdateHistogramsWithSuffixes(tree, histograms):
   """Processes <histogram_suffixes> tags and combines with affected histograms.

   The histograms dictionary will be updated in-place by adding new histograms
   created by combining histograms themselves with histogram_suffixes targeting
   these histograms.

   Problems (ordering violations, missing labels, missing base histograms,
   names that cannot be expanded) are logged and reported through the return
   value; they do not raise.

   Args:
     tree: XML dom tree.
     histograms: a dictionary of histograms previously extracted from the tree;

   Returns:
     True if any errors were found.
   """
   have_errors = False

   histogram_suffix_tag = 'histogram_suffixes'
   suffix_tag = 'suffix'
   with_tag = 'with-suffix'

   # Verify order of histogram_suffixes fields first.
   last_name = None

   for histogram_suffixes in IterElementsWithTag(
       tree, histogram_suffix_tag, depth=1):
     name = histogram_suffixes.getAttribute('name')
     if last_name is not None and name.lower() < last_name.lower():
       logging.error('histogram_suffixes %s and %s are not in alphabetical '
                     'order', last_name, name)
       have_errors = True
     last_name = name

   # histogram_suffixes can depend on other histogram_suffixes, so we need to be
   # careful. histogram_suffixes whose dependencies have not yet been processed
   # get relegated to |reprocess_queue| to be processed later.
   reprocess_queue = []

   def GenerateHistogramSuffixes():
     # Yields (reprocess_count, node) pairs: first every node from the tree,
     # then the re-queued ones. |reprocess_queue| is iterated while the loop
     # below keeps appending to it, so nodes queued during this same pass are
     # still picked up.
     for f in IterElementsWithTag(tree, histogram_suffix_tag):
       yield 0, f
     for r, f in reprocess_queue:
       yield r, f

   for reprocess_count, histogram_suffixes in GenerateHistogramSuffixes():
     # Check dependencies first.
     dependencies_valid = True
     affected_histograms = list(IterElementsWithTag(
         histogram_suffixes, 'affected-histogram', 1))
     for affected_histogram in affected_histograms:
       histogram_name = affected_histogram.getAttribute('name')
       if histogram_name not in histograms:
         # Base histogram is missing.
         dependencies_valid = False
         missing_dependency = histogram_name
         break
     if not dependencies_valid:
       # Retry later, in case another histogram_suffixes creates the missing
       # histogram; give up after MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH retries.
       if reprocess_count < MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH:
         reprocess_queue.append((reprocess_count + 1, histogram_suffixes))
         continue
       else:
         logging.error('histogram_suffixes %s is missing its dependency %s',
                       histogram_suffixes.getAttribute('name'),
                       missing_dependency)
         have_errors = True
         continue

     # If the suffix group has an obsolete tag, all suffixes it generates inherit
     # its reason.
     group_obsolete_reason = _GetObsoleteReason(histogram_suffixes)

     name = histogram_suffixes.getAttribute('name')
     suffix_nodes = list(IterElementsWithTag(histogram_suffixes, suffix_tag, 1))
     # Maps suffix name -> label for every <suffix> of this group.
     suffix_labels = {}
     for suffix in suffix_nodes:
       suffix_name = suffix.getAttribute('name')
       if not suffix.hasAttribute('label'):
         logging.error('suffix %s in histogram_suffixes %s should have a label',
                       suffix_name, name)
         have_errors = True
       suffix_labels[suffix_name] = suffix.getAttribute('label')
     # Find owners list under current histogram_suffixes tag.
     owners, _ = _ExtractOwners(histogram_suffixes)

     last_histogram_name = None
     for affected_histogram in affected_histograms:
       histogram_name = affected_histogram.getAttribute('name')
       if (last_histogram_name is not None and
           histogram_name.lower() < last_histogram_name.lower()):
         logging.error('Affected histograms %s and %s of histogram_suffixes %s '
                       'are not in alphabetical order', last_histogram_name,
                       histogram_name, name)
         have_errors = True
       last_histogram_name = histogram_name
       # <with-suffix> children, when present, replace the group's full
       # <suffix> list for this affected histogram.
       with_suffixes = list(IterElementsWithTag(affected_histogram, with_tag, 1))
       if with_suffixes:
         suffixes_to_add = with_suffixes
       else:
         suffixes_to_add = suffix_nodes
       for suffix in suffixes_to_add:
         suffix_name = suffix.getAttribute('name')
         try:
           new_histogram_name = _ExpandHistogramNameWithSuffixes(
               suffix_name, histogram_name, histogram_suffixes)
           # An empty suffix name leaves the name unchanged; in that case the
           # existing entry is updated below instead of a copy.
           if new_histogram_name != histogram_name:
             new_histogram = copy.deepcopy(histograms[histogram_name])
             # Do not copy forward base histogram state to suffixed
             # histograms. Any suffixed histograms that wish to remain base
             # histograms must explicitly re-declare themselves as base
             # histograms.
             if new_histogram.get('base', False):
               del new_histogram['base']
               if (new_histogram.get(
                   'obsolete', '') == DEFAULT_BASE_HISTOGRAM_OBSOLETE_REASON):
                 del new_histogram['obsolete']
             histograms[new_histogram_name] = new_histogram

           # NOTE(review): |suffix_label| is not used again within this
           # function.
           suffix_label = suffix_labels.get(suffix_name, '')

           histogram_entry = histograms[new_histogram_name]

           # If no owners are added for this histogram-suffixes, it inherits the
           # owners of its parents.
           if owners:
             histogram_entry['owners'] = owners

           # If a suffix has an obsolete node, it's marked as obsolete for the
           # specified reason, overwriting its group's obsoletion reason if the
           # group itself was obsolete as well.
           obsolete_reason = _GetObsoleteReason(suffix)
           if not obsolete_reason:
             obsolete_reason = _GetObsoleteReason(affected_histogram)
           if not obsolete_reason:
             obsolete_reason = group_obsolete_reason

           # If the suffix has an obsolete tag, all histograms it generates
           # inherit it.
           if obsolete_reason:
             histogram_entry['obsolete'] = obsolete_reason

           _ProcessBaseHistogramAttribute(suffix, histogram_entry)

         except Error:
           # The failing helper has already logged the reason.
           have_errors = True

   return have_errors


 class TokenAssignment:
   """One choice of Variant for every Token of a histogram pattern.

   Attributes:
     pairings: A token_name to Variant map.
   """

   def __init__(self, pairings):
     self.pairings = pairings


 def _GetTokenAssignments(tokens):
   """Builds every combination of one variant per token.

   The result is the cross product of the tokens' variant lists, so a token
   with no variants produces an empty result.

   Args:
     tokens: The list of Tokens to create assignments for.

   Returns:
     A list of TokenAssignments.
   """
   keys = []
   variant_lists = []
   for token in tokens:
     keys.append(token['key'])
     variant_lists.append(token['variants'])

   assignments = []
   for combination in itertools.product(*variant_lists):
     pairings = dict(zip(keys, combination))
     assignments.append(TokenAssignment(pairings=pairings))
   return assignments


 def _GenerateNewHistogramsFromTokens(histogram_name, histograms_dict,
                                      new_histograms_dict):
   """Expands a tokenized histogram into one histogram per variant combination.

   For each combination, the tokens in the histogram name are replaced with the
   variant names and the tokens in the summary with the variant summaries. The
   generated entry is a copy of the original entry without its 'tokens'; a
   variant's obsolete reason or owners override those of the original.

   Args:
     histogram_name: The name of the histogram.
     histograms_dict: The dictionary of all histograms extracted from the tree.
     new_histograms_dict: The dictionary of histograms to add newly generated
         histograms to.

   Returns:
     A boolean that is True if a generated histogram name already exists in the
         |new_histograms_dict|.
   """
   found_duplicate = False
   pattern_entry = histograms_dict[histogram_name]
   summary_pattern = pattern_entry['summary']

   # Every assignment is one element of the cross product of the token
   # variants and yields exactly one generated histogram.
   for assignment in _GetTokenAssignments(pattern_entry['tokens']):
     # Substitution maps used to format the histogram name and the summary.
     names_by_token = {}
     summaries_by_token = {}
     obsolete_reason = ''
     owners = []

     for token_key, variant in assignment.pairings.items():
       names_by_token[token_key] = variant['name']
       summaries_by_token[token_key] = variant['summary']

       # The obsolete reason of the last obsolete variant wins.
       if 'obsolete' in variant:
         obsolete_reason = variant['obsolete']

       # Owners of all variants are accumulated.
       if 'owners' in variant:
         owners += variant['owners']

     generated_name = histogram_name.format(**names_by_token)
     generated_summary = summary_pattern.format(**summaries_by_token)

     if generated_name in new_histograms_dict:
       logging.error(
           "Duplicate histogram name %s generated. Please remove identical "
           "variants in different tokens in %s." %
           (generated_name, histogram_name))
       found_duplicate = True
       continue

     # Shallow copy of the pattern entry, leaving out the <token> data.
     generated_entry = {
         key: value for key, value in pattern_entry.items() if key != 'tokens'
     }
     generated_entry['summary'] = generated_summary

     if obsolete_reason:
       generated_entry['obsolete'] = obsolete_reason

     if owners:
       generated_entry['owners'] = owners

     new_histograms_dict[generated_name] = generated_entry

   return found_duplicate


 def _UpdateHistogramsWithTokens(histograms_dict):
   """Processes histograms and combines with variants of tokens.

   Args:
     histograms_dict: A dictionary of all the histograms extracted from the tree.

   Returns:
     A tuple where the first element is the replacement histograms dictionary,
         containing the original histograms without tokens and the histograms
         newly generated from the variant combinations of tokenized histograms.
         The second element is a boolean that is True if there was an error.
   """
   have_error = False
   # Create a new dict instead of modifying in place because newly generated
   # histograms are added while iterating through |histograms_dict|.
   new_histograms_dict = {}
   for histogram_name, histogram_node in histograms_dict.items():
     if 'tokens' in histogram_node:
       # Always run the generation and only then fold its result into
       # |have_error|. Putting the call on the right-hand side of `or` would
       # short-circuit after the first failure, so later tokenized histograms
       # would neither be generated nor checked for duplicates.
       generation_failed = _GenerateNewHistogramsFromTokens(
           histogram_name, histograms_dict, new_histograms_dict)
       have_error = have_error or generation_failed
     else:
       # For histograms without tokens, copy to new histograms dict.
       new_histograms_dict[histogram_name] = histogram_node

   return new_histograms_dict, have_error


 def _GetTagSubTree(tree, tag, depth):
   """Returns the sub tree rooted at the single element with the given tag.

   If the search finds no element with that tag name, or more than one, the
   original tree is returned unchanged.

   Args:
     tree: XML dom tree.
     tag: Element's tag name.
     depth: Defines how deep in the tree function should search for a match.

   Returns:
     xml.dom.minidom.Node: Sub tree (matching criteria) or original one.
   """
   matches = list(IterElementsWithTag(tree, tag, depth))
   # Only an unambiguous, single match replaces the tree.
   return matches[0] if len(matches) == 1 else tree


 def ExtractHistogramsFromDom(tree):
   """Builds the histogram names and descriptions from a parsed XML document.

   Args:
     tree: A DOM tree of XML content.

   Returns:
     A tuple of (histograms, status) where histograms is a dictionary mapping
     histogram names to dictionaries containing histogram descriptions and
     status is truthy if errors were encountered in processing.
   """
   _NormalizeAllAttributeValues(tree)

   # Pick the sub tree handed to each section's processing step.
   enums_root = _GetTagSubTree(tree, 'enums', 2)
   histograms_root = _GetTagSubTree(tree, 'histograms', 2)
   suffixes_root = _GetTagSubTree(tree, 'histogram_suffixes_list', 2)

   enums, enums_failed = ExtractEnumsFromXmlTree(enums_root)
   histograms, histograms_failed = _ExtractHistogramsFromXmlTree(
       histograms_root, enums)
   # Tokens are expanded first; suffixes are then applied to the result.
   histograms, tokens_failed = _UpdateHistogramsWithTokens(histograms)
   suffixes_failed = _UpdateHistogramsWithSuffixes(suffixes_root, histograms)

   had_errors = (enums_failed or histograms_failed or suffixes_failed
                 or tokens_failed)
   return histograms, had_errors


 def ExtractHistograms(filename):
   """Loads histogram definitions from a disk file.

   Args:
     filename: a file path to load data from.

   Returns:
     a dictionary of histogram descriptions.

   Raises:
     Error: if the file is not well-formatted.
   """
   # Read raw bytes so that the XML parser decodes the document itself (from
   # its BOM / encoding declaration). In text mode the content would first be
   # decoded with the platform's default encoding, which is not UTF-8
   # everywhere.
   with open(filename, 'rb') as f:
     tree = xml.dom.minidom.parse(f)

   # The DOM is fully built at this point, so the file is already closed while
   # the histograms are extracted.
   histograms, had_errors = ExtractHistogramsFromDom(tree)
   if had_errors:
     logging.error('Error parsing %s', filename)
     raise Error()
   return histograms


 def ExtractNames(histograms):
   """Returns the histogram names (the keys of |histograms|) in sorted order."""
   # Iterating a dict yields its keys.
   return sorted(histograms)


 def ExtractObsoleteNames(histograms):
   """Returns the sorted names of histograms with a truthy "obsolete" entry."""
   obsolete_names = [
       name for name, entry in histograms.items() if entry.get("obsolete")
   ]
   obsolete_names.sort()
   return obsolete_names