blob: 2e82e8e7d928b4869e9d5078f2f2c26d6d73d8ea [file] [log] [blame]
# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for extracting emails and components from OWNERS files."""
import extract_histograms
import json
import os
import subprocess
import sys
import re
# Regular expression for a bare email address. It is matched against the text
# of owner tags and against the first word of each line in an OWNERS file.
_EMAIL_PATTERN = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
# The file name that well-formatted OWNERS file paths must end with.
_OWNERS = 'OWNERS'
# Three '..' are used because calling dirname() yields the path to this
# module's directory, histograms, and the directory above tools, which may or
# may not be src depending on the machine running the code, is up three
# directory levels from the histograms directory.
# This is a list of path components; callers combine them with os.path.join().
DIR_ABOVE_TOOLS = [os.path.dirname(__file__), '..', '..', '..']
# The prefix that well-formatted OWNERS file paths must start with. It is
# stripped before a path is resolved against DIR_ABOVE_TOOLS.
SRC = 'src/'
class Error(Exception):
  """Raised when owner or component information cannot be processed."""
def _AddTextNodeWithNewLineAndIndent(histogram, node_to_insert_before):
  """Inserts a newline-and-indent DOM Text Node into the histogram.

  The text node is created by the histogram's owner document and is placed
  immediately before |node_to_insert_before|.

  Args:
    histogram: The histogram node in which to insert a text node.
    node_to_insert_before: The child of histogram that the text node precedes.
  """
  whitespace_node = histogram.ownerDocument.createTextNode('\n ')
  histogram.insertBefore(whitespace_node, node_to_insert_before)
def _IsValidPrimaryOwnerEmail(owner_tag_text):
  """Returns True if |owner_tag_text| names a valid primary owner.

  A valid primary owner is an individual (not a team) with a Chromium or Google
  email address. Any text that contains a hyphen is treated as a team address.

  Args:
    owner_tag_text: The text in an owner tag.
  """
  # A hyphen is taken as the marker of a team email address.
  is_team_address = '-' in owner_tag_text
  has_allowed_domain = owner_tag_text.endswith(
      ('@chromium.org', '@google.com'))
  return not is_team_address and has_allowed_domain
def _IsEmailOrPlaceholder(is_first_owner, owner_tag_text, histogram_name,
                          is_obsolete):
  """Returns a truthy value if the text is an email or the placeholder text.

  For histograms that are not obsolete, this also verifies that the first owner
  tag holds a valid primary owner. The placeholder text is exempt from that
  verification.

  Args:
    is_first_owner: True if a histogram's first owner tag is being checked.
    owner_tag_text: The text of the owner tag being checked, e.g.
      'julie@google.com' or 'src/ios/net/cookies/OWNERS'.
    histogram_name: The string name of the histogram.
    is_obsolete: True if the histogram is obsolete.

  Returns:
    The regular expression match object when the text is an email address,
    otherwise whether the text equals the owner placeholder.

  Raises:
    Error: Raised if (A) the text is from the first owner tag, (B) the histogram
      is not obsolete, and (C) the text is not a valid primary owner.
  """
  email_match = re.match(_EMAIL_PATTERN, owner_tag_text)
  is_placeholder = owner_tag_text == extract_histograms.OWNER_PLACEHOLDER

  # Only the first owner of a live histogram has to be a real person.
  if is_first_owner and not (is_obsolete or is_placeholder):
    if not _IsValidPrimaryOwnerEmail(owner_tag_text):
      raise Error(
          'The histogram {} must have a valid primary owner, i.e. a Googler '
          'with an @google.com or @chromium.org email address. Please '
          'manually update the histogram with a valid primary owner.'.format(
              histogram_name))

  return email_match or is_placeholder
def _IsWellFormattedFilePath(path):
  """Checks that the path has the 'src/' prefix and the 'OWNERS' suffix.

  Args:
    path: The path to an OWNERS file, e.g. 'src/gin/OWNERS'.

  Returns:
    True if the path starts with SRC and ends with _OWNERS, False otherwise.
  """
  if not path.startswith(SRC):
    return False
  return path.endswith(_OWNERS)
def _GetHigherLevelOwnersFilePath(path):
  """Returns a path to an OWNERS file at a higher level than the given path.

  Returns an empty string if an OWNERS file path in a higher level directory
  cannot be found.

  Suppose the given path is //stuff/chromium/src/jam/tea/milk/OWNERS. The
  path //stuff/chromium/src/jam/tea/OWNERS will then be generated, and if it
  exists, it will be returned. If not, the path //stuff/chromium/src/jam/OWNERS
  will be generated, and if it exists, it will be returned.

  The search also stops, returning an empty string, once the top of the
  directory tree is reached. This keeps a path that does not lie under the
  limiting directory from being searched forever.

  Args:
    path: The path to an OWNERS file.
  """
  # The highest directory that is searched for component information is one
  # directory lower than the directory above tools. Depending on the machine
  # running this code, the directory above tools may or may not be src.
  path_to_limiting_dir = os.path.abspath(os.path.join(*DIR_ABOVE_TOOLS))
  limiting_dir = path_to_limiting_dir.split(os.sep)[-1]
  owners_file_limit = os.sep.join([limiting_dir, _OWNERS])

  current_path = path
  while not current_path.endswith(owners_file_limit):
    current_directory = os.path.dirname(current_path)
    parent_directory = os.path.dirname(current_directory)
    # dirname() returns its input unchanged at the top of the tree (e.g. '/'
    # or ''), so there is no higher directory left to examine.
    if parent_directory == current_directory:
      return ''
    parent_owners_file_path = os.path.join(parent_directory, _OWNERS)
    if (os.path.exists(parent_owners_file_path)
        and os.path.isfile(parent_owners_file_path)):
      return parent_owners_file_path
    current_path = parent_owners_file_path
  return ''
def _GetOwnersFilePath(path):
  """Converts a well-formatted OWNERS path into an absolute path.

  Args:
    path: A well-formatted path to an OWNERS file, e.g. 'src/courgette/OWNERS'.

  Returns:
    An absolute path that can be opened, resolved against DIR_ABOVE_TOOLS.

  Raises:
    Error: Raised if the given path is not well-formatted.
  """
  if not _IsWellFormattedFilePath(path):
    raise Error(
        'The given path {} is not well-formatted. Well-formatted paths begin '
        'with "src/" and end with "OWNERS"'.format(path))

  # SRC is removed because the file system on the machine running the code
  # may not have a src directory.
  relative_path = path[len(SRC):]
  path_components = DIR_ABOVE_TOOLS + relative_path.split(os.sep)
  return os.path.abspath(os.path.join(*path_components))
def _ExtractEmailAddressesFromOWNERS(path, depth=0):
  """Returns a list of email addresses in the given file.

  Only the first space-delimited word of each non-empty line is examined. A
  word that is an email address is collected; a word that starts with a
  file:// directive is followed recursively; anything else is ignored.

  Args:
    path: The path to an OWNERS file.
    depth: The depth of the recursion, which is used to fail fast in the rare
      case that the OWNERS file path results in a loop.

  Raises:
    Error: Raised in two situations. First, raised if (A) the OWNERS file with
      the given path has a file directive and (B) the OWNERS file indicated by
      the directive does not exist. Second, raised if the depth reaches a
      certain limit.
  """
  # It is unlikely that any chain of OWNERS files will exceed 10 redirections
  # via file:// directives.
  max_depth = 10
  if depth > max_depth:
    raise Error('_ExtractEmailAddressesFromOWNERS has been called {} times. The'
                ' path {} may be part of an OWNERS loop.'.format(
                    max_depth, path))

  file_directive = 'file://'
  email_regex = re.compile(_EMAIL_PATTERN)
  emails = []

  with open(path, 'r') as owners_file:
    contents = owners_file.read()

  for raw_line in contents.splitlines():
    if not raw_line:
      continue
    first_word = raw_line.lstrip().partition(' ')[0]

    if email_regex.match(first_word):
      emails.append(first_word)
      continue
    if not first_word.startswith(file_directive):
      continue

    # The directive's target is relative to the source root.
    next_path = _GetOwnersFilePath(
        os.path.join(SRC, first_word[len(file_directive):]))
    if not (os.path.exists(next_path) and os.path.isfile(next_path)):
      raise Error('The path derived from {} does not exist. '
                  'Derived path: {}'.format(first_word, next_path))
    emails.extend(_ExtractEmailAddressesFromOWNERS(next_path, depth + 1))

  return emails
def _ComponentFromDirmd(json_data, subpath):
  """Returns the component for a subpath based on dirmd output.

  Returns an empty string if no component can be extracted.

  Args:
    json_data: json object output from dirmd.
    subpath: The subpath for the directory being queried, e.g. 'src/storage'.
  """
  # If no component exists for the directory, or if METADATA migration is
  # incomplete there will be no component information, so every level of the
  # lookup falls back to an empty mapping.
  node = json_data
  for key in ('dirs', subpath, 'monorail'):
    node = node.get(key, {})
  return node.get('component', '')
# Memoize decorator from: https://stackoverflow.com/a/1988826
# TODO(asvitkine): Replace with @functools.cache once we're on Python 3.9+.
class Memoize:
  """Caches the results of a function, keyed by its positional arguments."""

  def __init__(self, f):
    self.f = f  # The wrapped function.
    self.memo = {}  # Maps argument tuples to previously computed results.

  def __call__(self, *args):
    """Returns the result for args, calling the function only on first use."""
    if args in self.memo:
      return self.memo[args]
    result = self.memo[args] = self.f(*args)
    return result
@Memoize
def _ExtractComponentViaDirmd(path):
  """Returns the component for monorail issues at the given path.

  Examples are 'Blink>Storage>FileAPI' and 'UI'.

  Uses dirmd in third_party/depot_tools to parse metadata and walk parent
  directories up to the top level of the repo.

  Returns an empty string if no component can be extracted.

  Args:
    path: The path to a directory to query, e.g. 'src/storage'.

  Raises:
    Error: Raised if the path does not start with the root path, or if dirmd
      exits with a non-zero status.
  """
  # Verify that the paths are absolute and the root is a parent of the
  # passed in path.
  root_path = os.path.abspath(os.path.join(*DIR_ABOVE_TOOLS))
  path = os.path.abspath(path)
  if not path.startswith(root_path):
    raise Error('Path {} is not a subpath of the root path {}.'.format(
        path, root_path))
  subpath = path[len(root_path) + 1:] or '.'  # E.g. content/public.

  dirmd_exe = 'dirmd.bat' if sys.platform == 'win32' else 'dirmd'
  dirmd_path = os.path.join(*(DIR_ABOVE_TOOLS +
                              ['third_party', 'depot_tools', dirmd_exe]))
  dirmd_command = [dirmd_path, 'read', '-form', 'sparse', root_path, path]
  dirmd = subprocess.Popen(
      dirmd_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # communicate() drains and closes both pipes while waiting for the process.
  # Calling wait() before reading can deadlock if dirmd fills a pipe buffer.
  stdout_data, stderr_data = dirmd.communicate()
  if dirmd.returncode != 0:
    raise Error('dirmd failed: "' + ' '.join(dirmd_command) + '": ' +
                stderr_data.decode('utf-8'))
  json_out = json.loads(stdout_data)

  # On Windows, dirmd output still uses Unix path separators.
  if sys.platform == 'win32':
    subpath = subpath.replace('\\', '/')
  return _ComponentFromDirmd(json_out, subpath)
def _MakeOwners(document, path, emails_with_dom_elements):
  """Makes DOM Elements for owners and returns the elements.

  The owners are extracted from the OWNERS file with the given path and
  deduped using the given set emails_with_dom_elements. This set has email
  addresses that were explicitly listed as histogram owners, e.g.
  <owner>liz@chromium.org</owner>. If a histogram has multiple OWNERS file
  paths, e.g. <owner>src/cc/OWNERS</owner> and <owner>src/ui/OWNERS</owner>,
  then the given set also contains any email addresses that have already been
  extracted from OWNERS files.

  New owners that are extracted from the given file are also added to
  emails_with_dom_elements.

  Args:
    document: The Document to which the new owners elements will belong.
    path: The absolute path to an OWNERS file.
    emails_with_dom_elements: The set of email addresses that already have
      corresponding DOM Elements.

  Returns:
    A collection of DOM Elements made from owners in the given OWNERS file.

  Raises:
    Error: Raised if no email addresses could be derived from the file.
  """
  # TODO(crbug.com/987709): An OWNERS file API would be ideal.
  extracted_emails = _ExtractEmailAddressesFromOWNERS(path)
  if not extracted_emails:
    raise Error('No emails could be derived from {}.'.format(path))

  # Elements are built in file order so that the order of email addresses in
  # the OWNERS file is respected.
  new_owner_elements = []
  for email in extracted_emails:
    if email in emails_with_dom_elements:
      continue
    emails_with_dom_elements.add(email)
    element = document.createElement('owner')
    element.appendChild(document.createTextNode(email))
    new_owner_elements.append(element)
  return new_owner_elements
def _UpdateHistogramOwners(histogram, owner_to_replace, owners_to_add):
  """Replaces |owner_to_replace| with |owners_to_add| for the given histogram.

  The first new owner takes the place of |owner_to_replace|. Every later owner
  is inserted, preceded by a newline-and-indent text node, before the node that
  originally followed |owner_to_replace|.

  Args:
    histogram: The DOM Element to update.
    owner_to_replace: The DOM Element to be replaced. This is a child node of
      histogram, and its text is a file path to an OWNERS file, e.g.
      'src/mojo/OWNERS'.
    owners_to_add: A collection of DOM Elements with which to replace
      owner_to_replace.
  """
  # Captured up front because owner_to_replace is detached by replaceChild().
  following_node = owner_to_replace.nextSibling
  for position, new_owner in enumerate(owners_to_add):
    if position == 0:
      histogram.replaceChild(new_owner, owner_to_replace)
      continue
    _AddTextNodeWithNewLineAndIndent(histogram, following_node)
    histogram.insertBefore(new_owner, following_node)
def AddHistogramComponent(histogram, component):
  """Makes a DOM Element for the component and adds it to the given histogram.

  The new component element, preceded by a newline-and-indent text node, is
  inserted before the histogram's last child.

  Args:
    histogram: The DOM Element to update.
    component: A string component to add, e.g. 'Internals>Network' or 'Build'.
  """
  document = histogram.ownerDocument
  component_element = document.createElement('component')
  component_element.appendChild(document.createTextNode(component))

  last_child = histogram.lastChild
  _AddTextNodeWithNewLineAndIndent(histogram, last_child)
  histogram.insertBefore(component_element, last_child)
def ExpandHistogramsOWNERS(histograms):
  """Updates the given DOM Element's descendants, if necessary.

  When a histogram has an owner node whose text is an OWNERS file path rather
  than an email address, e.g. <owner>src/base/android/OWNERS</owner> instead of
  <owner>joy@chromium.org</owner>, then (A) the histogram's owners need to be
  updated and (B) a component may be added.

  If the text of an owner node is an OWNERS file path, then this node is
  replaced by owner nodes for the emails derived from the OWNERS file. If a
  component, e.g. UI>GFX, can be derived from the OWNERS file or an OWNERS file
  in a higher-level directory, then a component tag will be added to the
  histogram, e.g. <component>UI&gt;GFX</component>.

  Args:
    histograms: The DOM Element whose descendants may be updated.

  Raises:
    Error: Raised if the OWNERS file with the given path does not exist. Errors
      raised while validating the primary owner, resolving the OWNERS file
      path, extracting emails or querying the component also propagate.
  """
  email_pattern = re.compile(_EMAIL_PATTERN)
  iter_matches = extract_histograms.IterElementsWithTag

  for histogram in iter_matches(histograms, 'histogram'):
    owners = list(iter_matches(histogram, 'owner', 1))

    # These depend only on the histogram, so they are computed once rather
    # than once per owner.
    name = histogram.getAttribute('name')
    is_obsolete = any(True for _ in iter_matches(histogram, 'obsolete', 1))

    # owner is a DOM Element with a single child, which is a DOM Text Node.
    # The text is stripped here exactly as it is when each owner is checked
    # below, so surrounding whitespace cannot defeat deduplication.
    owner_texts = [owner.childNodes[0].data.strip() for owner in owners]
    emails_with_dom_elements = {
        text for text in owner_texts if email_pattern.match(text)}

    # component is a DOM Element with a single child, which is a DOM Text Node.
    components_with_dom_elements = {
        extract_histograms.NormalizeString(component.childNodes[0].data)
        for component in iter_matches(histogram, 'component', 1)}

    for index, (owner, owner_text) in enumerate(zip(owners, owner_texts)):
      if _IsEmailOrPlaceholder(index == 0, owner_text, name, is_obsolete):
        continue

      path = _GetOwnersFilePath(owner_text)
      if not os.path.exists(path) or not os.path.isfile(path):
        raise Error('The file at {} does not exist.'.format(path))

      owners_to_add = _MakeOwners(
          owner.ownerDocument, path, emails_with_dom_elements)
      if not owners_to_add:
        continue

      _UpdateHistogramOwners(histogram, owner, owners_to_add)

      component = _ExtractComponentViaDirmd(os.path.dirname(path))
      if component and component not in components_with_dom_elements:
        components_with_dom_elements.add(component)
        AddHistogramComponent(histogram, component)