src/build/lacros/lacros_resource_sizes.py - cobalt - Git at Google

 #!/usr/bin/env python
 # Copyright 2020 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """Reports binary size metrics for LaCrOS build artifacts.

 More information at //docs/speed/binary_size/metrics.md.
 """

 import argparse
 import collections
 import contextlib
 import json
 import logging
 import os
 import subprocess
 import sys
 import tempfile


 @contextlib.contextmanager
 def _SysPath(path):
   """Library import context that temporarily appends |path| to |sys.path|."""
   if path and path not in sys.path:
     sys.path.insert(0, path)
   else:
     path = None  # Indicates that |sys.path| is not modified.
   try:
     yield
   finally:
     if path:
       sys.path.pop(0)


 DIR_SOURCE_ROOT = os.environ.get(
     'CHECKOUT_SOURCE_ROOT',
     os.path.abspath(
         os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)))

 BUILD_COMMON_PATH = os.path.join(DIR_SOURCE_ROOT, 'build', 'util', 'lib',
                                  'common')

 TRACING_PATH = os.path.join(DIR_SOURCE_ROOT, 'third_party', 'catapult',
                             'tracing')

 EU_STRIP_PATH = os.path.join(DIR_SOURCE_ROOT, 'buildtools', 'third_party',
                              'eu-strip', 'bin', 'eu-strip')

 with _SysPath(BUILD_COMMON_PATH):
   import perf_tests_results_helper  # pylint: disable=import-error

 with _SysPath(TRACING_PATH):
   from tracing.value import convert_chart_json  # pylint: disable=import-error

 _BASE_CHART = {
     'format_version': '0.1',
     'benchmark_name': 'resource_sizes',
     'benchmark_description': 'LaCrOS resource size information.',
     'trace_rerun_options': [],
     'charts': {}
 }

 _KEY_RAW = 'raw'
 _KEY_GZIPPED = 'gzipped'
 _KEY_STRIPPED = 'stripped'
 _KEY_STRIPPED_GZIPPED = 'stripped_then_gzipped'


 class _Group:
   """A group of build artifacts whose file sizes are summed and tracked.

   Build artifacts for size tracking fall under these categories:
   * File: A single file.
   * Group: A collection of files.
   * Dir: All files under a directory.

   Attributes:
     paths: A list of files or directories to be tracked together.
     title: The display name of the group.
     track_stripped: Whether to also track summed stripped ELF sizes.
     track_compressed: Whether to also track summed compressed sizes.
   """

   def __init__(self, paths, title, track_stripped=False,
                track_compressed=False):
     self.paths = paths
     self.title = title
     self.track_stripped = track_stripped
     self.track_compressed = track_compressed


 # List of disjoint build artifact groups for size tracking. This list should be
 # synched with lacros-amd64-generic-binary-size-rel builder contents (specified
 # in # //infra/config/subprojects/chromium/ci.star) and
 # chromeos-amd64-generic-lacros-internal builder (specified in src-internal).
 _TRACKED_GROUPS = [
     _Group(paths=['chrome'],
            title='File: chrome',
            track_stripped=True,
            track_compressed=True),
     _Group(paths=['crashpad_handler'], title='File: crashpad_handler'),
     _Group(paths=['icudtl.dat'], title='File: icudtl.dat'),
     _Group(paths=['nacl_helper'], title='File: nacl_helper'),
     _Group(paths=['nacl_irt_x86_64.nexe'], title='File: nacl_irt_x86_64.nexe'),
     _Group(paths=['resources.pak'], title='File: resources.pak'),
     _Group(paths=[
         'chrome_100_percent.pak', 'chrome_200_percent.pak', 'headless_lib.pak'
     ],
            title='Group: Other PAKs'),
     _Group(paths=['snapshot_blob.bin'], title='Group: Misc'),
     _Group(paths=['locales/'], title='Dir: locales'),
     _Group(paths=['swiftshader/'], title='Dir: swiftshader'),
     _Group(paths=['WidevineCdm/'], title='Dir: WidevineCdm'),
 ]


 def _visit_paths(base_dir, paths):
   """Itemizes files specified by a list of paths.

   Args:
     base_dir: Base directory for all elements in |paths|.
     paths: A list of filenames or directory names to specify files whose sizes
       to be counted. Directories are recursed. There's no de-duping effort.
       Non-existing files or directories are ignored (with warning message).
   """
   for path in paths:
     full_path = os.path.join(base_dir, path)
     if os.path.exists(full_path):
       if os.path.isdir(full_path):
         for dirpath, _, filenames in os.walk(full_path):
           for filename in filenames:
             yield os.path.join(dirpath, filename)
       else:  # Assume is file.
         yield full_path
     else:
       logging.critical('Not found: %s', path)


 def _is_probably_elf(filename):
   """Heuristically decides whether |filename| is ELF via magic signature."""
   with open(filename, 'rb') as fh:
     return fh.read(4) == '\x7FELF'


 def _is_unstrippable_elf(filename):
   """Identifies known-unstrippable ELF files to denoise the system."""
   return filename.endswith('.nexe') or filename.endswith('libwidevinecdm.so')


 def _get_filesize(filename):
   """Returns the size of a file, or 0 if file is not found."""
   try:
     return os.path.getsize(filename)
   except OSError:
     logging.critical('Failed to get size: %s', filename)
   return 0


 def _get_gzipped_filesize(filename):
   """Returns the gzipped size of a file, or 0 if file is not found."""
   BUFFER_SIZE = 65536
   if not os.path.isfile(filename):
     return 0
   try:
     # Call gzip externally instead of using gzip package since it's > 2x faster.
     cmd = ['gzip', '-c', filename]
     p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
     # Manually counting bytes instead of using len(p.communicate()[0]) to avoid
     # buffering the entire compressed data (can be ~100 MB).
     ret = 0
     while True:
       chunk = len(p.stdout.read(BUFFER_SIZE))
       if chunk == 0:
         break
       ret += chunk
     return ret
   except OSError:
     logging.critical('Failed to get gzipped size: %s', filename)
   return 0


 def _get_catagorized_filesizes(filename):
   """Measures |filename| sizes under various transforms.

   Returns: A Counter (keyed by _Key_* constants) that stores measured sizes.
   """
   sizes = collections.Counter()
   sizes[_KEY_RAW] = _get_filesize(filename)
   sizes[_KEY_GZIPPED] = _get_gzipped_filesize(filename)

   # Pre-assign values for non-ELF, or in case of failure for ELF.
   sizes[_KEY_STRIPPED] = sizes[_KEY_RAW]
   sizes[_KEY_STRIPPED_GZIPPED] = sizes[_KEY_GZIPPED]

   if _is_probably_elf(filename) and not _is_unstrippable_elf(filename):
     try:
       fd, temp_file = tempfile.mkstemp()
       os.close(fd)
       cmd = [EU_STRIP_PATH, filename, '-o', temp_file]
       subprocess.check_output(cmd)
       sizes[_KEY_STRIPPED] = _get_filesize(temp_file)
       sizes[_KEY_STRIPPED_GZIPPED] = _get_gzipped_filesize(temp_file)
       if sizes[_KEY_STRIPPED] > sizes[_KEY_RAW]:
         # This weird case has been observed for libwidevinecdm.so.
         logging.critical('Stripping made things worse for %s' % filename)
     except subprocess.CalledProcessError:
       logging.critical('Failed to strip file: %s' % filename)
     finally:
       os.unlink(temp_file)
   return sizes


 def _dump_chart_json(output_dir, chartjson):
   """Writes chart histogram to JSON files.

   Output files:
     results-chart.json contains the chart JSON.
     perf_results.json contains histogram JSON for Catapult.

   Args:
     output_dir: Directory to place the JSON files.
     chartjson: Source JSON data for output files.
   """
   results_path = os.path.join(output_dir, 'results-chart.json')
   logging.critical('Dumping chartjson to %s', results_path)
   with open(results_path, 'w') as json_file:
     json.dump(chartjson, json_file, indent=2)

   # We would ideally generate a histogram set directly instead of generating
   # chartjson then converting. However, perf_tests_results_helper is in
   # //build, which doesn't seem to have any precedent for depending on
   # anything in Catapult. This can probably be fixed, but since this doesn't
   # need to be super fast or anything, converting is a good enough solution
   # for the time being.
   histogram_result = convert_chart_json.ConvertChartJson(results_path)
   if histogram_result.returncode != 0:
     raise Exception('chartjson conversion failed with error: ' +
                     histogram_result.stdout)

   histogram_path = os.path.join(output_dir, 'perf_results.json')
   logging.critical('Dumping histograms to %s', histogram_path)
   with open(histogram_path, 'w') as json_file:
     json_file.write(histogram_result.stdout)


 def _run_resource_sizes(args):
   """Main flow to extract and output size data."""
   chartjson = _BASE_CHART.copy()
   report_func = perf_tests_results_helper.ReportPerfResult
   total_sizes = collections.Counter()

   def report_sizes(sizes, title, track_stripped, track_compressed):
     report_func(chart_data=chartjson,
                 graph_title=title,
                 trace_title='size',
                 value=sizes[_KEY_RAW],
                 units='bytes')

     if track_stripped:
       report_func(chart_data=chartjson,
                   graph_title=title + ' (Stripped)',
                   trace_title='size',
                   value=sizes[_KEY_STRIPPED],
                   units='bytes')

     if track_compressed:
       report_func(chart_data=chartjson,
                   graph_title=title + ' (Gzipped)',
                   trace_title='size',
                   value=sizes[_KEY_GZIPPED],
                   units='bytes')

     if track_stripped and track_compressed:
       report_func(chart_data=chartjson,
                   graph_title=title + ' (Stripped, Gzipped)',
                   trace_title='size',
                   value=sizes[_KEY_STRIPPED_GZIPPED],
                   units='bytes')

   for g in _TRACKED_GROUPS:
     sizes = sum(
         map(_get_catagorized_filesizes, _visit_paths(args.out_dir, g.paths)),
         collections.Counter())
     report_sizes(sizes, g.title, g.track_stripped, g.track_compressed)

     # Total compressed size is summed over individual compressed sizes, instead
     # of concatanating first, then compress everything. This is done for
     # simplicity. It also gives a conservative size estimate (assuming file
     # metadata and overheads are negligible).
     total_sizes += sizes

   report_sizes(total_sizes, 'Total', True, True)

   _dump_chart_json(args.output_dir, chartjson)


 def main():
   """Parses arguments and runs high level flows."""
   argparser = argparse.ArgumentParser(description='Writes LaCrOS size metrics.')

   argparser.add_argument('--chromium-output-directory',
                          dest='out_dir',
                          required=True,
                          type=os.path.realpath,
                          help='Location of the build artifacts.')

   output_group = argparser.add_mutually_exclusive_group()

   output_group.add_argument('--output-dir',
                             default='.',
                             help='Directory to save chartjson to.')

   # Accepted to conform to the isolated script interface, but ignored.
   argparser.add_argument('--isolated-script-test-filter',
                          help=argparse.SUPPRESS)
   argparser.add_argument('--isolated-script-test-perf-output',
                          type=os.path.realpath,
                          help=argparse.SUPPRESS)

   output_group.add_argument(
       '--isolated-script-test-output',
       type=os.path.realpath,
       help='File to which results will be written in the simplified JSON '
       'output format.')

   args = argparser.parse_args()

   isolated_script_output = {'valid': False, 'failures': []}
   if args.isolated_script_test_output:
     test_name = 'lacros_resource_sizes'
     args.output_dir = os.path.join(
         os.path.dirname(args.isolated_script_test_output), test_name)
     if not os.path.exists(args.output_dir):
       os.makedirs(args.output_dir)

   try:
     _run_resource_sizes(args)
     isolated_script_output = {'valid': True, 'failures': []}
   finally:
     if args.isolated_script_test_output:
       results_path = os.path.join(args.output_dir, 'test_results.json')
       with open(results_path, 'w') as output_file:
         json.dump(isolated_script_output, output_file)
       with open(args.isolated_script_test_output, 'w') as output_file:
         json.dump(isolated_script_output, output_file)


 if __name__ == '__main__':
   main()
	#!/usr/bin/env python
	# Copyright 2020 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""Reports binary size metrics for LaCrOS build artifacts.

	More information at //docs/speed/binary_size/metrics.md.
	"""

	import argparse
	import collections
	import contextlib
	import json
	import logging
	import os
	import subprocess
	import sys
	import tempfile


	@contextlib.contextmanager
	def _SysPath(path):
	"""Library import context that temporarily appends \|path\| to \|sys.path\|."""
	if path and path not in sys.path:
	sys.path.insert(0, path)
	else:
	path = None # Indicates that \|sys.path\| is not modified.
	try:
	yield
	finally:
	if path:
	sys.path.pop(0)


	DIR_SOURCE_ROOT = os.environ.get(
	'CHECKOUT_SOURCE_ROOT',
	os.path.abspath(
	os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)))

	BUILD_COMMON_PATH = os.path.join(DIR_SOURCE_ROOT, 'build', 'util', 'lib',
	'common')

	TRACING_PATH = os.path.join(DIR_SOURCE_ROOT, 'third_party', 'catapult',
	'tracing')

	EU_STRIP_PATH = os.path.join(DIR_SOURCE_ROOT, 'buildtools', 'third_party',
	'eu-strip', 'bin', 'eu-strip')

	with _SysPath(BUILD_COMMON_PATH):
	import perf_tests_results_helper # pylint: disable=import-error

	with _SysPath(TRACING_PATH):
	from tracing.value import convert_chart_json # pylint: disable=import-error

	_BASE_CHART = {
	'format_version': '0.1',
	'benchmark_name': 'resource_sizes',
	'benchmark_description': 'LaCrOS resource size information.',
	'trace_rerun_options': [],
	'charts': {}
	}

	_KEY_RAW = 'raw'
	_KEY_GZIPPED = 'gzipped'
	_KEY_STRIPPED = 'stripped'
	_KEY_STRIPPED_GZIPPED = 'stripped_then_gzipped'


	class _Group:
	"""A group of build artifacts whose file sizes are summed and tracked.

	Build artifacts for size tracking fall under these categories:
	* File: A single file.
	* Group: A collection of files.
	* Dir: All files under a directory.

	Attributes:
	paths: A list of files or directories to be tracked together.
	title: The display name of the group.
	track_stripped: Whether to also track summed stripped ELF sizes.
	track_compressed: Whether to also track summed compressed sizes.
	"""

	def __init__(self, paths, title, track_stripped=False,
	track_compressed=False):
	self.paths = paths
	self.title = title
	self.track_stripped = track_stripped
	self.track_compressed = track_compressed


	# List of disjoint build artifact groups for size tracking. This list should be
	# synched with lacros-amd64-generic-binary-size-rel builder contents (specified
	# in # //infra/config/subprojects/chromium/ci.star) and
	# chromeos-amd64-generic-lacros-internal builder (specified in src-internal).
	_TRACKED_GROUPS = [
	_Group(paths=['chrome'],
	title='File: chrome',
	track_stripped=True,
	track_compressed=True),
	_Group(paths=['crashpad_handler'], title='File: crashpad_handler'),
	_Group(paths=['icudtl.dat'], title='File: icudtl.dat'),
	_Group(paths=['nacl_helper'], title='File: nacl_helper'),
	_Group(paths=['nacl_irt_x86_64.nexe'], title='File: nacl_irt_x86_64.nexe'),
	_Group(paths=['resources.pak'], title='File: resources.pak'),
	_Group(paths=[
	'chrome_100_percent.pak', 'chrome_200_percent.pak', 'headless_lib.pak'
	],
	title='Group: Other PAKs'),
	_Group(paths=['snapshot_blob.bin'], title='Group: Misc'),
	_Group(paths=['locales/'], title='Dir: locales'),
	_Group(paths=['swiftshader/'], title='Dir: swiftshader'),
	_Group(paths=['WidevineCdm/'], title='Dir: WidevineCdm'),
	]


	def _visit_paths(base_dir, paths):
	"""Itemizes files specified by a list of paths.

	Args:
	base_dir: Base directory for all elements in \|paths\|.
	paths: A list of filenames or directory names to specify files whose sizes
	to be counted. Directories are recursed. There's no de-duping effort.
	Non-existing files or directories are ignored (with warning message).
	"""
	for path in paths:
	full_path = os.path.join(base_dir, path)
	if os.path.exists(full_path):
	if os.path.isdir(full_path):
	for dirpath, _, filenames in os.walk(full_path):
	for filename in filenames:
	yield os.path.join(dirpath, filename)
	else: # Assume is file.
	yield full_path
	else:
	logging.critical('Not found: %s', path)


	def _is_probably_elf(filename):
	"""Heuristically decides whether \|filename\| is ELF via magic signature."""
	with open(filename, 'rb') as fh:
	return fh.read(4) == '\x7FELF'


	def _is_unstrippable_elf(filename):
	"""Identifies known-unstrippable ELF files to denoise the system."""
	return filename.endswith('.nexe') or filename.endswith('libwidevinecdm.so')


	def _get_filesize(filename):
	"""Returns the size of a file, or 0 if file is not found."""
	try:
	return os.path.getsize(filename)
	except OSError:
	logging.critical('Failed to get size: %s', filename)
	return 0


	def _get_gzipped_filesize(filename):
	"""Returns the gzipped size of a file, or 0 if file is not found."""
	BUFFER_SIZE = 65536
	if not os.path.isfile(filename):
	return 0
	try:
	# Call gzip externally instead of using gzip package since it's > 2x faster.
	cmd = ['gzip', '-c', filename]
	p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
	# Manually counting bytes instead of using len(p.communicate()[0]) to avoid
	# buffering the entire compressed data (can be ~100 MB).
	ret = 0
	while True:
	chunk = len(p.stdout.read(BUFFER_SIZE))
	if chunk == 0:
	break
	ret += chunk
	return ret
	except OSError:
	logging.critical('Failed to get gzipped size: %s', filename)
	return 0


	def _get_catagorized_filesizes(filename):
	"""Measures \|filename\| sizes under various transforms.

	Returns: A Counter (keyed by _Key_* constants) that stores measured sizes.
	"""
	sizes = collections.Counter()
	sizes[_KEY_RAW] = _get_filesize(filename)
	sizes[_KEY_GZIPPED] = _get_gzipped_filesize(filename)

	# Pre-assign values for non-ELF, or in case of failure for ELF.
	sizes[_KEY_STRIPPED] = sizes[_KEY_RAW]
	sizes[_KEY_STRIPPED_GZIPPED] = sizes[_KEY_GZIPPED]

	if _is_probably_elf(filename) and not _is_unstrippable_elf(filename):
	try:
	fd, temp_file = tempfile.mkstemp()
	os.close(fd)
	cmd = [EU_STRIP_PATH, filename, '-o', temp_file]
	subprocess.check_output(cmd)
	sizes[_KEY_STRIPPED] = _get_filesize(temp_file)
	sizes[_KEY_STRIPPED_GZIPPED] = _get_gzipped_filesize(temp_file)
	if sizes[_KEY_STRIPPED] > sizes[_KEY_RAW]:
	# This weird case has been observed for libwidevinecdm.so.
	logging.critical('Stripping made things worse for %s' % filename)
	except subprocess.CalledProcessError:
	logging.critical('Failed to strip file: %s' % filename)
	finally:
	os.unlink(temp_file)
	return sizes


	def _dump_chart_json(output_dir, chartjson):
	"""Writes chart histogram to JSON files.

	Output files:
	results-chart.json contains the chart JSON.
	perf_results.json contains histogram JSON for Catapult.

	Args:
	output_dir: Directory to place the JSON files.
	chartjson: Source JSON data for output files.
	"""
	results_path = os.path.join(output_dir, 'results-chart.json')
	logging.critical('Dumping chartjson to %s', results_path)
	with open(results_path, 'w') as json_file:
	json.dump(chartjson, json_file, indent=2)

	# We would ideally generate a histogram set directly instead of generating
	# chartjson then converting. However, perf_tests_results_helper is in
	# //build, which doesn't seem to have any precedent for depending on
	# anything in Catapult. This can probably be fixed, but since this doesn't
	# need to be super fast or anything, converting is a good enough solution
	# for the time being.
	histogram_result = convert_chart_json.ConvertChartJson(results_path)
	if histogram_result.returncode != 0:
	raise Exception('chartjson conversion failed with error: ' +
	histogram_result.stdout)

	histogram_path = os.path.join(output_dir, 'perf_results.json')
	logging.critical('Dumping histograms to %s', histogram_path)
	with open(histogram_path, 'w') as json_file:
	json_file.write(histogram_result.stdout)


	def _run_resource_sizes(args):
	"""Main flow to extract and output size data."""
	chartjson = _BASE_CHART.copy()
	report_func = perf_tests_results_helper.ReportPerfResult
	total_sizes = collections.Counter()

	def report_sizes(sizes, title, track_stripped, track_compressed):
	report_func(chart_data=chartjson,
	graph_title=title,
	trace_title='size',
	value=sizes[_KEY_RAW],
	units='bytes')

	if track_stripped:
	report_func(chart_data=chartjson,
	graph_title=title + ' (Stripped)',
	trace_title='size',
	value=sizes[_KEY_STRIPPED],
	units='bytes')

	if track_compressed:
	report_func(chart_data=chartjson,
	graph_title=title + ' (Gzipped)',
	trace_title='size',
	value=sizes[_KEY_GZIPPED],
	units='bytes')

	if track_stripped and track_compressed:
	report_func(chart_data=chartjson,
	graph_title=title + ' (Stripped, Gzipped)',
	trace_title='size',
	value=sizes[_KEY_STRIPPED_GZIPPED],
	units='bytes')

	for g in _TRACKED_GROUPS:
	sizes = sum(
	map(_get_catagorized_filesizes, _visit_paths(args.out_dir, g.paths)),
	collections.Counter())
	report_sizes(sizes, g.title, g.track_stripped, g.track_compressed)

	# Total compressed size is summed over individual compressed sizes, instead
	# of concatanating first, then compress everything. This is done for
	# simplicity. It also gives a conservative size estimate (assuming file
	# metadata and overheads are negligible).
	total_sizes += sizes

	report_sizes(total_sizes, 'Total', True, True)

	_dump_chart_json(args.output_dir, chartjson)


	def main():
	"""Parses arguments and runs high level flows."""
	argparser = argparse.ArgumentParser(description='Writes LaCrOS size metrics.')

	argparser.add_argument('--chromium-output-directory',
	dest='out_dir',
	required=True,
	type=os.path.realpath,
	help='Location of the build artifacts.')

	output_group = argparser.add_mutually_exclusive_group()

	output_group.add_argument('--output-dir',
	default='.',
	help='Directory to save chartjson to.')

	# Accepted to conform to the isolated script interface, but ignored.
	argparser.add_argument('--isolated-script-test-filter',
	help=argparse.SUPPRESS)
	argparser.add_argument('--isolated-script-test-perf-output',
	type=os.path.realpath,
	help=argparse.SUPPRESS)

	output_group.add_argument(
	'--isolated-script-test-output',
	type=os.path.realpath,
	help='File to which results will be written in the simplified JSON '
	'output format.')

	args = argparser.parse_args()

	isolated_script_output = {'valid': False, 'failures': []}
	if args.isolated_script_test_output:
	test_name = 'lacros_resource_sizes'
	args.output_dir = os.path.join(
	os.path.dirname(args.isolated_script_test_output), test_name)
	if not os.path.exists(args.output_dir):
	os.makedirs(args.output_dir)

	try:
	_run_resource_sizes(args)
	isolated_script_output = {'valid': True, 'failures': []}
	finally:
	if args.isolated_script_test_output:
	results_path = os.path.join(args.output_dir, 'test_results.json')
	with open(results_path, 'w') as output_file:
	json.dump(isolated_script_output, output_file)
	with open(args.isolated_script_test_output, 'w') as output_file:
	json.dump(isolated_script_output, output_file)


	if __name__ == '__main__':
	main()