src/v8/tools/sanitizers/sancov_merger.py - cobalt - Git at Google

 #!/usr/bin/env python
 # Copyright 2016 the V8 project authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Script for merging sancov files in parallel.

 When merging test runner output, the sancov files are expected
 to be located in one directory with the file-name pattern:
 <executable name>.test.<id>.<attempt>.sancov

 For each executable, this script writes a new file:
 <executable name>.result.sancov

 When --swarming-output-dir is specified, this script will merge the result
 files found there into the coverage folder.

 The sancov tool is expected to be in the llvm compiler-rt third-party
 directory. It's not checked out by default and must be added as a custom deps:
 'v8/third_party/llvm/projects/compiler-rt':
     'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
 """

 import argparse
 import logging
 import math
 import os
 import re
 import subprocess
 import sys

 from multiprocessing import Pool, cpu_count


 logging.basicConfig(level=logging.INFO)

 # V8 checkout directory.
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
     os.path.abspath(__file__))))

 # The sancov tool location.
 SANCOV_TOOL = os.path.join(
     BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
     'lib', 'sanitizer_common', 'scripts', 'sancov.py')

 # Number of cpus.
 CPUS = cpu_count()

 # Regexp to find sancov file as output by the v8 test runner. Also grabs the
 # executable name in group 1.
 SANCOV_FILE_RE = re.compile(r'^(.*)\.test\.\d+\.\d+\.sancov$')

 # Regexp to find sancov result files as returned from swarming.
 SANCOV_RESULTS_FILE_RE = re.compile(r'^.*\.result\.sancov$')


 def merge(args):
   """Merge several sancov files into one.

   Called trough multiprocessing pool. The args are expected to unpack to:
     keep: Option if source and intermediate sancov files should be kept.
     coverage_dir: Folder where to find the sancov files.
     executable: Name of the executable whose sancov files should be merged.
     index: A number to be put into the intermediate result file name.
            If None, this is a final result.
     bucket: The list of sancov files to be merged.
   Returns: A tuple with the executable name and the result file name.
   """
   keep, coverage_dir, executable, index, bucket = args
   process = subprocess.Popen(
       [SANCOV_TOOL, 'merge'] + bucket,
       stdout=subprocess.PIPE,
       stderr=subprocess.PIPE,
       cwd=coverage_dir,
   )
   output, _ = process.communicate()
   assert process.returncode == 0
   if index is not None:
     # This is an intermediate result, add the bucket index to the file name.
     result_file_name = '%s.result.%d.sancov' % (executable, index)
   else:
     # This is the final result without bucket index.
     result_file_name = '%s.result.sancov' % executable
   with open(os.path.join(coverage_dir, result_file_name), "wb") as f:
     f.write(output)
   if not keep:
     for f in bucket:
       os.remove(os.path.join(coverage_dir, f))
   return executable, result_file_name


 def generate_inputs(keep, coverage_dir, file_map, cpus):
   """Generate inputs for multiprocessed merging.

   Splits the sancov files into several buckets, so that each bucket can be
   merged in a separate process. We have only few executables in total with
   mostly lots of associated files. In the general case, with many executables
   we might need to avoid splitting buckets of executables with few files.

   Returns: List of args as expected by merge above.
   """
   inputs = []
   for executable, files in file_map.iteritems():
     # What's the bucket size for distributing files for merging? E.g. with
     # 2 cpus and 9 files we want bucket size 5.
     n = max(2, int(math.ceil(len(files) / float(cpus))))

     # Chop files into buckets.
     buckets = [files[i:i+n] for i in xrange(0, len(files), n)]

     # Inputs for multiprocessing. List of tuples containing:
     # Keep-files option, base path, executable name, index of bucket,
     # list of files.
     inputs.extend([(keep, coverage_dir, executable, i, b)
                    for i, b in enumerate(buckets)])
   return inputs


 def merge_parallel(inputs, merge_fun=merge):
   """Process several merge jobs in parallel."""
   pool = Pool(CPUS)
   try:
     return pool.map(merge_fun, inputs)
   finally:
     pool.close()


 def merge_test_runner_output(options):
   # Map executable names to their respective sancov files.
   file_map = {}
   for f in os.listdir(options.coverage_dir):
     match = SANCOV_FILE_RE.match(f)
     if match:
       file_map.setdefault(match.group(1), []).append(f)

   inputs = generate_inputs(
       options.keep, options.coverage_dir, file_map, CPUS)

   logging.info('Executing %d merge jobs in parallel for %d executables.' %
                (len(inputs), len(file_map)))

   results = merge_parallel(inputs)

   # Map executable names to intermediate bucket result files.
   file_map = {}
   for executable, f in results:
     file_map.setdefault(executable, []).append(f)

   # Merge the bucket results for each executable.
   # The final result has index None, so no index will appear in the
   # file name.
   inputs = [(options.keep, options.coverage_dir, executable, None, files)
              for executable, files in file_map.iteritems()]

   logging.info('Merging %d intermediate results.' % len(inputs))

   merge_parallel(inputs)


 def merge_two(args):
   """Merge two sancov files.

   Called trough multiprocessing pool. The args are expected to unpack to:
     swarming_output_dir: Folder where to find the new file.
     coverage_dir: Folder where to find the existing file.
     f: File name of the file to be merged.
   """
   swarming_output_dir, coverage_dir, f = args
   input_file = os.path.join(swarming_output_dir, f)
   output_file = os.path.join(coverage_dir, f)
   process = subprocess.Popen(
       [SANCOV_TOOL, 'merge', input_file, output_file],
       stdout=subprocess.PIPE,
       stderr=subprocess.PIPE,
   )
   output, _ = process.communicate()
   assert process.returncode == 0
   with open(output_file, "wb") as f:
     f.write(output)


 def merge_swarming_output(options):
   # Iterate sancov files from swarming.
   files = []
   for f in os.listdir(options.swarming_output_dir):
     match = SANCOV_RESULTS_FILE_RE.match(f)
     if match:
       if os.path.exists(os.path.join(options.coverage_dir, f)):
         # If the same file already exists, we'll merge the data.
         files.append(f)
       else:
         # No file yet? Just move it.
         os.rename(os.path.join(options.swarming_output_dir, f),
                   os.path.join(options.coverage_dir, f))

   inputs = [(options.swarming_output_dir, options.coverage_dir, f)
             for f in files]

   logging.info('Executing %d merge jobs in parallel.' % len(inputs))
   merge_parallel(inputs, merge_two)


 def main():
   parser = argparse.ArgumentParser()
   parser.add_argument('--coverage-dir', required=True,
                       help='Path to the sancov output files.')
   parser.add_argument('--keep', default=False, action='store_true',
                       help='Keep sancov output files after merging.')
   parser.add_argument('--swarming-output-dir',
                       help='Folder containing a results shard from swarming.')
   options = parser.parse_args()

   # Check if folder with coverage output exists.
   assert (os.path.exists(options.coverage_dir) and
           os.path.isdir(options.coverage_dir))

   if options.swarming_output_dir:
     # Check if folder with swarming output exists.
     assert (os.path.exists(options.swarming_output_dir) and
             os.path.isdir(options.swarming_output_dir))
     merge_swarming_output(options)
   else:
     merge_test_runner_output(options)

   return 0


 if __name__ == '__main__':
   sys.exit(main())
	#!/usr/bin/env python
	# Copyright 2016 the V8 project authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Script for merging sancov files in parallel.

	When merging test runner output, the sancov files are expected
	to be located in one directory with the file-name pattern:
	<executable name>.test.<id>.<attempt>.sancov

	For each executable, this script writes a new file:
	<executable name>.result.sancov

	When --swarming-output-dir is specified, this script will merge the result
	files found there into the coverage folder.

	The sancov tool is expected to be in the llvm compiler-rt third-party
	directory. It's not checked out by default and must be added as a custom deps:
	'v8/third_party/llvm/projects/compiler-rt':
	'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
	"""

	import argparse
	import logging
	import math
	import os
	import re
	import subprocess
	import sys

	from multiprocessing import Pool, cpu_count


	logging.basicConfig(level=logging.INFO)

	# V8 checkout directory.
	BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
	os.path.abspath(__file__))))

	# The sancov tool location.
	SANCOV_TOOL = os.path.join(
	BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
	'lib', 'sanitizer_common', 'scripts', 'sancov.py')

	# Number of cpus.
	CPUS = cpu_count()

	# Regexp to find sancov file as output by the v8 test runner. Also grabs the
	# executable name in group 1.
	SANCOV_FILE_RE = re.compile(r'^(.*)\.test\.\d+\.\d+\.sancov$')

	# Regexp to find sancov result files as returned from swarming.
	SANCOV_RESULTS_FILE_RE = re.compile(r'^.*\.result\.sancov$')


	def merge(args):
	"""Merge several sancov files into one.

	Called trough multiprocessing pool. The args are expected to unpack to:
	keep: Option if source and intermediate sancov files should be kept.
	coverage_dir: Folder where to find the sancov files.
	executable: Name of the executable whose sancov files should be merged.
	index: A number to be put into the intermediate result file name.
	If None, this is a final result.
	bucket: The list of sancov files to be merged.
	Returns: A tuple with the executable name and the result file name.
	"""
	keep, coverage_dir, executable, index, bucket = args
	process = subprocess.Popen(
	[SANCOV_TOOL, 'merge'] + bucket,
	stdout=subprocess.PIPE,
	stderr=subprocess.PIPE,
	cwd=coverage_dir,
	)
	output, _ = process.communicate()
	assert process.returncode == 0
	if index is not None:
	# This is an intermediate result, add the bucket index to the file name.
	result_file_name = '%s.result.%d.sancov' % (executable, index)
	else:
	# This is the final result without bucket index.
	result_file_name = '%s.result.sancov' % executable
	with open(os.path.join(coverage_dir, result_file_name), "wb") as f:
	f.write(output)
	if not keep:
	for f in bucket:
	os.remove(os.path.join(coverage_dir, f))
	return executable, result_file_name


	def generate_inputs(keep, coverage_dir, file_map, cpus):
	"""Generate inputs for multiprocessed merging.

	Splits the sancov files into several buckets, so that each bucket can be
	merged in a separate process. We have only few executables in total with
	mostly lots of associated files. In the general case, with many executables
	we might need to avoid splitting buckets of executables with few files.

	Returns: List of args as expected by merge above.
	"""
	inputs = []
	for executable, files in file_map.iteritems():
	# What's the bucket size for distributing files for merging? E.g. with
	# 2 cpus and 9 files we want bucket size 5.
	n = max(2, int(math.ceil(len(files) / float(cpus))))

	# Chop files into buckets.
	buckets = [files[i:i+n] for i in xrange(0, len(files), n)]

	# Inputs for multiprocessing. List of tuples containing:
	# Keep-files option, base path, executable name, index of bucket,
	# list of files.
	inputs.extend([(keep, coverage_dir, executable, i, b)
	for i, b in enumerate(buckets)])
	return inputs


	def merge_parallel(inputs, merge_fun=merge):
	"""Process several merge jobs in parallel."""
	pool = Pool(CPUS)
	try:
	return pool.map(merge_fun, inputs)
	finally:
	pool.close()


	def merge_test_runner_output(options):
	# Map executable names to their respective sancov files.
	file_map = {}
	for f in os.listdir(options.coverage_dir):
	match = SANCOV_FILE_RE.match(f)
	if match:
	file_map.setdefault(match.group(1), []).append(f)

	inputs = generate_inputs(
	options.keep, options.coverage_dir, file_map, CPUS)

	logging.info('Executing %d merge jobs in parallel for %d executables.' %
	(len(inputs), len(file_map)))

	results = merge_parallel(inputs)

	# Map executable names to intermediate bucket result files.
	file_map = {}
	for executable, f in results:
	file_map.setdefault(executable, []).append(f)

	# Merge the bucket results for each executable.
	# The final result has index None, so no index will appear in the
	# file name.
	inputs = [(options.keep, options.coverage_dir, executable, None, files)
	for executable, files in file_map.iteritems()]

	logging.info('Merging %d intermediate results.' % len(inputs))

	merge_parallel(inputs)


	def merge_two(args):
	"""Merge two sancov files.

	Called trough multiprocessing pool. The args are expected to unpack to:
	swarming_output_dir: Folder where to find the new file.
	coverage_dir: Folder where to find the existing file.
	f: File name of the file to be merged.
	"""
	swarming_output_dir, coverage_dir, f = args
	input_file = os.path.join(swarming_output_dir, f)
	output_file = os.path.join(coverage_dir, f)
	process = subprocess.Popen(
	[SANCOV_TOOL, 'merge', input_file, output_file],
	stdout=subprocess.PIPE,
	stderr=subprocess.PIPE,
	)
	output, _ = process.communicate()
	assert process.returncode == 0
	with open(output_file, "wb") as f:
	f.write(output)


	def merge_swarming_output(options):
	# Iterate sancov files from swarming.
	files = []
	for f in os.listdir(options.swarming_output_dir):
	match = SANCOV_RESULTS_FILE_RE.match(f)
	if match:
	if os.path.exists(os.path.join(options.coverage_dir, f)):
	# If the same file already exists, we'll merge the data.
	files.append(f)
	else:
	# No file yet? Just move it.
	os.rename(os.path.join(options.swarming_output_dir, f),
	os.path.join(options.coverage_dir, f))

	inputs = [(options.swarming_output_dir, options.coverage_dir, f)
	for f in files]

	logging.info('Executing %d merge jobs in parallel.' % len(inputs))
	merge_parallel(inputs, merge_two)


	def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--coverage-dir', required=True,
	help='Path to the sancov output files.')
	parser.add_argument('--keep', default=False, action='store_true',
	help='Keep sancov output files after merging.')
	parser.add_argument('--swarming-output-dir',
	help='Folder containing a results shard from swarming.')
	options = parser.parse_args()

	# Check if folder with coverage output exists.
	assert (os.path.exists(options.coverage_dir) and
	os.path.isdir(options.coverage_dir))

	if options.swarming_output_dir:
	# Check if folder with swarming output exists.
	assert (os.path.exists(options.swarming_output_dir) and
	os.path.isdir(options.swarming_output_dir))
	merge_swarming_output(options)
	else:
	merge_test_runner_output(options)

	return 0


	if __name__ == '__main__':
	sys.exit(main())