| #!/usr/bin/env python | 
 | # Copyright (c) 2015 The Chromium Authors. All rights reserved. | 
 | # Use of this source code is governed by a BSD-style license that can be | 
 | # found in the LICENSE file. | 
 |  | 
 |  | 
 | """Parse an LLVM coverage report to generate useable results.""" | 
 |  | 
 |  | 
 | import argparse | 
 | import json | 
 | import os | 
 | import re | 
 | import sys | 
 |  | 
 |  | 
 | def _fix_filename(filename): | 
 |   """Return a filename which we can use to identify the file. | 
 |  | 
 |   The file paths printed by llvm-cov take the form: | 
 |  | 
 |       /path/to/repo/out/dir/../../src/filename.cpp | 
 |  | 
 |   And then they're truncated to 22 characters with leading ellipses: | 
 |  | 
 |       ...../../src/filename.cpp | 
 |  | 
 |   This makes it really tough to determine whether the file actually belongs in | 
 |   the Skia repo.  This function strips out the leading junk so that, if the file | 
 |   exists in the repo, the returned string matches the end of some relative path | 
 |   in the repo. This doesn't guarantee correctness, but it's about as close as | 
 |   we can get. | 
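
  For example, both forms shown above reduce to the same repo-relative suffix:

      >>> _fix_filename('/path/to/repo/out/dir/../../src/filename.cpp')
      'src/filename.cpp'
      >>> _fix_filename('...../../src/filename.cpp')
      'src/filename.cpp'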
 |   """ | 
 |   return filename.split('..')[-1].lstrip('./') | 
 |  | 
 |  | 
 | def _file_in_repo(filename, all_files): | 
 |   """Return the name of the checked-in file matching the given filename. | 
 |  | 
 |   Use suffix matching to determine which checked-in files the given filename | 
 |   matches. If there are no matches or multiple matches, return None. | 
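
  For example (with a hypothetical list of checked-in files):

      >>> _file_in_repo('...../../src/core/SkCanvas.cpp',
      ...               ['src/core/SkCanvas.cpp', 'src/core/SkPaint.cpp'])
      'src/core/SkCanvas.cpp'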
 |   """ | 
 |   new_file = _fix_filename(filename) | 
 |   matched = [] | 
 |   for f in all_files: | 
 |     if f.endswith(new_file): | 
 |       matched.append(f) | 
 |   if len(matched) == 1: | 
 |     return matched[0] | 
 |   elif len(matched) > 1: | 
 |     print >> sys.stderr, ('WARNING: multiple matches for %s; skipping:\n\t%s' | 
 |                           % (new_file, '\n\t'.join(matched))) | 
 |   return None | 
 |  | 
 |  | 
 | def _get_per_file_per_line_coverage(report): | 
 |   """Return a dict whose keys are file names and values are coverage data. | 
 |  | 
  Values are lists of (lineno, coverage, code) tuples, one per source line.
  Coverage is the line's execution count, or None for lines with no coverage
  data.
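
  For example, a value might look like this (the source text is illustrative):

      [(1, None, u'// Copyright 2015.'),
       (2, 3, u'int foo() {'),
       (3, 0, u'  return bar();')]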
 |   """ | 
  cwd = os.getcwd()
  all_files = []
  for root, dirs, files in os.walk(cwd):
    if 'third_party/externals' in root:
      continue
    files = [f for f in files if not (f[0] == '.' or f.endswith('.pyc'))]
    dirs[:] = [d for d in dirs if not d[0] == '.']
    for name in files:
      all_files.append(os.path.join(root[len(cwd) + 1:], name))
 |   all_files.sort() | 
 |  | 
 |   lines = report.splitlines() | 
 |   current_file = None | 
 |   file_lines = [] | 
 |   files = {} | 
 |   not_checked_in = '%' # Use this as the file name for not-checked-in files. | 
 |   for line in lines: | 
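    # A line of the form 'path/to/file.cpp:' starts a new per-file section.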
    m = re.match(r'([a-zA-Z0-9./_-]+):', line)
 |     if m: | 
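      # Flush the lines collected for the previous checked-in file.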
 |       if current_file and current_file != not_checked_in: | 
 |         files[current_file] = file_lines | 
 |       match_filename = _file_in_repo(m.groups()[0], all_files) | 
 |       current_file = match_filename or not_checked_in | 
 |       file_lines = [] | 
 |     else: | 
 |       if current_file != not_checked_in: | 
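        # Skip formatting rows: dashed separators and rows beginning with '|'.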
        skip = re.match(r'^\s{2}-+$|^\s{2}\|.+$', line)
 |         if line and not skip: | 
 |           cov, linenum, code = line.split('|', 2) | 
 |           cov = cov.strip() | 
 |           if cov: | 
 |             cov = int(cov) | 
 |           else: | 
 |             cov = None # We don't care about coverage for this line. | 
 |           linenum = int(linenum.strip()) | 
 |           assert linenum == len(file_lines) + 1 | 
 |           file_lines.append((linenum, cov, code.decode('utf-8', 'replace'))) | 
 |   return files | 
 |  | 
 |  | 
 | def _testname(filename): | 
 |   """Transform the file name into an ingestible test name.""" | 
 |   return re.sub(r'[^a-zA-Z0-9]', '_', filename) | 
 |  | 
 |  | 
 | def _nanobench_json(results, properties, key): | 
 |   """Return the results in JSON format like that produced by nanobench.""" | 
 |   rv = {} | 
  # Copy over the properties first, then set the 'key' and 'results' keys,
  # so that they are not clobbered if the user passes in a properties dict
  # containing either of those keys.
 |   rv.update(properties) | 
 |   rv['key'] = key | 
 |   rv['results'] = { | 
 |     _testname(f): { | 
 |       'coverage': { | 
 |         'percent': percent, | 
 |         'lines_not_covered': not_covered_lines, | 
 |         'options': { | 
 |           'fullname': f, | 
 |           'dir': os.path.dirname(f), | 
 |           'source_type': 'coverage', | 
 |         }, | 
 |       }, | 
 |     } for percent, not_covered_lines, f in results | 
 |   } | 
 |   return rv | 
 |  | 
 |  | 
 | def _parse_key_value(kv_list): | 
 |   """Return a dict whose key/value pairs are derived from the given list. | 
 |  | 
 |   For example: | 
 |  | 
 |       ['k1', 'v1', 'k2', 'v2'] | 
 |   becomes: | 
 |  | 
 |       {'k1': 'v1', | 
 |        'k2': 'v2'} | 
 |   """ | 
 |   if len(kv_list) % 2 != 0: | 
 |     raise Exception('Invalid key/value pairs: %s' % kv_list) | 
 |  | 
 |   rv = {} | 
  for i in xrange(0, len(kv_list), 2):
    rv[kv_list[i]] = kv_list[i + 1]
 |   return rv | 
 |  | 
 |  | 
 | def _get_per_file_summaries(line_by_line): | 
 |   """Summarize the full line-by-line coverage report by file.""" | 
 |   per_file = [] | 
 |   for filepath, lines in line_by_line.iteritems(): | 
 |     total_lines = 0 | 
 |     covered_lines = 0 | 
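    # Only count lines that have coverage data; cov is None otherwise.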
 |     for _, cov, _ in lines: | 
 |       if cov is not None: | 
 |         total_lines += 1 | 
 |         if cov > 0: | 
 |           covered_lines += 1 | 
 |     if total_lines > 0: | 
 |       per_file.append((float(covered_lines)/float(total_lines)*100.0, | 
 |                        total_lines - covered_lines, | 
 |                        filepath)) | 
 |   return per_file | 
 |  | 
 |  | 
 | def main(): | 
 |   """Generate useful data from a coverage report.""" | 
 |   # Parse args. | 
 |   parser = argparse.ArgumentParser() | 
  parser.add_argument('--report', help='input file; an LLVM coverage report.',
 |                       required=True) | 
 |   parser.add_argument('--nanobench', help='output file for nanobench data.') | 
 |   parser.add_argument( | 
 |       '--key', metavar='key_or_value', nargs='+', | 
 |       help='key/value pairs identifying this bot.') | 
 |   parser.add_argument( | 
 |       '--properties', metavar='key_or_value', nargs='+', | 
 |       help='key/value pairs representing properties of this build.') | 
 |   parser.add_argument('--linebyline', | 
 |                       help='output file for line-by-line JSON data.') | 
 |   args = parser.parse_args() | 
 |  | 
 |   if args.nanobench and not (args.key and args.properties): | 
 |     raise Exception('--key and --properties are required with --nanobench') | 
 |  | 
 |   with open(args.report) as f: | 
 |     report = f.read() | 
 |  | 
 |   line_by_line = _get_per_file_per_line_coverage(report) | 
 |  | 
 |   if args.linebyline: | 
 |     with open(args.linebyline, 'w') as f: | 
 |       json.dump(line_by_line, f) | 
 |  | 
 |   if args.nanobench: | 
 |     # Parse the key and properties for use in the nanobench JSON output. | 
 |     key = _parse_key_value(args.key) | 
 |     properties = _parse_key_value(args.properties) | 
 |  | 
 |     # Get per-file summaries. | 
 |     per_file = _get_per_file_summaries(line_by_line) | 
 |  | 
 |     # Write results. | 
 |     format_results = _nanobench_json(per_file, properties, key) | 
 |     with open(args.nanobench, 'w') as f: | 
 |       json.dump(format_results, f) | 
 |  | 
 |  | 
 | if __name__ == '__main__': | 
 |   main() |