| #!/usr/bin/python2 |
| # |
| # Copyright 2019 Google Inc. |
| # |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| # |
| # Helper script that takes as input 2 CSVs downloaded from perf.skia.org and |
| # outputs a CSV with test_name, avg_value1 (from CSV1), avg_value2 (from CSV2), |
| # perc_diff between avg_value1 and avg_value2. |
| # This script also discards NUM_OUTLIERS_TO_REMOVE min values and |
| # NUM_OUTLIERS_TO_REMOVE max values. |
| |
| |
| import csv |
| import optparse |
| import sys |
| import re |
| |
| |
# Placeholder stored in the output for a value that is not available.
MISSING_STR = 'N/A'
# Number of smallest and number of largest samples dropped before averaging.
NUM_OUTLIERS_TO_REMOVE = 2
| |
| |
def read_from_csv(csv_file, num_outliers=None):
    """Read a perf.skia.org CSV and return each test's trimmed average.

    The first row must be a header whose first cell is 'id'. In every
    following row the first cell is a trace id that must match
    '^.*,test=(.*),$' (the captured group is the test name) and the remaining
    cells are sample values. Blank rows and blank sample cells are skipped.
    For each row the samples are sorted, the num_outliers smallest and the
    num_outliers largest are discarded, and the rest are averaged.

    Args:
      csv_file: Path of the CSV file to read.
      num_outliers: How many samples to drop from each end of the sorted
        samples. Defaults to NUM_OUTLIERS_TO_REMOVE.

    Returns:
      Dict mapping test name to the average (float) of its remaining samples.
      If a test name occurs in several rows, the last row wins.

    Raises:
      Exception: If the header row is missing or does not start with 'id'.
      ValueError: If num_outliers is negative, a row id carries no test name,
        a sample is not a number, or no samples remain after trimming.
    """
    if num_outliers is None:
        num_outliers = NUM_OUTLIERS_TO_REMOVE
    if num_outliers < 0:
        raise ValueError('num_outliers must be >= 0, got %r' % (num_outliers,))

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        f = open(csv_file, 'rb')
    else:
        f = open(csv_file, 'r', newline='')

    test_to_avg = {}
    with f:
        csv_reader = csv.reader(f, delimiter=',')
        # First row should contain headers. Validate that it does.
        header_row = next(csv_reader, None)
        if not header_row or header_row[0] != 'id':
            raise Exception('%s in unexpected format' % csv_file)
        p = re.compile('^.*,test=(.*),$')
        for v in csv_reader:
            if not v:
                # csv.reader yields an empty list for a blank line.
                continue
            # Extract the test name.
            result = p.search(v[0])
            if result is None:
                raise ValueError(
                    '%s: could not find a test name in id %r' % (csv_file, v[0]))
            test_name = result.group(1)

            vals = sorted(float(i) for i in v[1:] if i.strip())
            # Discard outliers. The end index is spelled out because
            # vals[n:-n] would be empty when n == 0.
            vals = vals[num_outliers:len(vals) - num_outliers]
            if not vals:
                raise ValueError(
                    '%s: test %r has no samples left after removing %d '
                    'outlier(s) from each end' %
                    (csv_file, test_name, num_outliers))
            # Find the avg val.
            test_to_avg[test_name] = sum(vals) / float(len(vals))
    return test_to_avg
| |
| |
def combine_results(d1, d2):
    """Merge two {test_name: average} dicts into per-test comparison rows.

    For a test present in both dicts, perc_diff is the difference (d2 value
    minus d1 value) expressed as a percentage of the mean of the two values,
    or 0 when that mean is 0. For a test present in only one dict, the absent
    value and perc_diff are both MISSING_STR.

    Args:
      d1: Dict mapping test name to its numeric average from the first CSV.
      d2: Dict mapping test name to its numeric average from the second CSV.

    Returns:
      Dict mapping each test name found in either input to a dict with the
      keys 'test_name', 'csv1', 'csv2' and 'perc_diff'.
    """
    test_to_result = {}
    for test1, v1 in d1.items():
        if test1 in d2:
            v2 = d2[test1]
            diff = v2 - v1
            # Divide by a float so that integer inputs are not truncated by
            # Python 2's integer division (here and in diff / avg below).
            avg = (v2 + v1) / 2.0
            perc_diff = 0 if avg == 0 else diff / avg * 100
        else:
            v2 = MISSING_STR
            perc_diff = MISSING_STR
        test_to_result[test1] = {
            'test_name': test1,
            'csv1': v1,
            'csv2': v2,
            'perc_diff': perc_diff,
        }

    # Also add keys in d2 and not d1.
    for test2, v2 in d2.items():
        if test2 in test_to_result:
            continue
        test_to_result[test2] = {
            'test_name': test2,
            'csv1': MISSING_STR,
            'csv2': v2,
            'perc_diff': MISSING_STR,
        }

    return test_to_result
| |
| |
def write_to_csv(output_dict, output_csv):
    """Write the comparison rows to a CSV file, ordered by test name.

    The file starts with the header test_name,csv1,csv2,perc_diff and then
    holds one row per entry of output_dict, in sorted key order.

    Args:
      output_dict: Dict mapping test name to a row dict with the keys
        'test_name', 'csv1', 'csv2' and 'perc_diff'.
      output_csv: Path of the CSV file to create or overwrite.
    """
    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3; otherwise line endings can be doubled.
    if sys.version_info[0] < 3:
        f = open(output_csv, 'wb')
    else:
        f = open(output_csv, 'w', newline='')

    with f:
        fieldnames = ['test_name', 'csv1', 'csv2', 'perc_diff']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        # sorted() works on Python 2 and 3; dict.keys() has no .sort() on 3.
        for test in sorted(output_dict):
            writer.writerow(output_dict[test])
| |
| |
def parse_and_output(csv1, csv2, output_csv):
    """Compare the per-test averages of two CSVs and write the result.

    Args:
      csv1: Path of the first input CSV.
      csv2: Path of the second input CSV.
      output_csv: Path of the CSV file to write the comparison to.
    """
    # Read csv1 before csv2, in the same order as the arguments.
    averages = [read_from_csv(path) for path in (csv1, csv2)]
    write_to_csv(combine_results(*averages), output_csv)
| |
| |
def main(argv=None):
    """Parse the command-line flags, run the comparison and exit.

    Exits with a usage error (status 2) when --csv1, --csv2 or --output_csv
    is missing, instead of failing later while opening a None path.

    Args:
      argv: Optional list of argument strings to parse. When None, optparse
        falls back to sys.argv[1:].
    """
    option_parser = optparse.OptionParser()
    option_parser.add_option(
        '--csv1', type=str,
        help='The first CSV to parse.')
    option_parser.add_option(
        '--csv2', type=str,
        help='The second CSV to parse.')
    option_parser.add_option(
        '--output_csv', type=str,
        help='The file to write the output CSV to.')
    options, _ = option_parser.parse_args(argv)

    missing = [name for name in ('csv1', 'csv2', 'output_csv')
               if not getattr(options, name)]
    if missing:
        # OptionParser.error() prints the usage message and exits.
        option_parser.error(
            'Missing required flag(s): %s' %
            ', '.join('--' + name for name in missing))

    sys.exit(parse_and_output(options.csv1, options.csv2, options.output_csv))
| |
| |
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()