| #!/usr/bin/env python |
| # Copyright 2017 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import os |
| import re |
| import sys |
| |
# Help text for the command line interface. main() writes it to stderr after
# the error message whenever the command line arguments are rejected.
kUsage = '''Usage: truncate_net_log.py INPUT_FILE OUTPUT_FILE TRUNCATED_SIZE

Creates a smaller version of INPUT_FILE (which is a chrome-net-export-log.json
formatted NetLog file) and saves it to OUTPUT_FILE. Note that this works by
reading the file line by line and not fully parsing the JSON, so it must match
the exact format (whitespace and all).

File truncation is done by dropping the oldest events and keeping everything
else.

Parameters:

INPUT_FILE:
Path to net-export JSON file

OUTPUT_FILE:
Path to save truncated file to

TRUNCATED_SIZE:
The desired (approximate) size for the truncated file. May use a suffix to
indicate units. Examples:
2003 --> 2003 bytes
100K --> 100 KiB
8M --> 8 MiB
1.5m --> 1.5 MiB
'''
| |
def get_file_size(path):
  '''Looks up the file at |path| and returns how many bytes it occupies.

  Raises OSError if |path| cannot be stat'ed (for instance when it does not
  exist).'''
  size_in_bytes = os.path.getsize(path)
  return size_in_bytes
| |
| |
def truncate_log_file(in_path, out_path, desired_size):
  '''Copies |in_path| to |out_path| such that it is approximately
  |desired_size| bytes large. This is accomplished by dropping the oldest
  events first. The final file size may not be exactly |desired_size| as only
  complete event lines are skipped.

  Only lines between the line starting with '"events": [' and the line that
  closes the events array (the one ending in "],") are candidates for being
  dropped. The closing line itself is always copied, even if more bytes would
  still need to be dropped, so the events array in the output stays
  terminated. If |desired_size| is not smaller than the input file, every line
  is copied unchanged.

  Writes a one-line summary with the old and new file sizes to stdout.'''
  orig_size = get_file_size(in_path)
  bytes_to_truncate = orig_size - desired_size

  # This variable is True if the current line being processed is an Event line
  # that may be dropped.
  inside_events = False
  with open(out_path, 'w') as out_file:
    with open(in_path, 'r') as in_file:
      for line in in_file:
        # The final line before polledData closes the events array, and hence
        # ends in "],". It must never be dropped. The check for polledData is
        # more for documentation sake.
        if inside_events and (line.startswith('"polledData": {') or
                              line.endswith('],\n')):
          inside_events = False

        # If this is an event line and need to drop more bytes, go ahead and
        # skip the line. Otherwise copy it to the output file.
        if inside_events and bytes_to_truncate > 0:
          # len() measures the line as read in text mode, which is one more
          # reason the resulting size is only approximate.
          bytes_to_truncate -= len(line)
        else:
          out_file.write(line)

        # All lines after this are events (up until the closing square
        # bracket).
        if line.startswith('"events": ['):
          inside_events = True

  # Measured only after both files are closed so the output is fully flushed.
  sys.stdout.write(
      'Truncated file from %d to %d bytes\n' % (orig_size,
                                                get_file_size(out_path)))
| |
def parse_filesize_str(filesize_str):
  '''Parses a string representation of a file size into a byte value, or None
  on failure.

  The accepted form is a decimal number optionally followed by exactly one
  case-insensitive unit suffix: "k" (multiplies by 1024) or "m" (multiplies by
  1024 * 1024). Surrounding whitespace is ignored. Anything else, including
  unsupported trailing text such as "5g" or "10kb", yields None. Fractional
  byte counts are truncated to an integer.'''
  # The pattern is anchored at the end so that an unsupported suffix is
  # rejected instead of being silently ignored (e.g. "5g" read as 5 bytes).
  m = re.match(r'([0-9.]+)([km]?)$', filesize_str.strip().lower())

  if not m:
    return None

  # Try to parse as decimal (regex above accepts some invalid decimals too,
  # such as "1.2.3" or ".").
  try:
    float_value = float(m.group(1))
  except ValueError:
    return None

  kSuffixValueBytes = {
      'k': 1024,
      'm': 1024 * 1024,
      '': 1,
  }

  suffix = m.group(2)
  try:
    return int(float_value * kSuffixValueBytes[suffix])
  except OverflowError:
    # A number too large for a float parses as infinity, which has no integer
    # value; treat it as a parse failure.
    return None
| |
| |
def main():
  '''Command line entry point.

  Reads INPUT_FILE, OUTPUT_FILE and TRUNCATED_SIZE from sys.argv and calls
  truncate_log_file() with them. If the argument count is wrong, the two paths
  refer to the same file, or the size cannot be parsed, an error message and
  the usage text are written to stderr and the process exits with status 1.'''
  if len(sys.argv) != 4:
    sys.stderr.write('ERROR: Requires 3 command line arguments\n')
    sys.stderr.write(kUsage)
    sys.exit(1)

  in_path = os.path.normpath(sys.argv[1])
  out_path = os.path.normpath(sys.argv[2])

  # Compare fully resolved paths rather than the normalized strings: opening
  # the output for writing empties it, so a relative/absolute pair or a symlink
  # that names the input file would destroy the input before it is read.
  resolved_in = os.path.normcase(os.path.realpath(in_path))
  resolved_out = os.path.normcase(os.path.realpath(out_path))
  if resolved_in == resolved_out:
    sys.stderr.write('ERROR: OUTPUT_FILE must be different from INPUT_FILE\n')
    sys.stderr.write(kUsage)
    sys.exit(1)

  size_str = sys.argv[3]
  size_bytes = parse_filesize_str(size_str)
  if size_bytes is None:
    sys.stderr.write('ERROR: Could not parse TRUNCATED_SIZE: %s\n' % size_str)
    sys.stderr.write(kUsage)
    sys.exit(1)

  truncate_log_file(in_path, out_path, size_bytes)
| |
| |
# Run the tool only when this file is executed as a script; importing it as a
# module just defines the functions above.
if __name__ == '__main__':
  main()