| #!/usr/bin/env python |
| # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Wrapper script to help run clang tools across Chromium code. |
| |
| How to use run_tool.py: |
| If you want to run a clang tool across all Chromium code: |
| run_tool.py <tool> <path/to/compiledb> |
| |
| If you want to include all files mentioned in the compilation database |
| (this will also include generated files, unlike the previous command): |
| run_tool.py <tool> <path/to/compiledb> --all |
| |
| If you want to run the clang tool across only chrome/browser and |
| content/browser: |
| run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser |
| |
| Please see docs/clang_tool_refactoring.md for more information, which documents |
| the entire automated refactoring flow in Chromium. |
| |
| Why use run_tool.py (instead of running a clang tool directly): |
| The clang tool implementation doesn't take advantage of multiple cores, and if |
| it fails mysteriously in the middle, all the generated replacements will be |
| lost. Additionally, if the work is simply sharded across multiple cores by |
| running multiple RefactoringTools, problems arise when they attempt to rewrite a |
| file at the same time. |
| |
| run_tool.py will |
| 1) run multiple instances of clang tool in parallel |
| 2) gather stdout from clang tool invocations |
| 3) "atomically" forward #2 to stdout |
| |
| Output of run_tool.py can be piped into extract_edits.py and then into |
| apply_edits.py. These tools will extract individual edits and apply them to the |
| source files. These tools assume the clang tool emits the edits in the |
| following format: |
| ... |
| ==== BEGIN EDITS ==== |
| r:::<file path>:::<offset>:::<length>:::<replacement text> |
| r:::<file path>:::<offset>:::<length>:::<replacement text> |
| ...etc... |
| ==== END EDITS ==== |
| ... |
| |
| extract_edits.py extracts only lines between BEGIN/END EDITS markers |
| apply_edits.py reads edit lines from stdin and applies the edits |
| """ |
| |
| import argparse |
| from collections import namedtuple |
| import functools |
| import json |
| import multiprocessing |
| import os |
| import os.path |
| import re |
| import subprocess |
| import shlex |
| import sys |
| |
| script_dir = os.path.dirname(os.path.realpath(__file__)) |
| tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib')) |
| sys.path.insert(0, tool_dir) |
| |
| from clang import compile_db |
| |
| |
# One compilation-database record: the build working directory, the source
# file name, and the full compiler command line used to build it.
CompDBEntry = namedtuple('CompDBEntry', ['directory', 'filename', 'command'])
| |
| def _PruneGitFiles(git_files, paths): |
| """Prunes the list of files from git to include only those that are either in |
| |paths| or start with one item in |paths|. |
| |
| Args: |
| git_files: List of all repository files. |
| paths: Prefix filter for the returned paths. May contain multiple entries, |
| and the contents should be absolute paths. |
| |
| Returns: |
| Pruned list of files. |
| """ |
| if not git_files: |
| return [] |
| git_files.sort() |
| pruned_list = [] |
| git_index = 0 |
| for path in sorted(paths): |
| least = git_index |
| most = len(git_files) - 1 |
| while least <= most: |
| middle = (least + most ) / 2 |
| if git_files[middle] == path: |
| least = middle |
| break |
| elif git_files[middle] > path: |
| most = middle - 1 |
| else: |
| least = middle + 1 |
| while least < len(git_files) and git_files[least].startswith(path): |
| pruned_list.append(git_files[least]) |
| least += 1 |
| git_index = least |
| |
| return pruned_list |
| |
| |
| def _GetFilesFromGit(paths=None): |
| """Gets the list of files in the git repository if |paths| includes prefix |
| path filters or is empty. All complete filenames in |paths| are also included |
| in the output. |
| |
| Args: |
| paths: Prefix filter for the returned paths. May contain multiple entries. |
| """ |
| partial_paths = [] |
| files = [] |
| for p in paths: |
| real_path = os.path.realpath(p) |
| if os.path.isfile(real_path): |
| files.append(real_path) |
| else: |
| partial_paths.append(real_path) |
| if partial_paths or not files: |
| args = [] |
| if sys.platform == 'win32': |
| args.append('git.bat') |
| else: |
| args.append('git') |
| args.append('ls-files') |
| command = subprocess.Popen(args, stdout=subprocess.PIPE) |
| output, _ = command.communicate() |
| git_files = [os.path.realpath(p) for p in output.splitlines()] |
| if partial_paths: |
| git_files = _PruneGitFiles(git_files, partial_paths) |
| files.extend(git_files) |
| return files |
| |
| |
def _GetEntriesFromCompileDB(build_directory, source_filenames):
  """ Gets the list of files and args mentioned in the compilation database.

  Args:
    build_directory: Directory that contains the compile database.
    source_filenames: If not None, only include entries for the given list of
        filenames.

  Returns:
    List of CompDBEntry tuples, optionally filtered to |source_filenames|.
  """
  # None means "no filtering"; otherwise use a set for O(1) membership tests.
  wanted = set(source_filenames) if source_filenames is not None else None
  entries = []
  for entry in compile_db.Read(build_directory):
    if wanted is not None:
      full_path = os.path.realpath(
          os.path.join(entry['directory'], entry['file']))
      if full_path not in wanted:
        continue
    entries.append(
        CompDBEntry(entry['directory'], entry['file'], entry['command']))
  return entries
| |
| |
def _UpdateCompileCommandsIfNeeded(compile_commands, files_list):
  """ Filters compile database to only include required files, and makes it
  more clang-tool friendly on Windows.

  Args:
    compile_commands: List of the contents of compile database.
    files_list: List of required files for processing. Can be None to specify
        no filtering.
  Returns:
    List of the contents of the compile database after processing.
  """
  filtered_compile_commands = compile_commands
  # Filtering is only needed on Windows; elsewhere the full database is
  # passed through unchanged.
  if sys.platform == 'win32' and files_list:
    relative_paths = {os.path.relpath(f) for f in files_list}
    filtered_compile_commands = [
        entry for entry in compile_commands
        if os.path.relpath(
            os.path.join(entry['directory'], entry['file'])) in relative_paths
    ]

  return compile_db.ProcessCompileDatabaseIfNeeded(filtered_compile_commands)
| |
| |
def _ExecuteTool(toolname, tool_args, build_directory, compdb_entry):
  """Executes the clang tool.

  This is defined outside the class so it can be pickled for the multiprocessing
  module.

  Args:
    toolname: Name of the clang tool to execute.
    tool_args: Arguments to be passed to the clang tool. Can be None.
    build_directory: Directory that contains the compile database.
    compdb_entry: The file and args to run the clang tool over.

  Returns:
    A dictionary that must contain the key "status" and a boolean value
    associated with it.

    If status is True, then the generated output is stored with the key
    "stdout_text" in the dictionary.

    Otherwise, the filename and the output from stderr are associated with the
    keys "filename" and "stderr_text" respectively.
  """

  args = [toolname, compdb_entry.filename]
  if tool_args:
    args.extend(tool_args)
  args.append('--')

  # Tokens dropped from the recorded compile command:
  #  - the input source file itself, which is already passed directly above
  #    (leaving it in would hand it to the tool twice);
  #  - '/showIncludes', Ninja's header-dependency flag on Windows, which only
  #    generates a huge amount of log spam here;
  #  - '-MMD', its non-Windows counterpart.
  skipped = (compdb_entry.filename, '/showIncludes', '-MMD')
  for token in shlex.split(compdb_entry.command,
                           posix=(sys.platform != 'win32')):
    if token not in skipped:
      args.append(token)

  # '-MMD' may be accompanied by '-MF <filename>'; drop the first such pair.
  if '-MF' in args:
    mf_index = args.index('-MF')
    del args[mf_index:mf_index + 2]

  # shlex.split escapes double quotes in non-Posix mode, so strip them back.
  if sys.platform == 'win32':
    args = [a.replace('\\"', '"') for a in args]

  process = subprocess.Popen(args, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, cwd=build_directory)
  stdout_text, stderr_text = process.communicate()
  # Remove clang's noise about unused linker inputs (only the frontend runs).
  stderr_text = re.sub(
      r"^warning: .*'linker' input unused \[-Wunused-command-line-argument\]\n",
      "", stderr_text, flags=re.MULTILINE)

  result = {
      'status': process.returncode == 0,
      'filename': compdb_entry.filename,
      'stderr_text': stderr_text,
  }
  # Tool output is only forwarded for successful runs.
  if result['status']:
    result['stdout_text'] = stdout_text
  return result
| |
| |
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, tool_args, build_directory, compdb_entries):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      tool_args: Arguments to be passed to the tool. Can be None.
      build_directory: Directory that contains the compile database.
      compdb_entries: The files and args to run the tool over.
    """
    self._toolname = toolname
    self._tool_args = tool_args
    self._build_directory = build_directory
    self._compdb_entries = compdb_entries
    self._success_count = 0
    self._failed_count = 0

  @property
  def failed_count(self):
    # Number of entries whose tool invocation returned a non-zero status.
    return self._failed_count

  def Run(self):
    """Fans the tool out over a process pool and forwards the results."""
    pool = multiprocessing.Pool()
    worker = functools.partial(_ExecuteTool, self._toolname, self._tool_args,
                               self._build_directory)
    for result in pool.imap_unordered(worker, self._compdb_entries):
      self._ProcessResult(result)
    sys.stderr.write('\n')

  def _ProcessResult(self, result):
    """Handles result processing.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self._success_count += 1
      sys.stdout.write(result['stdout_text'])
      sys.stderr.write(result['stderr_text'])
    else:
      self._failed_count += 1
      sys.stderr.write('\nFailed to process %s\n' % result['filename'])
      sys.stderr.write(result['stderr_text'])
      sys.stderr.write('\n')
    done_count = self._success_count + self._failed_count
    total = len(self._compdb_entries)
    # Only output progress for every 100th entry, to make log files easier to
    # inspect.
    if done_count % 100 == 0 or done_count == total:
      percentage = (float(done_count) / total) * 100
      sys.stderr.write(
          'Processed %d files with %s tool (%d failures) [%.2f%%]\r' %
          (done_count, self._toolname, self._failed_count, percentage))
| |
| |
def main():
  """Parses arguments, builds the work list, and runs the tool.

  Returns:
    0 on full success, otherwise the negated number of failed files (so the
    process exit code is non-zero on any failure).
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--options-file',
      help='optional file to read options from')
  args, argv = parser.parse_known_args()
  # When an options file is given, it replaces the remaining command line.
  if args.options_file:
    argv = open(args.options_file).read().split()

  parser.add_argument('--tool', required=True, help='clang tool to run')
  parser.add_argument('--all', action='store_true')
  parser.add_argument(
      '--generate-compdb',
      action='store_true',
      help='regenerate the compile database before running the tool')
  parser.add_argument(
      '--shard',
      metavar='<n>-of-<count>')
  parser.add_argument(
      '-p',
      required=True,
      help='path to the directory that contains the compile database')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  parser.add_argument(
      '--tool-arg', nargs='?', action='append',
      help='optional arguments passed to the tool')
  parser.add_argument(
      '--tool-path', nargs='?',
      help='optional path to the tool directory')
  args = parser.parse_args(argv)

  if args.tool_path:
    tool_path = os.path.abspath(args.tool_path)
  else:
    # Default to the clang binaries checked out under third_party.
    tool_path = os.path.abspath(os.path.join(
        os.path.dirname(__file__),
        '../../../third_party/llvm-build/Release+Asserts/bin'))

  if args.all:
    # Reading source files is postponed to after possible regeneration of
    # compile_commands.json.
    source_filenames = None
  else:
    git_filenames = set(_GetFilesFromGit(args.path_filter))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
    source_filenames = [f
                        for f in git_filenames
                        if os.path.splitext(f)[1] in extensions]

  if args.generate_compdb:
    compile_commands = compile_db.GenerateWithNinja(args.p)
    compile_commands = _UpdateCompileCommandsIfNeeded(
        compile_commands, source_filenames)
    with open(os.path.join(args.p, 'compile_commands.json'), 'w') as f:
      f.write(json.dumps(compile_commands, indent=2))

  compdb_entries = set(_GetEntriesFromCompileDB(args.p, source_filenames))

  if args.shard:
    total_length = len(compdb_entries)
    match = re.match(r'(\d+)-of-(\d+)$', args.shard)
    if not match:
      # Report a malformed --shard value cleanly instead of crashing with an
      # AttributeError on match.group below.
      parser.error('--shard must be of the form <n>-of-<count>')
    # Input is 1-based, but modular arithmetic is 0-based.
    shard_number = int(match.group(1)) - 1
    shard_count = int(match.group(2))
    compdb_entries = [
        f for i, f in enumerate(sorted(compdb_entries))
        if i % shard_count == shard_number
    ]
    # Report the shard using the same 1-based numbering the user passed in
    # (the previous message showed the 0-based value, off by one). The
    # print() call form is valid under both Python 2 and Python 3.
    print('Shard %d-of-%d will process %d entries out of %d' %
          (shard_number + 1, shard_count, len(compdb_entries), total_length))

  dispatcher = _CompilerDispatcher(os.path.join(tool_path, args.tool),
                                   args.tool_arg,
                                   args.p,
                                   compdb_entries)
  dispatcher.Run()
  return -dispatcher.failed_count
| |
| |
| if __name__ == '__main__': |
| sys.exit(main()) |