| # -*- coding: utf-8 -*- |
| # The LLVM Compiler Infrastructure |
| # |
| # This file is distributed under the University of Illinois Open Source |
| # License. See LICENSE.TXT for details. |
| """ This module is responsible for to parse a compiler invocation. """ |
| |
| import re |
| import os |
| import collections |
| |
| __all__ = ['split_command', 'classify_source', 'compiler_language'] |
| |
| # Ignored compiler options map for compilation database creation. |
| # The map is used in `split_command` method. (Which does ignore and classify |
| # parameters.) Please note, that these are not the only parameters which |
| # might be ignored. |
| # |
| # Keys are the option name, value number of options to skip |
| IGNORED_FLAGS = { |
| # compiling only flag, ignored because the creator of compilation |
| # database will explicitly set it. |
| '-c': 0, |
| # preprocessor macros, ignored because would cause duplicate entries in |
| # the output (the only difference would be these flags). this is actual |
| # finding from users, who suffered longer execution time caused by the |
| # duplicates. |
| '-MD': 0, |
| '-MMD': 0, |
| '-MG': 0, |
| '-MP': 0, |
| '-MF': 1, |
| '-MT': 1, |
| '-MQ': 1, |
| # linker options, ignored because for compilation database will contain |
| # compilation commands only. so, the compiler would ignore these flags |
| # anyway. the benefit to get rid of them is to make the output more |
| # readable. |
| '-static': 0, |
| '-shared': 0, |
| '-s': 0, |
| '-rdynamic': 0, |
| '-l': 1, |
| '-L': 1, |
| '-u': 1, |
| '-z': 1, |
| '-T': 1, |
| '-Xlinker': 1 |
| } |
| |
| # Known C/C++ compiler executable name patterns |
| COMPILER_PATTERNS = frozenset([ |
| re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'), |
| re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'), |
| re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'), |
| re.compile(r'^llvm-g(cc|\+\+)$'), |
| ]) |
| |
| |
| def split_command(command): |
| """ Returns a value when the command is a compilation, None otherwise. |
| |
| The value on success is a named tuple with the following attributes: |
| |
| files: list of source files |
| flags: list of compile options |
| compiler: string value of 'c' or 'c++' """ |
| |
| # the result of this method |
| result = collections.namedtuple('Compilation', |
| ['compiler', 'flags', 'files']) |
| result.compiler = compiler_language(command) |
| result.flags = [] |
| result.files = [] |
| # quit right now, if the program was not a C/C++ compiler |
| if not result.compiler: |
| return None |
| # iterate on the compile options |
| args = iter(command[1:]) |
| for arg in args: |
| # quit when compilation pass is not involved |
| if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}: |
| return None |
| # ignore some flags |
| elif arg in IGNORED_FLAGS: |
| count = IGNORED_FLAGS[arg] |
| for _ in range(count): |
| next(args) |
| elif re.match(r'^-(l|L|Wl,).+', arg): |
| pass |
| # some parameters could look like filename, take as compile option |
| elif arg in {'-D', '-I'}: |
| result.flags.extend([arg, next(args)]) |
| # parameter which looks source file is taken... |
| elif re.match(r'^[^-].+', arg) and classify_source(arg): |
| result.files.append(arg) |
| # and consider everything else as compile option. |
| else: |
| result.flags.append(arg) |
| # do extra check on number of source files |
| return result if result.files else None |
| |
| |
| def classify_source(filename, c_compiler=True): |
| """ Return the language from file name extension. """ |
| |
| mapping = { |
| '.c': 'c' if c_compiler else 'c++', |
| '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output', |
| '.ii': 'c++-cpp-output', |
| '.m': 'objective-c', |
| '.mi': 'objective-c-cpp-output', |
| '.mm': 'objective-c++', |
| '.mii': 'objective-c++-cpp-output', |
| '.C': 'c++', |
| '.cc': 'c++', |
| '.CC': 'c++', |
| '.cp': 'c++', |
| '.cpp': 'c++', |
| '.cxx': 'c++', |
| '.c++': 'c++', |
| '.C++': 'c++', |
| '.txx': 'c++' |
| } |
| |
| __, extension = os.path.splitext(os.path.basename(filename)) |
| return mapping.get(extension) |
| |
| |
| def compiler_language(command): |
| """ A predicate to decide the command is a compiler call or not. |
| |
| Returns 'c' or 'c++' when it match. None otherwise. """ |
| |
| cplusplus = re.compile(r'^(.+)(\+\+)(-.+|)$') |
| |
| if command: |
| executable = os.path.basename(command[0]) |
| if any(pattern.match(executable) for pattern in COMPILER_PATTERNS): |
| return 'c++' if cplusplus.match(executable) else 'c' |
| return None |