| # -*- coding: utf-8 -*- | 
 | #                     The LLVM Compiler Infrastructure | 
 | # | 
 | # This file is distributed under the University of Illinois Open Source | 
 | # License. See LICENSE.TXT for details. | 
""" This module is responsible for parsing a compiler invocation. """
 |  | 
 | import re | 
 | import os | 
 | import collections | 
 |  | 
 | __all__ = ['split_command', 'classify_source', 'compiler_language'] | 
 |  | 
 | # Ignored compiler options map for compilation database creation. | 
 | # The map is used in `split_command` method. (Which does ignore and classify | 
 | # parameters.) Please note, that these are not the only parameters which | 
 | # might be ignored. | 
 | # | 
 | # Keys are the option name, value number of options to skip | 
 | IGNORED_FLAGS = { | 
 |     # compiling only flag, ignored because the creator of compilation | 
 |     # database will explicitly set it. | 
 |     '-c': 0, | 
 |     # preprocessor macros, ignored because would cause duplicate entries in | 
 |     # the output (the only difference would be these flags). this is actual | 
 |     # finding from users, who suffered longer execution time caused by the | 
 |     # duplicates. | 
 |     '-MD': 0, | 
 |     '-MMD': 0, | 
 |     '-MG': 0, | 
 |     '-MP': 0, | 
 |     '-MF': 1, | 
 |     '-MT': 1, | 
 |     '-MQ': 1, | 
 |     # linker options, ignored because for compilation database will contain | 
 |     # compilation commands only. so, the compiler would ignore these flags | 
 |     # anyway. the benefit to get rid of them is to make the output more | 
 |     # readable. | 
 |     '-static': 0, | 
 |     '-shared': 0, | 
 |     '-s': 0, | 
 |     '-rdynamic': 0, | 
 |     '-l': 1, | 
 |     '-L': 1, | 
 |     '-u': 1, | 
 |     '-z': 1, | 
 |     '-T': 1, | 
 |     '-Xlinker': 1 | 
 | } | 
 |  | 
 | # Known C/C++ compiler executable name patterns | 
 | COMPILER_PATTERNS = frozenset([ | 
 |     re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'), | 
 |     re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'), | 
 |     re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'), | 
 |     re.compile(r'^llvm-g(cc|\+\+)$'), | 
 | ]) | 
 |  | 
 |  | 
 | def split_command(command): | 
 |     """ Returns a value when the command is a compilation, None otherwise. | 
 |  | 
 |     The value on success is a named tuple with the following attributes: | 
 |  | 
 |         files:    list of source files | 
 |         flags:    list of compile options | 
 |         compiler: string value of 'c' or 'c++' """ | 
 |  | 
 |     # the result of this method | 
 |     result = collections.namedtuple('Compilation', | 
 |                                     ['compiler', 'flags', 'files']) | 
 |     result.compiler = compiler_language(command) | 
 |     result.flags = [] | 
 |     result.files = [] | 
 |     # quit right now, if the program was not a C/C++ compiler | 
 |     if not result.compiler: | 
 |         return None | 
 |     # iterate on the compile options | 
 |     args = iter(command[1:]) | 
 |     for arg in args: | 
 |         # quit when compilation pass is not involved | 
 |         if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}: | 
 |             return None | 
 |         # ignore some flags | 
 |         elif arg in IGNORED_FLAGS: | 
 |             count = IGNORED_FLAGS[arg] | 
 |             for _ in range(count): | 
 |                 next(args) | 
 |         elif re.match(r'^-(l|L|Wl,).+', arg): | 
 |             pass | 
 |         # some parameters could look like filename, take as compile option | 
 |         elif arg in {'-D', '-I'}: | 
 |             result.flags.extend([arg, next(args)]) | 
 |         # parameter which looks source file is taken... | 
 |         elif re.match(r'^[^-].+', arg) and classify_source(arg): | 
 |             result.files.append(arg) | 
 |         # and consider everything else as compile option. | 
 |         else: | 
 |             result.flags.append(arg) | 
 |     # do extra check on number of source files | 
 |     return result if result.files else None | 
 |  | 
 |  | 
 | def classify_source(filename, c_compiler=True): | 
 |     """ Return the language from file name extension. """ | 
 |  | 
 |     mapping = { | 
 |         '.c': 'c' if c_compiler else 'c++', | 
 |         '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output', | 
 |         '.ii': 'c++-cpp-output', | 
 |         '.m': 'objective-c', | 
 |         '.mi': 'objective-c-cpp-output', | 
 |         '.mm': 'objective-c++', | 
 |         '.mii': 'objective-c++-cpp-output', | 
 |         '.C': 'c++', | 
 |         '.cc': 'c++', | 
 |         '.CC': 'c++', | 
 |         '.cp': 'c++', | 
 |         '.cpp': 'c++', | 
 |         '.cxx': 'c++', | 
 |         '.c++': 'c++', | 
 |         '.C++': 'c++', | 
 |         '.txx': 'c++' | 
 |     } | 
 |  | 
 |     __, extension = os.path.splitext(os.path.basename(filename)) | 
 |     return mapping.get(extension) | 
 |  | 
 |  | 
 | def compiler_language(command): | 
 |     """ A predicate to decide the command is a compiler call or not. | 
 |  | 
 |     Returns 'c' or 'c++' when it match. None otherwise. """ | 
 |  | 
 |     cplusplus = re.compile(r'^(.+)(\+\+)(-.+|)$') | 
 |  | 
 |     if command: | 
 |         executable = os.path.basename(command[0]) | 
 |         if any(pattern.match(executable) for pattern in COMPILER_PATTERNS): | 
 |             return 'c++' if cplusplus.match(executable) else 'c' | 
 |     return None |