Files
scan-build-mirror/libscanbuild/compilation.py

180 lines
6.1 KiB
Python

# -*- coding: utf-8 -*-
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible for parsing a compiler invocation. """
import re
import os
import collections
# Public API of this module: the names exported by a star-import.
__all__ = ['split_command', 'split_compiler', 'classify_source']
# Compiler options which are dropped when a compilation database entry is
# created. `split_command` consults this map while it classifies the
# parameters. This is not the complete list of dropped parameters: that
# method filters a few more by pattern.
#
# Each key is an option name; the value is the number of separate
# arguments following the option which have to be dropped with it.
IGNORED_FLAGS = {
    # Compile-only flag: whoever writes the compilation database sets it
    # explicitly, so the one from the command line is not needed.
    '-c': 0,

    # Dependency generation options of the preprocessor. Keeping them
    # produced duplicate entries in the output (entries which differed in
    # these flags only), and users reported longer execution times because
    # of those duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,

    # Linker options. The compilation database holds compilation commands
    # only, so the compiler would not use these anyway; removing them just
    # makes the output easier to read.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1,
}
# Executable names of the known compiler wrappers.
COMPILER_WRAPPER_PATTERN = re.compile(r'^(distcc|ccache)$')

# Executable names of the known C/C++ compilers. A name is accepted as a
# compiler when at least one of these expressions matches it.
COMPILER_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        r'^(cc|c\+\+|cxx|CC)$',
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        r'^llvm-g(cc|\+\+)$',
        r'^i(cc|cpc)$',
        r'^(g|)xl(c|C|c\+\+)$',
    ))

# Executable names which denote a C++ compiler. A name is taken as C++
# when at least one of these expressions matches it.
COMPILER_CPP_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        # names of C++ compilers usually end with '++' (an optional
        # '-suffix' may follow)
        r'^(.+)(\+\+)(-.+|)$',
        r'^(icpc|xlC|cxx|CC)$',
    ))
# The value returned by `split_command` for a compilation.
Compilation = collections.namedtuple('Compilation',
                                     ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    :param command: the command to classify, as a list of strings where
                    the first element is the executable.
    :return: None if the command is not a compiler call, if it does not
             run the compilation pass, or if it names no source file.
             Otherwise a named tuple with the following attributes:

             compiler:  string value of 'c' or 'c++'
             flags:     list of compile options
             files:     list of source files """

    # quit right now, if the program was not a C/C++ compiler
    compiler_and_arguments = split_compiler(command)
    if compiler_and_arguments is None:
        return None

    # the pieces of the result of this method
    compiler = compiler_and_arguments[0]
    flags = []
    files = []

    # compiled once here, because they are applied to every argument
    linker_option = re.compile(r'^-(l|L|Wl,).+')
    not_an_option = re.compile(r'^[^-].+')

    # iterate on the compile options (an explicit iterator is used, because
    # some branches consume the value which follows the current option)
    args = iter(compiler_and_arguments[1])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with their separate values
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default keeps a truncated command line (flag without
                # its value at the very end) from raising StopIteration
                next(args, None)
        # linker options written together with their value
        elif linker_option.match(arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # the value is missing when the flag was the last argument
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif not_an_option.match(arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    return Compilation(compiler=compiler, flags=flags, files=files)
def classify_source(filename, c_compiler=True):
    """ Guess the language of a source file from its file name extension.

    :param filename:    name (or path) of the source file
    :param c_compiler:  true value when the file is given to a C compiler;
                        it decides the language of the '.c' and '.i' files
    :return: the name of the language which belongs to the extension, or
             None when the extension is not a known one. """

    # these two extensions are the only ones which depend on the compiler
    if c_compiler:
        plain_source, preprocessed_source = 'c', 'c-cpp-output'
    else:
        plain_source, preprocessed_source = 'c++', 'c++-cpp-output'

    # every extension which always means C++ source...
    languages = dict.fromkeys(
        ('.C', '.cc', '.CC', '.cp', '.cpp', '.cxx', '.c++', '.C++', '.txx'),
        'c++')
    # ... and the rest of the known extensions.
    languages.update({
        '.c': plain_source,
        '.i': preprocessed_source,
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
    })

    # only the last path component takes part in the classification
    name = os.path.basename(filename)
    suffix = os.path.splitext(name)[1]
    return languages.get(suffix)
def split_compiler(command):
    """ Decide whether the command is a compiler call, and split it if so.

    :param command: the command to classify (the executable comes first)
    :return: None when the command is not a compilation. Otherwise a
             (compiler_language, rest of the command) tuple, where the
             language is either 'c' or 'c++'. """

    # an empty command has no executable to look at
    if not command:
        return None

    program = os.path.basename(command[0])
    arguments = command[1:]

    # Both 'wrapper' 'parameters' and 'wrapper' 'compiler' 'parameters'
    # are valid, and a wrapper can wrap another wrapper too. When what
    # follows the wrapper is not recognised, the call is taken as a C
    # compilation with everything after the wrapper as parameters.
    if COMPILER_WRAPPER_PATTERN.match(program):
        wrapped = split_compiler(arguments)
        if wrapped is None:
            return ('c', arguments)
        return wrapped

    # anything else has to be a known compiler executable
    if not any(regex.match(program) for regex in COMPILER_PATTERNS):
        return None

    # 'compiler' 'parameters' is valid: the name tells the language
    for regex in COMPILER_CPP_PATTERNS:
        if regex.match(program):
            return 'c++', arguments
    return 'c', arguments