Files
scan-build-mirror/libscanbuild/compilation.py
2019-01-01 13:20:31 +11:00

355 lines
13 KiB
Python

# -*- coding: utf-8 -*-
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible for parsing a compiler invocation. """
import re
import os
import collections
import logging
import json
import subprocess
from typing import List, Iterable, Dict, Tuple, Type, Any # noqa: ignore=F401
from typing import Optional # noqa: ignore=F401
from libscanbuild import Execution, shell_split, run_command
__all__ = ['classify_source', 'Compilation', 'CompilationDatabase']
# Map of ignored compiler option for the creation of a compilation database.
# This map is used in _split_command method, which classifies the parameters
# and ignores the selected ones. Please note that other parameters might be
# ignored as well.
#
# Option names are mapped to the number of following arguments which should
# be skipped.
IGNORED_FLAGS = {
# compiling only flag, ignored because the creator of compilation
# database will explicitly set it.
'-c': 0,
# preprocessor macros, ignored because would cause duplicate entries in
# the output (the only difference would be these flags). this is actual
# finding from users, who suffered longer execution time caused by the
# duplicates.
'-MD': 0,
'-MMD': 0,
'-MG': 0,
'-MP': 0,
'-MF': 1,
'-MT': 1,
'-MQ': 1,
# linker options, ignored because for compilation database will contain
# compilation commands only. so, the compiler would ignore these flags
# anyway. the benefit to get rid of them is to make the output more
# readable.
'-static': 0,
'-shared': 0,
'-s': 0,
'-rdynamic': 0,
'-l': 1,
'-L': 1,
'-u': 1,
'-z': 1,
'-T': 1,
'-Xlinker': 1,
# clang-cl / msvc cl specific flags
# consider moving visual studio specific warning flags also
'-nologo': 0,
'-EHsc': 0,
'-EHa': 0
} # type: Dict[str, int]
# Known C/C++ compiler wrapper name patterns.
COMPILER_PATTERN_WRAPPER = re.compile(r'^(distcc|ccache)$')
# Known MPI compiler wrapper name patterns.
COMPILER_PATTERNS_MPI_WRAPPER = re.compile(r'^mpi(cc|cxx|CC|c\+\+)$')
# Known C compiler executable name patterns.
COMPILER_PATTERNS_CC = (
re.compile(r'^([^-]*-)*[mg]cc(-\d+(\.\d+){0,2})?$'),
re.compile(r'^([^-]*-)*clang(-\d+(\.\d+){0,2})?$'),
re.compile(r'^(|i)cc$'),
re.compile(r'^(g|)xlc$'),
)
# Known C++ compiler executable name patterns.
COMPILER_PATTERNS_CXX = (
re.compile(r'^(c\+\+|cxx|CC)$'),
re.compile(r'^([^-]*-)*[mg]\+\+(-\d+(\.\d+){0,2})?$'),
re.compile(r'^([^-]*-)*clang\+\+(-\d+(\.\d+){0,2})?$'),
re.compile(r'^icpc$'),
re.compile(r'^(g|)xl(C|c\+\+)$'),
)
CompilationCommand = collections.namedtuple(
'CompilationCommand', ['compiler', 'flags', 'files'])
class Compilation:
""" Represents a compilation of a single module. """
def __init__(self, # type: Compilation
compiler, # type: str
flags, # type: List[str]
source, # type: str
directory # type: str
):
# type: (...) -> None
""" Constructor for a single compilation.
This method just normalize the paths and initialize values. """
self.compiler = compiler
self.flags = flags
self.directory = os.path.normpath(directory)
self.source = source if os.path.isabs(source) else \
os.path.normpath(os.path.join(self.directory, source))
def __hash__(self):
# type: (Compilation) -> int
return hash(str(self.as_dict()))
def __eq__(self, other):
# type: (Compilation, object) -> bool
return vars(self) == vars(other)
def as_dict(self):
# type: (Compilation) -> Dict[str, str]
""" This method dumps the object attributes into a dictionary. """
return vars(self)
def as_db_entry(self):
# type: (Compilation) -> Dict[str, Any]
""" This method creates a compilation database entry. """
relative = os.path.relpath(self.source, self.directory)
compiler = 'cc' if self.compiler == 'c' else 'c++'
return {
'file': relative,
'arguments': [compiler, '-c'] + self.flags + [relative],
'directory': self.directory
}
@classmethod
def from_db_entry(cls, entry):
# type: (Type[Compilation], Dict[str, str]) -> Iterable[Compilation]
""" Parser method for compilation entry.
From compilation database entry it creates the compilation object.
:param entry: the compilation database entry
:return: stream of CompilationDbEntry objects """
command = shell_split(entry['command']) if 'command' in entry else \
entry['arguments']
execution = Execution(cmd=command, cwd=entry['directory'], pid=0)
return cls.iter_from_execution(execution)
@classmethod
def iter_from_execution(cls, # type: Type[Compilation]
execution, # type: Execution
cc='cc', # type: str
cxx='c++' # type: str
):
# type: (...) -> Iterable[Compilation]
""" Generator method for compilation entries.
From a single compiler call it can generate zero or more entries.
:param execution: executed command and working directory
:param cc: user specified C compiler name
:param cxx: user specified C++ compiler name
:return: stream of CompilationDbEntry objects """
candidate = cls._split_command(execution.cmd, cc, cxx)
for source in candidate.files if candidate else []:
result = Compilation(directory=execution.cwd,
source=source,
compiler=candidate.compiler,
flags=candidate.flags)
if os.path.isfile(result.source):
yield result
@classmethod
def _split_compiler(cls, # type: Type[Compilation]
command, # type: List[str]
cc, # type: str
cxx # type: str
):
# type: (...) -> Optional[Tuple[str, List[str]]]
""" A predicate to decide whether the command is a compiler call.
:param command: the command to classify
:param cc: user specified C compiler name
:param cxx: user specified C++ compiler name
:return: None if the command is not a compilation, or a tuple
(compiler_language, rest of the command) otherwise """
def is_wrapper(cmd):
# type: (str) -> bool
return True if COMPILER_PATTERN_WRAPPER.match(cmd) else False
def is_mpi_wrapper(cmd):
# type: (str) -> bool
return True if COMPILER_PATTERNS_MPI_WRAPPER.match(cmd) else False
def is_c_compiler(cmd):
# type: (str) -> bool
return os.path.basename(cc) == cmd or \
any(pattern.match(cmd) for pattern in COMPILER_PATTERNS_CC)
def is_cxx_compiler(cmd):
# type: (str) -> bool
return os.path.basename(cxx) == cmd or \
any(pattern.match(cmd) for pattern in COMPILER_PATTERNS_CXX)
if command: # not empty list will allow to index '0' and '1:'
executable = os.path.basename(command[0]) # type: str
parameters = command[1:] # type: List[str]
# 'wrapper' 'parameters' and
# 'wrapper' 'compiler' 'parameters' are valid.
# Additionally, a wrapper can wrap another wrapper.
if is_wrapper(executable):
result = cls._split_compiler(parameters, cc, cxx)
# Compiler wrapper without compiler is a 'C' compiler.
return ('c', parameters) if result is None else result
# MPI compiler wrappers add extra parameters
elif is_mpi_wrapper(executable):
# Pass the executable with full path to avoid pick different
# executable from PATH.
mpi_call = get_mpi_call(command[0]) # type: List[str]
return cls._split_compiler(mpi_call + parameters, cc, cxx)
# and 'compiler' 'parameters' is valid.
elif is_c_compiler(executable):
return 'c', parameters
elif is_cxx_compiler(executable):
return 'c++', parameters
return None
@classmethod
def _split_command(cls, command, cc, cxx):
""" Returns a value when the command is a compilation, None otherwise.
:param command: the command to classify
:param cc: user specified C compiler name
:param cxx: user specified C++ compiler name
:return: stream of CompilationCommand objects """
logging.debug('input was: %s', command)
# quit right now, if the program was not a C/C++ compiler
compiler_and_arguments = cls._split_compiler(command, cc, cxx)
if compiler_and_arguments is None:
return None
# the result of this method
result = CompilationCommand(compiler=compiler_and_arguments[0],
flags=[],
files=[])
# iterate on the compile options
args = iter(compiler_and_arguments[1])
for arg in args:
# quit when compilation pass is not involved
if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
return None
# ignore some flags
elif arg in IGNORED_FLAGS:
count = IGNORED_FLAGS[arg]
for _ in range(count):
next(args)
elif re.match(r'^-(l|L|Wl,).+', arg):
pass
# some parameters look like a filename, take those explicitly
elif arg in {'-D', '-I'}:
result.flags.extend([arg, next(args)])
# parameter which looks source file is taken...
elif re.match(r'^[^-].+', arg) and classify_source(arg):
result.files.append(arg)
# and consider everything else as compile option.
else:
result.flags.append(arg)
logging.debug('output is: %s', result)
# do extra check on number of source files
return result if result.files else None
class CompilationDatabase:
""" Compilation Database persistence methods. """
@staticmethod
def save(filename, iterator):
# type: (str, Iterable[Compilation]) -> None
""" Saves compilations to given file.
:param filename: the destination file name
:param iterator: iterator of Compilation objects. """
entries = [entry.as_db_entry() for entry in iterator]
with open(filename, 'w') as handle:
json.dump(entries, handle, sort_keys=True, indent=4)
@staticmethod
def load(filename):
# type: (str) -> Iterable[Compilation]
""" Load compilations from file.
:param filename: the file to read from
:returns: iterator of Compilation objects. """
with open(filename, 'r') as handle:
for entry in json.load(handle):
for compilation in Compilation.from_db_entry(entry):
yield compilation
def classify_source(filename, c_compiler=True):
# type: (str, bool) -> Optional[str]
""" Classify source file names and returns the presumed language,
based on the file name extension.
:param filename: the source file name
:param c_compiler: indicate that the compiler is a C compiler,
:return: the language from file name extension. """
mapping = {
'.c': 'c' if c_compiler else 'c++',
'.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
'.ii': 'c++-cpp-output',
'.m': 'objective-c',
'.mi': 'objective-c-cpp-output',
'.mm': 'objective-c++',
'.mii': 'objective-c++-cpp-output',
'.C': 'c++',
'.cc': 'c++',
'.CC': 'c++',
'.cp': 'c++',
'.cpp': 'c++',
'.cxx': 'c++',
'.c++': 'c++',
'.C++': 'c++',
'.txx': 'c++'
}
__, extension = os.path.splitext(os.path.basename(filename))
return mapping.get(extension)
def get_mpi_call(wrapper):
# type: (str) -> List[str]
""" Provide information on how the underlying compiler would have been
invoked without the MPI compiler wrapper. """
for query_flags in [['-show'], ['--showme']]:
try:
output = run_command([wrapper] + query_flags)
if output:
return shell_split(output[0])
except (OSError, subprocess.CalledProcessError):
pass
# Fail loud
raise RuntimeError("Could not determinate MPI flags.")