Files
scan-build-mirror/libscanbuild/compilation.py
2017-04-07 22:00:15 +10:00

275 lines
9.8 KiB
Python

# -*- coding: utf-8 -*-
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible to parse a compiler invocation. """
import re
import os
import collections
import logging
import json
from libscanbuild import Execution, shell_split
__all__ = ['classify_source', 'Compilation', 'CompilationDatabase']
# Compiler options which are dropped when a compilation database entry is
# created. `Compilation._split_command` consults this map while it filters
# and classifies the parameters. Please note, that this map is not the
# complete list: other parameters might be dropped too.
#
# Each key is an option name, the value is the number of the following
# arguments which are dropped together with the option.
IGNORED_FLAGS = {
    flag: skipped
    for skipped, group in (
        # compile-only flag: the creator of the compilation database sets
        # it explicitly, so the original one is dropped.
        (0, ('-c',)),
        # flags of the -M family: entries which differ only in these
        # flags would be duplicates in the output. (users reported longer
        # execution time caused by such duplicates.)
        (0, ('-MD', '-MMD', '-MG', '-MP')),
        (1, ('-MF', '-MT', '-MQ')),
        # linker options: the compilation database contains compilation
        # commands only, so the compiler would not use these anyway.
        # dropping them makes the output more readable.
        (0, ('-static', '-shared', '-s', '-rdynamic')),
        (1, ('-l', '-L', '-u', '-z', '-T', '-Xlinker')),
    )
    for flag in group
}
# Executable names which are known to be C/C++ compiler wrappers.
COMPILER_PATTERN_WRAPPER = re.compile(r'^(distcc|ccache)$')

# Executable name patterns which are known to be C compilers.
COMPILER_PATTERNS_CC = frozenset(re.compile(pattern) for pattern in (
    # cc, icc, mpicc
    r'^(|i|mpi)cc$',
    # gcc or mcc, with optional dash separated prefixes and version suffix
    r'^([^-]*-)*[mg]cc(-\d+(\.\d+){0,2})?$',
    # clang, with optional dash separated prefixes and version suffix
    r'^([^-]*-)*clang(-\d+(\.\d+){0,2})?$',
    # xlc, gxlc
    r'^(g|)xlc$',
))

# Executable name patterns which are known to be C++ compilers.
COMPILER_PATTERNS_CXX = frozenset(re.compile(pattern) for pattern in (
    # c++, cxx, CC
    r'^(c\+\+|cxx|CC)$',
    # g++ or m++, with optional dash separated prefixes and version suffix
    r'^([^-]*-)*[mg]\+\+(-\d+(\.\d+){0,2})?$',
    # clang++, with optional dash separated prefixes and version suffix
    r'^([^-]*-)*clang\+\+(-\d+(\.\d+){0,2})?$',
    # icpc and the MPI C++ compiler names
    r'^(icpc|mpiCC|mpicxx|mpic\+\+)$',
    # xlC, xlc++, gxlC, gxlc++
    r'^(g|)xl(C|c\+\+)$',
))
# Value type produced by `Compilation._split_command`: the compiler
# language, the flags which were kept and the source files of the command.
CompilationCommand = collections.namedtuple(
    typename='CompilationCommand',
    field_names=['compiler', 'flags', 'files'])
class Compilation:
    """ A single compilation: one source file with the compiler language,
    the compiler flags and the working directory it was compiled with.

    Instances are compared and hashed by the value of these attributes. """

    def __init__(self, compiler, flags, source, directory):
        """ Constructor for a single compilation.

        This method just normalizes the paths and initializes the values.

        :param compiler:    the compiler language ('c' or 'c++')
        :param flags:       list of compiler flags (without the source file)
        :param source:      the source file. a relative path is joined to
                            `directory` and normalized, an absolute path
                            is kept as it is
        :param directory:   the working directory of the compilation """
        self.compiler = compiler
        self.flags = flags
        self.directory = os.path.normpath(directory)
        self.source = source if os.path.isabs(source) else \
            os.path.normpath(os.path.join(self.directory, source))

    def __hash__(self):
        # the flag list is not hashable, therefore it is joined into a string
        return hash((self.compiler, self.source, self.directory,
                     ':'.join(self.flags)))

    def __eq__(self, other):
        # let python try the reflected operation (and fall back to identity)
        # instead of failing with TypeError on `vars` for foreign objects
        if not isinstance(other, Compilation):
            return NotImplemented
        return vars(self) == vars(other)

    def __ne__(self, other):
        # explicit counterpart of `__eq__`: python 2 does not derive it
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def as_dict(self):
        """ This method dumps the object attributes into a dictionary.

        :return: a new dictionary with the attribute names as keys.
                 (a shallow copy: changing the dictionary does not change
                 the object, but the flag list itself is shared.) """
        return dict(vars(self))

    def as_db_entry(self):
        """ This method creates a compilation database entry.

        :return: a dictionary with 'file' (relative to the directory),
                 'arguments' and 'directory' keys """
        relative = os.path.relpath(self.source, self.directory)
        compiler = 'cc' if self.compiler == 'c' else 'c++'
        return {
            'file': relative,
            'arguments': [compiler, '-c'] + self.flags + [relative],
            'directory': self.directory
        }

    @staticmethod
    def from_db_entry(entry):
        """ Parser method for compilation entry.

        From compilation database entry it creates the compilation object.

        :param entry:   the compilation database entry. the command is
                        taken from the 'command' key (a single string) if
                        that is present, from 'arguments' (a list) otherwise
        :return: a single compilation object """
        if 'command' in entry:
            command = shell_split(entry['command'])
        else:
            command = entry['arguments']
        execution = Execution(cmd=command, cwd=entry['directory'], pid=0)
        entries = list(Compilation.iter_from_execution(execution))
        # no compilation is produced when the command is not recognized as
        # a compilation, or when the source file does not exist on the disk.
        assert len(entries) == 1
        return entries[0]

    @staticmethod
    def iter_from_execution(execution, cc='cc', cxx='c++'):
        """ Generator method for compilation entries.

        From a single compiler call it can generate zero or more entries.
        Only those source files are reported which exist on the disk.

        :param execution:   executed command and working directory
        :param cc:          user specified C compiler name
        :param cxx:         user specified C++ compiler name
        :return: stream of Compilation objects """
        candidate = Compilation._split_command(execution.cmd, cc, cxx)
        if not candidate:
            return
        for source in candidate.files:
            result = Compilation(directory=execution.cwd,
                                 source=source,
                                 compiler=candidate.compiler,
                                 flags=candidate.flags)
            if os.path.isfile(result.source):
                yield result

    @staticmethod
    def _split_compiler(command, cc, cxx):
        """ A predicate to decide the command is a compiler call or not.

        :param command:     the command to classify
        :param cc:          user specified C compiler name
        :param cxx:         user specified C++ compiler name
        :return: None if the command is not a compilation, or a tuple
                (compiler_language, rest of the command) otherwise """

        def is_wrapper(cmd):
            return bool(COMPILER_PATTERN_WRAPPER.match(cmd))

        def is_c_compiler(cmd):
            return os.path.basename(cc) == cmd or \
                any(pattern.match(cmd) for pattern in COMPILER_PATTERNS_CC)

        def is_cxx_compiler(cmd):
            return os.path.basename(cxx) == cmd or \
                any(pattern.match(cmd) for pattern in COMPILER_PATTERNS_CXX)

        if command:  # not empty list will allow to index '0' and '1:'
            executable = os.path.basename(command[0])
            parameters = command[1:]
            # 'wrapper' 'parameters' and
            # 'wrapper' 'compiler' 'parameters' are valid.
            # plus, a wrapper can wrap wrapper too.
            if is_wrapper(executable):
                result = Compilation._split_compiler(parameters, cc, cxx)
                # a wrapper without a known compiler is taken as C compiler
                return ('c', parameters) if result is None else result
            # and 'compiler' 'parameters' is valid.
            elif is_c_compiler(executable):
                return 'c', parameters
            elif is_cxx_compiler(executable):
                return 'c++', parameters
        return None

    @staticmethod
    def _split_command(command, cc, cxx):
        """ Returns a value when the command is a compilation, None otherwise.

        :param command:     the command to classify
        :param cc:          user specified C compiler name
        :param cxx:         user specified C++ compiler name
        :return: a CompilationCommand object, or None when the command is
                 not a compiler call, does not involve the compilation pass
                 or has no source file """
        logging.debug('input was: %s', command)
        # quit right now, if the program was not a C/C++ compiler
        compiler_and_arguments = Compilation._split_compiler(command, cc, cxx)
        if compiler_and_arguments is None:
            return None

        # the result of this method
        result = CompilationCommand(compiler=compiler_and_arguments[0],
                                    flags=[],
                                    files=[])
        # flags which tell that the compilation pass is not involved
        not_compiling = {'-E', '-S', '-cc1', '-M', '-MM', '-###'}
        # iterate on the compile options
        args = iter(compiler_and_arguments[1])
        for arg in args:
            # quit when compilation pass is not involved
            if arg in not_compiling:
                return None
            # ignore some flags
            elif arg in IGNORED_FLAGS:
                for _ in range(IGNORED_FLAGS[arg]):
                    # the default value keeps a truncated command (flag
                    # without its value) from raising StopIteration, which
                    # would escape into the `iter_from_execution` generator.
                    next(args, None)
            elif re.match(r'^-(l|L|Wl,).+', arg):
                pass
            # some parameters could look like filename, take as compile option
            elif arg in {'-D', '-I'}:
                result.flags.append(arg)
                value = next(args, None)
                # the value is missing when the flag was the last argument
                if value is not None:
                    result.flags.append(value)
            # parameter which looks source file is taken...
            elif re.match(r'^[^-].+', arg) and classify_source(arg):
                result.files.append(arg)
            # and consider everything else as compile option.
            else:
                result.flags.append(arg)
        logging.debug('output is: %s', result)
        # do extra check on number of source files
        return result if result.files else None
class CompilationDatabase:
    """ Reads and writes the compilations as a JSON file. """

    @staticmethod
    def save(filename, iterator):
        """ Saves the compilations into a JSON file.

        :param filename:    the file to write (it is overwritten)
        :param iterator:    iterable of objects which have `as_db_entry`
                            method (eg.: Compilation objects) """
        entries = [entry.as_db_entry() for entry in iterator]
        # write-only mode: the file is never read back here
        with open(filename, 'w') as handle:
            json.dump(entries, handle, sort_keys=True, indent=4)

    @staticmethod
    def load(filename):
        """ Generator method to read the compilations from a JSON file.

        :param filename:    the file to read
        :return: stream of objects created by `Compilation.from_db_entry`,
                 one for each entry of the file """
        # the whole file is parsed at once, so it can be closed before the
        # first entry is yielded. (the handle is not kept open while the
        # caller consumes the generator.)
        with open(filename, 'r') as handle:
            entries = json.load(handle)
        for entry in entries:
            yield Compilation.from_db_entry(entry)
def classify_source(filename, c_compiler=True):
    """ Tells the presumed language of a source file.

    The decision is based on the file name extension only, and the
    extension is compared case sensitive.

    :param filename:    the source file name
    :param c_compiler:  indicate that the compiler is a C compiler. (it
                        decides the language for '.c' and '.i' files.)
    :return: the language from file name extension, or None when the
             extension is not a known one. """
    extension = os.path.splitext(os.path.basename(filename))[1]

    # these two extensions depend on the kind of the compiler
    if extension == '.c':
        return 'c' if c_compiler else 'c++'
    if extension == '.i':
        return 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    # every other extension has a fixed language
    fixed_languages = {
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }
    return fixed_languages.get(extension)