mirror of
https://github.com/rizsotto/scan-build.git
synced 2025-12-16 12:00:08 +01:00
275 lines
9.8 KiB
Python
275 lines
9.8 KiB
Python
# -*- coding: utf-8 -*-
|
|
# The LLVM Compiler Infrastructure
|
|
#
|
|
# This file is distributed under the University of Illinois Open Source
|
|
# License. See LICENSE.TXT for details.
|
|
""" This module is responsible to parse a compiler invocation. """
|
|
|
|
import re
|
|
import os
|
|
import collections
|
|
import logging
|
|
import json
|
|
from libscanbuild import Execution, shell_split
|
|
|
|
__all__ = ['classify_source', 'Compilation', 'CompilationDatabase']
|
|
|
|
# Compiler options which are dropped when a compilation database entry is
# created. The map is consulted by the `_split_command` method (which does
# ignore and classify parameters). Please note that these are not the only
# parameters which might be ignored.
#
# Key is the option name, value is the number of following arguments
# which are skipped together with the option.
IGNORED_FLAGS = {
    # compile-only flag: dropped because the creator of the compilation
    # database sets it explicitly.
    '-c': 0,

    # preprocessor flags of the `-M` family: dropped because they would
    # cause duplicate entries in the output (entries which differ only in
    # these flags). this is an actual finding from users, who suffered
    # longer execution time caused by the duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,

    # linker options: dropped because the compilation database contains
    # compilation commands only, so the compiler would ignore these flags
    # anyway. getting rid of them makes the output more readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1,
}
|
|
|
|
# Executable name pattern of the known C/C++ compiler wrappers.
COMPILER_PATTERN_WRAPPER = re.compile(r'^(distcc|ccache)$')

# Executable name patterns of the known C compilers.
COMPILER_PATTERNS_CC = frozenset(re.compile(pattern) for pattern in (
    r'^(|i|mpi)cc$',
    r'^([^-]*-)*[mg]cc(-\d+(\.\d+){0,2})?$',
    r'^([^-]*-)*clang(-\d+(\.\d+){0,2})?$',
    r'^(g|)xlc$',
))

# Executable name patterns of the known C++ compilers.
COMPILER_PATTERNS_CXX = frozenset(re.compile(pattern) for pattern in (
    r'^(c\+\+|cxx|CC)$',
    r'^([^-]*-)*[mg]\+\+(-\d+(\.\d+){0,2})?$',
    r'^([^-]*-)*clang\+\+(-\d+(\.\d+){0,2})?$',
    r'^(icpc|mpiCC|mpicxx|mpic\+\+)$',
    r'^(g|)xl(C|c\+\+)$',
))
|
|
|
|
# Intermediate result of the command parsing: the compiler kind, the kept
# compiler flags and the source files found in a single compiler call.
_COMPILATION_COMMAND_FIELDS = ('compiler', 'flags', 'files')
CompilationCommand = collections.namedtuple(
    'CompilationCommand', _COMPILATION_COMMAND_FIELDS)
|
|
|
|
|
|
class Compilation:
    """ Represents a single compilation: one source file, the kind of the
    compiler, the kept compiler flags and the working directory. """

    def __init__(self, compiler, flags, source, directory):
        """ Constructor for a single compilation.

        This method just normalizes the paths and initializes values.

        :param compiler:    the compiler kind ('c' or 'c++' when the object
                            is created by `iter_from_execution`)
        :param flags:       list of compiler flags (strings)
        :param source:      the source file; a relative path is resolved
                            against the directory
        :param directory:   the working directory of the compilation """

        self.compiler = compiler
        self.flags = flags
        self.directory = os.path.normpath(directory)
        self.source = source if os.path.isabs(source) else \
            os.path.normpath(os.path.join(self.directory, source))

    def __hash__(self):
        # flags is a list (not hashable), therefore it is joined into a
        # single string before hashing.
        return hash((self.compiler, self.source, self.directory,
                     ':'.join(self.flags)))

    def __eq__(self, other):
        # `vars` raises TypeError on objects without `__dict__` (numbers,
        # strings, ...), so let Python fall back to its default comparison
        # when the other operand is not a compilation.
        if not isinstance(other, Compilation):
            return NotImplemented
        return vars(self) == vars(other)

    def as_dict(self):
        """ This method dumps the object attributes into a dictionary.

        :return: the attribute dictionary of this object (it is not a
                 copy, changes of it are changes of the object) """

        return vars(self)

    def as_db_entry(self):
        """ This method creates a compilation database entry.

        :return: dictionary with 'file', 'arguments' and 'directory' keys,
                 where the file is relative to the directory """

        relative = os.path.relpath(self.source, self.directory)
        compiler = 'cc' if self.compiler == 'c' else 'c++'
        return {
            'file': relative,
            'arguments': [compiler, '-c'] + self.flags + [relative],
            'directory': self.directory
        }

    @staticmethod
    def from_db_entry(entry):
        """ Parser method for compilation entry.

        From compilation database entry it creates the compilation object.
        The command is taken from the 'command' key (as a shell command
        string) when that is present, from the 'arguments' key otherwise.

        :param entry:   the compilation database entry
        :return:        a single compilation object """

        command = shell_split(entry['command']) if 'command' in entry else \
            entry['arguments']
        execution = Execution(cmd=command, cwd=entry['directory'], pid=0)
        entries = list(Compilation.iter_from_execution(execution))
        # an entry shall describe exactly one compilation. (mind that
        # `iter_from_execution` yields only for existing source files.)
        assert len(entries) == 1
        return entries[0]

    @staticmethod
    def iter_from_execution(execution, cc='cc', cxx='c++'):
        """ Generator method for compilation entries.

        From a single compiler call it can generate zero or more entries.
        Source files which do not exist on the file system are skipped.

        :param execution:   executed command and working directory
        :param cc:          user specified C compiler name
        :param cxx:         user specified C++ compiler name
        :return: stream of Compilation objects """

        candidate = Compilation._split_command(execution.cmd, cc, cxx)
        for source in (candidate.files if candidate else []):
            result = Compilation(directory=execution.cwd,
                                 source=source,
                                 compiler=candidate.compiler,
                                 flags=candidate.flags)
            if os.path.isfile(result.source):
                yield result

    @staticmethod
    def _split_compiler(command, cc, cxx):
        """ A predicate to decide the command is a compiler call or not.

        :param command:     the command to classify
        :param cc:          user specified C compiler name
        :param cxx:         user specified C++ compiler name
        :return: None if the command is not a compilation, or a tuple
                (compiler_language, rest of the command) otherwise """

        def is_wrapper(cmd):
            return bool(COMPILER_PATTERN_WRAPPER.match(cmd))

        def is_c_compiler(cmd):
            return os.path.basename(cc) == cmd or \
                any(pattern.match(cmd) for pattern in COMPILER_PATTERNS_CC)

        def is_cxx_compiler(cmd):
            return os.path.basename(cxx) == cmd or \
                any(pattern.match(cmd) for pattern in COMPILER_PATTERNS_CXX)

        if command:  # not empty list will allow to index '0' and '1:'
            executable = os.path.basename(command[0])
            parameters = command[1:]
            # 'wrapper' 'parameters' and
            # 'wrapper' 'compiler' 'parameters' are valid.
            # plus, a wrapper can wrap wrapper too.
            if is_wrapper(executable):
                result = Compilation._split_compiler(parameters, cc, cxx)
                # a wrapper without a recognized compiler is taken as a
                # C compiler call.
                return ('c', parameters) if result is None else result
            # and 'compiler' 'parameters' is valid.
            elif is_c_compiler(executable):
                return 'c', parameters
            elif is_cxx_compiler(executable):
                return 'c++', parameters
        return None

    @staticmethod
    def _split_command(command, cc, cxx):
        """ Returns a value when the command is a compilation, None otherwise.

        :param command:     the command to classify
        :param cc:          user specified C compiler name
        :param cxx:         user specified C++ compiler name
        :return: a CompilationCommand object when the command is a compiler
                call with at least one source file, None otherwise """

        logging.debug('input was: %s', command)
        # quit right now, if the program was not a C/C++ compiler
        compiler_and_arguments = Compilation._split_compiler(command, cc, cxx)
        if compiler_and_arguments is None:
            return None

        # the result of this method
        result = CompilationCommand(compiler=compiler_and_arguments[0],
                                    flags=[],
                                    files=[])
        # iterate on the compile options
        args = iter(compiler_and_arguments[1])
        for arg in args:
            # quit when compilation pass is not involved
            if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
                return None
            # ignore some flags
            elif arg in IGNORED_FLAGS:
                # the default value makes `next` tolerate a command which
                # ends before the flag got its value. (a StopIteration
                # leaking from here would break the `iter_from_execution`
                # generator.)
                for _ in range(IGNORED_FLAGS[arg]):
                    next(args, None)
            elif re.match(r'^-(l|L|Wl,).+', arg):
                pass
            # some parameters could look like filename, take as compile option
            elif arg in {'-D', '-I'}:
                value = next(args, None)
                # keep the flag alone if the command ends without its value
                result.flags.extend([arg] if value is None else [arg, value])
            # parameter which looks source file is taken...
            elif re.match(r'^[^-].+', arg) and classify_source(arg):
                result.files.append(arg)
            # and consider everything else as compile option.
            else:
                result.flags.append(arg)
        logging.debug('output is: %s', result)
        # do extra check on number of source files
        return result if result.files else None
|
|
|
|
|
|
class CompilationDatabase:
    """ Reads and writes the compilation database, which is a JSON file. """

    @staticmethod
    def save(filename, iterator):
        """ Writes the compilations into the given file.

        :param filename:    the file to write (its content is replaced)
        :param iterator:    iterable of objects which have `as_db_entry`
                            method """

        # the entries are created before the file is opened.
        content = [compilation.as_db_entry() for compilation in iterator]
        with open(filename, 'w+') as target:
            json.dump(content, target, sort_keys=True, indent=4)

    @staticmethod
    def load(filename):
        """ Generator method which reads the compilations from a file.

        :param filename:    the file to read
        :return: stream of compilation objects, one for each entry """

        with open(filename, 'r') as source:
            for record in json.load(source):
                yield Compilation.from_db_entry(record)
|
|
|
|
|
|
def classify_source(filename, c_compiler=True):
    """ Guess the language of a source file from the file name extension.

    :param filename:    the source file name
    :param c_compiler:  indicate that the compiler is a C compiler (it
                        decides the language of '.c' and '.i' files)
    :return: the language from file name extension, None when the
             extension is not known. """

    suffix = os.path.splitext(os.path.basename(filename))[1]

    # the language of these extensions depends on the compiler kind.
    if suffix == '.c':
        return 'c' if c_compiler else 'c++'
    if suffix == '.i':
        return 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    # the language of the rest is decided by the extension only.
    fixed_languages = {
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++',
    }
    return fixed_languages.get(suffix)
|