142 lines
4.6 KiB
Python
142 lines
4.6 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
# See https://llvm.org/LICENSE.txt for license information.
|
||
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
"""This module is responsible for parsing a compiler invocation."""
|
||
|
|
||
|
import re
|
||
|
import os
|
||
|
import collections
|
||
|
|
||
|
__all__ = ["split_command", "classify_source", "compiler_language"]
|
||
|
|
||
|
# Ignored compiler options map for compilation database creation.
|
||
|
# The map is used in `split_command` method. (Which does ignore and classify
|
||
|
# parameters.) Please note, that these are not the only parameters which
|
||
|
# might be ignored.
|
||
|
#
|
||
|
# Keys are the option names; values are the number of following arguments to skip.
|
||
|
IGNORED_FLAGS = {
    # "Compile only" switch: dropped because whoever writes the compilation
    # database adds it explicitly.
    "-c": 0,
    # Preprocessor dependency-generation options: keeping them would yield
    # entries that differ only in these flags. Users reported noticeably
    # longer run times caused by such duplicates.
    "-MD": 0,
    "-MMD": 0,
    "-MG": 0,
    "-MP": 0,
    "-MF": 1,
    "-MT": 1,
    "-MQ": 1,
    # Linker options: the compilation database holds compile commands only,
    # so the compiler would disregard these anyway. Removing them simply
    # makes the output easier to read.
    "-static": 0,
    "-shared": 0,
    "-s": 0,
    "-rdynamic": 0,
    "-l": 1,
    "-L": 1,
    "-u": 1,
    "-z": 1,
    "-T": 1,
    "-Xlinker": 1,
}
|
||
|
|
||
|
# Known C/C++ compiler executable name patterns
|
||
|
# Each expression is matched against the base name of the executable:
#   1. plain `cc` / `c++`, optionally behind an `intercept-` or `analyze-`
#      wrapper prefix
#   2. `gcc` / `g++` / `mcc` / `m++` with optional dash-separated prefixes
#      and an optional version suffix
#   3. `clang` / `clang++` with optional dash-separated prefixes and an
#      optional version suffix
#   4. `llvm-gcc` / `llvm-g++`
COMPILER_PATTERNS = frozenset(
    re.compile(expression)
    for expression in (
        r"^(intercept-|analyze-|)c(c|\+\+)$",
        r"^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$",
        r"^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$",
        r"^llvm-g(cc|\+\+)$",
    )
)
|
||
|
|
||
|
|
||
|
# Result type of `split_command`. Created once here rather than on every
# call, and instantiated properly instead of being used as a bag of class
# attributes.
_Compilation = collections.namedtuple("Compilation", ["compiler", "flags", "files"])


def split_command(command):
    """Returns a value when the command is a compilation, None otherwise.

    The `command` is the argument list of an executed program, the first
    element being the executable itself.

    The value on success is a named tuple with the following attributes:

    files:    list of source files
    flags:    list of compile options
    compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when the invocation does not run a compilation pass (e.g. `-E`, `-S`,
    `-M`), or when no source file was found among the arguments.

    A command line which ends with an option that expects a separate value
    (e.g. a trailing `-MF` or `-I`) is tolerated: the missing value is
    simply not consumed."""

    # quit right now, if the program was not a C/C++ compiler
    compiler = compiler_language(command)
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options; an explicit iterator is used because
    # some options consume the argument(s) following them
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {"-E", "-S", "-cc1", "-M", "-MM", "-###"}:
            return None
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # skip the separate value(s) of the flag too; the default value
            # prevents StopIteration from escaping on a truncated command
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        # linker related flags with the value glued to the option
        elif re.match(r"^-(l|L|Wl,).+", arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {"-D", "-I"}:
            flags.append(arg)
            value = next(args, None)
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r"^[^-].+", arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=compiler, flags=flags, files=files)
|
||
|
|
||
|
|
||
|
def classify_source(filename, c_compiler=True):
    """Map the extension of `filename` to a language name.

    The `c_compiler` flag decides how the ambiguous `.c` and `.i`
    extensions are interpreted: as C when it is true, as C++ otherwise.

    Returns the language name, or None when the extension is not known."""

    # the only entries which depend on the kind of the compiler
    plain_source = "c" if c_compiler else "c++"
    preprocessed_source = "c-cpp-output" if c_compiler else "c++-cpp-output"

    languages = {
        ".c": plain_source,
        ".i": preprocessed_source,
        ".ii": "c++-cpp-output",
        ".m": "objective-c",
        ".mi": "objective-c-cpp-output",
        ".mm": "objective-c++",
        ".mii": "objective-c++-cpp-output",
        ".C": "c++",
        ".cc": "c++",
        ".CC": "c++",
        ".cp": "c++",
        ".cpp": "c++",
        ".cxx": "c++",
        ".c++": "c++",
        ".C++": "c++",
        ".txx": "c++",
    }

    suffix = os.path.splitext(os.path.basename(filename))[1]
    return languages.get(suffix)
|
||
|
|
||
|
|
||
|
def compiler_language(command):
    """Decide whether `command` invokes a known C/C++ compiler.

    Only the base name of the first element of `command` is inspected.

    Returns 'c++' when the compiler name contains '++' (optionally followed
    by a dash-separated suffix), 'c' for any other known compiler name, and
    None when the command is empty or the executable is not recognised."""

    if not command:
        return None

    program = os.path.basename(command[0])
    for known_compiler in COMPILER_PATTERNS:
        if known_compiler.match(program):
            # a known compiler: the name tells which language it compiles
            is_cplusplus = re.match(r"^(.+)(\+\+)(-.+|)$", program)
            return "c++" if is_cplusplus else "c"
    return None
|