141 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			141 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
	
	
| # -*- coding: utf-8 -*-
 | |
| # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| # See https://llvm.org/LICENSE.txt for license information.
 | |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
""" This module is responsible for parsing a compiler invocation. """
 | |
| 
 | |
| import re
 | |
| import os
 | |
| import collections
 | |
| 
 | |
| __all__ = ['split_command', 'classify_source', 'compiler_language']
 | |
| 
 | |
# Compiler options which are dropped when the compilation database is
# created. The `split_command` function consults this map to decide which
# parameters to ignore. (This is not the complete list: that function
# filters out a few more parameters on its own.)
#
# Key: the option name. Value: the number of arguments following the option
# which have to be skipped together with it.
IGNORED_FLAGS = {
    # Compile-only flag. Dropped because the creator of the compilation
    # database sets it explicitly.
    '-c': 0,
    # Preprocessor dependency-output flags. Dropped because keeping them
    # would produce duplicate entries in the output (entries which differ
    # only in these flags). Users reported longer execution times caused
    # by such duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # Linker options. Dropped because the compilation database contains
    # compilation commands only, so the compiler would ignore them anyway.
    # Removing them makes the output more readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1,
}
 | |
| 
 | |
# Regular expressions which recognize the executable names of the known
# C/C++ compilers. A name is matched against each pattern in turn, from the
# start of the string.
COMPILER_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        # cc, c++ and their intercept-/analyze- prefixed wrappers
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # gcc, g++ (or mcc, m++) with optional dash separated prefixes
        # and an optional version suffix
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # clang, clang++ with optional dash separated prefixes and an
        # optional version suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # llvm-gcc, llvm-g++
        r'^llvm-g(cc|\+\+)$',
    ))
 | |
| 
 | |
| 
 | |
# The value returned by `split_command` for a compilation.
Compilation = collections.namedtuple('Compilation',
                                     ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command:  the command as a list of arguments; the first element is
              the executable.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when the command does not involve a compilation pass (eg.: it only
    runs the preprocessor) or when it has no source file among the
    arguments. """

    # quit right now, if the program was not a C/C++ compiler
    compiler = compiler_language(command)
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options. (an explicit iterator is used, to be
    # able to consume the arguments of an option inside the loop.)
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with their arguments. the default
        # value for `next` is given to tolerate a truncated command, where
        # the argument of the last option is missing.
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        # linker related options which are written together with their
        # value are ignored too
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            # the value might be missing when the option is the last one;
            # keep the option alone in that case instead of failing.
            value = next(args, None)
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return Compilation(compiler=compiler, flags=flags, files=files)
 | |
| 
 | |
| 
 | |
def classify_source(filename, c_compiler=True):
    """ Return the language which belongs to the file name extension.

    filename:   name (or path) of the file, only the extension of its last
                path component is considered.
    c_compiler: when it is false the '.c' and '.i' files are classified
                as C++ inputs instead of C ones.

    Returns the language name as string, or None when the extension is
    not a known one. """

    extension = os.path.splitext(os.path.basename(filename))[1]

    # these two extensions depend on the kind of the compiler
    if extension == '.c':
        return 'c' if c_compiler else 'c++'
    if extension == '.i':
        return 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    # the rest is the same for every compiler (the lookup is case sensitive)
    fixed_languages = {
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
    }
    for suffix in ('.C', '.cc', '.CC', '.cp', '.cpp', '.cxx', '.c++',
                   '.C++', '.txx'):
        fixed_languages[suffix] = 'c++'
    return fixed_languages.get(extension)
 | |
| 
 | |
| 
 | |
def compiler_language(command):
    """ A predicate to decide whether the command is a compiler call.

    command:  the command as a list of arguments; the first element is
              the executable (only its base name is inspected.)

    Returns 'c++' when the executable name matches a known compiler
    pattern and contains '++', 'c' when it matches a known compiler
    pattern otherwise. Returns None when the command is empty or the
    executable is not recognized as a compiler. """

    if not command:
        return None

    program = os.path.basename(command[0])
    recognized = False
    for known in COMPILER_PATTERNS:
        if known.match(program):
            recognized = True
            break
    if not recognized:
        return None

    # the '++' might be followed by a dash separated suffix (eg.: version)
    if re.match(r'^(.+)(\+\+)(-.+|)$', program):
        return 'c++'
    return 'c'
 |