169 lines
5.7 KiB
Python
Executable File
169 lines
5.7 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# ===----------------------------------------------------------------------===##
|
|
#
|
|
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
# See https://llvm.org/LICENSE.txt for license information.
|
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
#
|
|
# ===----------------------------------------------------------------------===##
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import List # Needed for python 3.8 compatibility.
|
|
import argparse
|
|
import pathlib
|
|
import re
|
|
import sys
|
|
|
|
|
|
@dataclass
class header:
    """
    One entry of the --trace-include output: an included file and its depth.
    """

    # Name of the included file, as found on the trace line. None until set.
    name: str = field(default=None)
    # Nesting level of the include (1 for a top-level include). -1 until set.
    level: int = field(default=-1)
|
|
|
|
|
|
def parse_line(line: str) -> header:
    """
    Turn one line of --trace-include output into a `header`.

    A valid line starts with one or more periods, followed by a single space
    and the name of the included file. The script is terminated through
    sys.exit() when the line does not have that shape.
    """
    parsed = re.match(r"(\.+) (.+)", line)
    if parsed is None:
        sys.exit(f"Line {line} contains invalid data.")

    # The length of the run of leading periods is the nesting level of the
    # include.
    dots, name = parsed.groups()
    return header(name=name, level=len(dots))
|
|
|
|
|
|
# Matches the path of a file located under a `c++/vN` directory and captures
# the part of the path that follows that directory.
#
# On Windows, the path separators can either be forward slash or backslash.
# When it is a backslash, Clang prints it escaped as two consecutive
# backslashes, so every separator below accepts either `/` or two literal
# backslashes. The pieces are raw strings to avoid needing another level of
# escaping at the Python string literal level.
LIBCXX_HEADER_REGEX = (
    r".*c\+\+(?:/|\\\\)"  # anything, then the `c++` directory and a separator
    r"v[0-9]+(?:/|\\\\)"  # the `vN` directory and a separator
    r"(.+)"  # group 1: the path relative to the `c++/vN` directory
)
|
|
|
|
def is_libcxx_public_header(header : str) -> bool:
    """
    Tell whether `header` names a public C++ header of libc++.

    A public header lives in the c++/vN directory, is not a C compatibility
    header (a file whose name ends in `.h`) and is not a detail header.
    """
    found = re.match(LIBCXX_HEADER_REGEX, header)

    # Files outside of the c++/vN directory and C compatibility headers are
    # never public C++ headers.
    if found is None or header.endswith(".h"):
        return False

    # What remains is public unless it is a detail header, i.e. its name starts
    # with __ or it sits in a subdirectory whose name starts with __.
    inside = found.group(1)
    is_detail = inside.startswith("__") or re.search(r"(/|\\\\)__", inside) is not None
    return not is_detail
|
|
|
|
|
|
def is_libcxx_header(header : str) -> bool:
    """
    Tell whether `header` is a libc++ header other than a C compatibility header.

    libc++ detail headers are accepted even when their name ends in `.h`.
    """
    found = re.match(LIBCXX_HEADER_REGEX, header)
    if found is None:
        # Not a file of the c++/vN directory.
        return False

    inside = found.group(1)
    if not inside.endswith(".h"):
        return True

    # A `.h` file is kept only when it is a detail header (its name starts with
    # __ or it sits in a subdirectory starting with __); any other `.h` file is
    # a C compatibility header.
    return inside.startswith("__") or re.search(r"(/|\\\\)__", inside) is not None
|
|
|
|
|
|
def parse_file(file: pathlib.Path) -> List[header]:
    """
    Parse a file containing --trace-include output to generate a list of the top-level C++ includes
    contained in it.

    This effectively generates the dependency graph of C++ Standard Library headers of the header
    whose --trace-include it is. In order to get the expected result of --trace-include, the
    -fshow-skipped-includes flag also needs to be passed.

    The result holds, in file order, the `header` entries that were retained: every libc++ header
    found at nesting level 1, and every non-detail libc++ header that is not nested under an
    already retained non-top-level header. Duplicates are not removed.

    The script is terminated through sys.exit() (by parse_line) when a line is malformed.
    """
    # Nesting threshold used while the enclosing header is "transparent", i.e.
    # while libc++ headers are accepted whatever their depth.
    transparent = 999

    # Start in the transparent state. Without an initial value, a libc++ header
    # nested under a non-libc++ top-level include would read `level` before any
    # assignment and raise UnboundLocalError.
    level = transparent

    result = []
    with file.open(encoding="utf-8") as f:
        # Iterate the file lazily instead of loading every line up front.
        for line in f:
            entry = parse_line(line)

            # Skip non-libc++ headers
            if not is_libcxx_header(entry.name):
                continue

            # Include top-level headers in the output. There's usually exactly one,
            # except if the compiler is passed a file with `-include`. Top-level
            # headers are transparent, in the sense that we want to go look at
            # transitive includes underneath.
            if entry.level == 1:
                level = transparent
                result.append(entry)
                continue

            # Skip libc++ headers included transitively.
            if entry.level > level:
                continue

            # Detail headers are transparent too: we attribute all includes of public libc++
            # headers under a detail header to the last public libc++ header that included it.
            # The check is done on the path relative to the c++/vN directory (the match cannot
            # fail since is_libcxx_header() accepted the name), so that a directory starting
            # with __ in the installation prefix does not turn every header into a detail header.
            relative = re.match(LIBCXX_HEADER_REGEX, entry.name).group(1)
            if relative.startswith("__") or re.search(r"(/|\\\\)__", relative):
                level = transparent
                continue

            # Add the non-detail libc++ header to the list.
            level = entry.level
            result.append(entry)
    return result
|
|
|
|
|
|
def create_include_graph(path: pathlib.Path) -> List[List[str]]:
    """
    Build the include graph out of the `header.*` files found directly in `path`.

    The files are visited in sorted order and each one is parsed with parse_file().

    Returns one row per file that has at least one include. A row is the top-level header of the
    file followed by the headers it includes, without duplicates and in sorted order. All names
    are relative to the c++/vN directory.

    The script is terminated through sys.exit() when a file has no top-level libc++ header.
    """

    def relative(name: str) -> str:
        # Get the actual filename relative to libc++'s installation directory
        # instead of the full path.
        return re.match(LIBCXX_HEADER_REGEX, name).group(1)

    result = []
    for file in sorted(path.glob("header.*")):
        headers = parse_file(file)

        # There should be only one top-level header; the first one names the row.
        # Report a file without any instead of leaking a bare StopIteration.
        top_level_name = next((h.name for h in headers if h.level == 1), None)
        if top_level_name is None:
            sys.exit(f"File {file} does not contain any top-level libc++ header.")
        top_level = relative(top_level_name)

        # Remove duplicates in all includes. Sorting keeps the content of a row
        # independent of set iteration order.
        includes = sorted({relative(h.name) for h in headers if h.level != 1})

        if includes:
            result.append([top_level] + includes)
    return result
|
|
|
|
|
|
def print_csv(graph: List[str]) -> None:
    """
    Write the include graph to standard output, one `header include` pair per line.

    Each row of `graph` holds the including header followed by the headers it
    includes. The includes of a row are written in sorted order. The script is
    terminated through sys.exit() as soon as a header turns out to include
    itself; pairs written before that point stay on the output.
    """
    for row in graph:
        header = row[0]
        for include in sorted(row[1:]):
            if include != header:
                print(f"{header} {include}")
                continue
            sys.exit(f"Cycle detected: header {header} includes itself.")
|
|
|
|
|
|
def main(argv=None) -> None:
    """
    Command-line entry point: print the include graph of the directory given as DIR.

    `argv` is the list of command-line arguments to parse. When it is None,
    argparse falls back to the arguments of the running process, which is what
    happens when the file is executed as a script.
    """
    parser = argparse.ArgumentParser(
        description="""Produce a dependency graph of libc++ headers, in CSV format.
Typically this script is executed by libcxx/test/libcxx/transitive_includes.sh.cpp""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # `input` is a required positional argument, so no default is declared for it.
    parser.add_argument(
        "input",
        metavar="DIR",
        help="The directory containing the transitive includes of the headers.",
    )
    options = parser.parse_args(argv)

    root = pathlib.Path(options.input)
    print_csv(create_include_graph(root))


if __name__ == "__main__":
    main()
|