[clangd] IncludeCleaner: Attribute symbols from non self-contained headers to their parents

When a symbol comes from the non self-contained header, we recursively uplift
the file we consider used to the first includer that has a header guard. We
need to do this while we still have FileIDs because every time a non
self-contained header is included, it gets a new FileID but is later
deduplicated by HeaderID and it's not possible to understand where it was
included from.

Based on D114370.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D114623
This commit is contained in:
Kirill Bobyrev 2021-11-26 16:20:40 +01:00
parent fc0aacf324
commit 34cc210aa8
No known key found for this signature in database
GPG Key ID: 2307C055C8384FA0
3 changed files with 117 additions and 11 deletions

View File

@ -8,6 +8,7 @@
#include "IncludeCleaner.h"
#include "Config.h"
#include "Headers.h"
#include "ParsedAST.h"
#include "Protocol.h"
#include "SourceCode.h"
@ -16,6 +17,7 @@
#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/Syntax/Tokens.h"
@ -221,6 +223,31 @@ bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
return true;
}
// In case symbols are coming from non self-contained header, we need to find
// its first includer that is self-contained. This is the header users can
// include, so it will be responsible for bringing the symbols from given
// header into the scope.
FileID headerResponsible(FileID ID, const SourceManager &SM,
const IncludeStructure &Includes) {
// Unroll the chain of non self-contained headers until we find the one that
// can be included.
for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID();
FE = SM.getFileEntryForID(ID)) {
// If FE is nullptr, we consider it to be the responsible header.
if (!FE)
break;
auto HID = Includes.getID(FE);
assert(HID && "We're iterating over headers already existing in "
"IncludeStructure");
if (Includes.isSelfContained(*HID))
break;
// The header is not self-contained: put the responsibility for its symbols
// on its includer.
ID = SM.getFileID(SM.getIncludeLoc(ID));
}
return ID;
}
} // namespace
ReferencedLocations findReferencedLocations(ParsedAST &AST) {
@ -234,20 +261,27 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST) {
llvm::DenseSet<FileID>
findReferencedFiles(const llvm::DenseSet<SourceLocation> &Locs,
const SourceManager &SM) {
const IncludeStructure &Includes, const SourceManager &SM) {
std::vector<SourceLocation> Sorted{Locs.begin(), Locs.end()};
llvm::sort(Sorted); // Group by FileID.
ReferencedFiles Result(SM);
ReferencedFiles Files(SM);
for (auto It = Sorted.begin(); It < Sorted.end();) {
FileID FID = SM.getFileID(*It);
Result.add(FID, *It);
Files.add(FID, *It);
// Cheaply skip over all the other locations from the same FileID.
// This avoids lots of redundant Loc->File lookups for the same file.
do
++It;
while (It != Sorted.end() && SM.isInFileID(*It, FID));
}
return std::move(Result.Files);
// If a header is not self-contained, we consider its symbols a logical part
// of the including file. Therefore, mark the parents of all used
// non-self-contained FileIDs as used. Perform this on FileIDs rather than
// HeaderIDs, as each inclusion of a non-self-contained file is distinct.
llvm::DenseSet<FileID> Result;
for (FileID ID : Files.Files)
Result.insert(headerResponsible(ID, SM, Includes));
return Result;
}
std::vector<const Inclusion *>
@ -304,12 +338,8 @@ std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
const auto &SM = AST.getSourceManager();
auto Refs = findReferencedLocations(AST);
// FIXME(kirillbobyrev): Attribute the symbols from non self-contained
// headers to their parents while the information about respective
// SourceLocations and FileIDs is not lost. It is not possible to do that
// later when non-guarded headers included multiple times will get same
// HeaderID.
auto ReferencedFileIDs = findReferencedFiles(Refs, SM);
auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(),
AST.getSourceManager());
auto ReferencedHeaders =
translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM);
return getUnused(AST, ReferencedHeaders);

View File

@ -52,6 +52,7 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST);
/// The output only includes things SourceManager sees as files (not macro IDs).
/// This can include <built-in>, <scratch space> etc that are not true files.
llvm::DenseSet<FileID> findReferencedFiles(const ReferencedLocations &Locs,
const IncludeStructure &Includes,
const SourceManager &SM);
/// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs).

View File

@ -281,7 +281,8 @@ TEST(IncludeCleaner, VirtualBuffers) {
auto &SM = AST.getSourceManager();
auto &Includes = AST.getIncludeStructure();
auto ReferencedFiles = findReferencedFiles(findReferencedLocations(AST), SM);
auto ReferencedFiles =
findReferencedFiles(findReferencedLocations(AST), Includes, SM);
llvm::StringSet<> ReferencedFileNames;
for (FileID FID : ReferencedFiles)
ReferencedFileNames.insert(
@ -303,6 +304,80 @@ TEST(IncludeCleaner, VirtualBuffers) {
EXPECT_THAT(getUnused(AST, ReferencedHeaders), ::testing::IsEmpty());
}
TEST(IncludeCleaner, DistinctUnguardedInclusions) {
TestTU TU;
TU.Code = R"cpp(
#include "bar.h"
#include "foo.h"
int LocalFoo = foo::Variable;
)cpp";
TU.AdditionalFiles["foo.h"] = R"cpp(
#pragma once
namespace foo {
#include "unguarded.h"
}
)cpp";
TU.AdditionalFiles["bar.h"] = R"cpp(
#pragma once
namespace bar {
#include "unguarded.h"
}
)cpp";
TU.AdditionalFiles["unguarded.h"] = R"cpp(
constexpr int Variable = 42;
)cpp";
ParsedAST AST = TU.build();
auto ReferencedFiles =
findReferencedFiles(findReferencedLocations(AST),
AST.getIncludeStructure(), AST.getSourceManager());
llvm::StringSet<> ReferencedFileNames;
auto &SM = AST.getSourceManager();
for (FileID FID : ReferencedFiles)
ReferencedFileNames.insert(
SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
// Note that we have uplifted the referenced files from non self-contained
// headers to header-guarded ones.
EXPECT_THAT(ReferencedFileNames.keys(),
UnorderedElementsAre(testPath("foo.h")));
}
TEST(IncludeCleaner, NonSelfContainedHeaders) {
TestTU TU;
TU.Code = R"cpp(
#include "foo.h"
int LocalFoo = Variable;
)cpp";
TU.AdditionalFiles["foo.h"] = R"cpp(
#pragma once
#include "indirection.h"
)cpp";
TU.AdditionalFiles["indirection.h"] = R"cpp(
#include "unguarded.h"
)cpp";
TU.AdditionalFiles["unguarded.h"] = R"cpp(
constexpr int Variable = 42;
)cpp";
ParsedAST AST = TU.build();
auto ReferencedFiles =
findReferencedFiles(findReferencedLocations(AST),
AST.getIncludeStructure(), AST.getSourceManager());
llvm::StringSet<> ReferencedFileNames;
auto &SM = AST.getSourceManager();
for (FileID FID : ReferencedFiles)
ReferencedFileNames.insert(
SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
// Note that we have uplifted the referenced files from non self-contained
// headers to header-guarded ones.
EXPECT_THAT(ReferencedFileNames.keys(),
UnorderedElementsAre(testPath("foo.h")));
}
} // namespace
} // namespace clangd
} // namespace clang