forked from OSchip/llvm-project
Improve include fixer's ranking by taking the paths into account.
Instead of just using popularity, we also take into account how similar the path of the current file is to the path of the header. Our first approach is to get popularity into a reasonably small scale by taking log2 (which is roughly intuitive to how humans would bucket popularity), and multiply that with the number of matching prefix path fragments of the included header with the current file. Note that currently we do not take special care for unclean paths containing "../" or "./". Differential Revision: https://reviews.llvm.org/D28548 llvm-svn: 291664
This commit is contained in:
parent
c22c889f77
commit
a47515ec4a
|
|
@ -365,6 +365,9 @@ IncludeFixerSemaSource::query(StringRef Query, StringRef ScopedQualifiers,
|
||||||
.getLocWithOffset(Range.getOffset())
|
.getLocWithOffset(Range.getOffset())
|
||||||
.print(llvm::dbgs(), CI->getSourceManager()));
|
.print(llvm::dbgs(), CI->getSourceManager()));
|
||||||
DEBUG(llvm::dbgs() << " ...");
|
DEBUG(llvm::dbgs() << " ...");
|
||||||
|
llvm::StringRef FileName = CI->getSourceManager().getFilename(
|
||||||
|
CI->getSourceManager().getLocForStartOfFile(
|
||||||
|
CI->getSourceManager().getMainFileID()));
|
||||||
|
|
||||||
QuerySymbolInfos.push_back({Query.str(), ScopedQualifiers, Range});
|
QuerySymbolInfos.push_back({Query.str(), ScopedQualifiers, Range});
|
||||||
|
|
||||||
|
|
@ -385,9 +388,10 @@ IncludeFixerSemaSource::query(StringRef Query, StringRef ScopedQualifiers,
|
||||||
// context, it might treat the identifier as a nested class of the scoped
|
// context, it might treat the identifier as a nested class of the scoped
|
||||||
// namespace.
|
// namespace.
|
||||||
std::vector<find_all_symbols::SymbolInfo> MatchedSymbols =
|
std::vector<find_all_symbols::SymbolInfo> MatchedSymbols =
|
||||||
SymbolIndexMgr.search(QueryString, /*IsNestedSearch=*/false);
|
SymbolIndexMgr.search(QueryString, /*IsNestedSearch=*/false, FileName);
|
||||||
if (MatchedSymbols.empty())
|
if (MatchedSymbols.empty())
|
||||||
MatchedSymbols = SymbolIndexMgr.search(Query);
|
MatchedSymbols =
|
||||||
|
SymbolIndexMgr.search(Query, /*IsNestedSearch=*/true, FileName);
|
||||||
DEBUG(llvm::dbgs() << "Having found " << MatchedSymbols.size()
|
DEBUG(llvm::dbgs() << "Having found " << MatchedSymbols.size()
|
||||||
<< " symbols\n");
|
<< " symbols\n");
|
||||||
// We store a copy of MatchedSymbols in a place where it's globally reachable.
|
// We store a copy of MatchedSymbols in a place where it's globally reachable.
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@
|
||||||
#include "llvm/ADT/DenseMap.h"
|
#include "llvm/ADT/DenseMap.h"
|
||||||
#include "llvm/ADT/SmallVector.h"
|
#include "llvm/ADT/SmallVector.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
|
#include "llvm/Support/Path.h"
|
||||||
|
|
||||||
#define DEBUG_TYPE "include-fixer"
|
#define DEBUG_TYPE "include-fixer"
|
||||||
|
|
||||||
|
|
@ -20,30 +21,57 @@ namespace include_fixer {
|
||||||
|
|
||||||
using clang::find_all_symbols::SymbolInfo;
|
using clang::find_all_symbols::SymbolInfo;
|
||||||
|
|
||||||
/// Sorts SymbolInfos based on the popularity info in SymbolInfo.
|
// Calculate a score based on whether we think the given header is closely
|
||||||
static void rankByPopularity(std::vector<SymbolInfo> &Symbols) {
|
// related to the given source file.
|
||||||
// First collect occurrences per header file.
|
static double similarityScore(llvm::StringRef FileName,
|
||||||
llvm::DenseMap<llvm::StringRef, unsigned> HeaderPopularity;
|
llvm::StringRef Header) {
|
||||||
for (const SymbolInfo &Symbol : Symbols) {
|
// Compute the maximum number of common path segements between Header and
|
||||||
unsigned &Popularity = HeaderPopularity[Symbol.getFilePath()];
|
// a suffix of FileName.
|
||||||
Popularity = std::max(Popularity, Symbol.getNumOccurrences());
|
// We do not do a full longest common substring computation, as Header
|
||||||
|
// specifies the path we would directly #include, so we assume it is rooted
|
||||||
|
// relatively to a subproject of the repository.
|
||||||
|
int MaxSegments = 1;
|
||||||
|
for (auto FileI = llvm::sys::path::begin(FileName),
|
||||||
|
FileE = llvm::sys::path::end(FileName);
|
||||||
|
FileI != FileE; ++FileI) {
|
||||||
|
int Segments = 0;
|
||||||
|
for (auto HeaderI = llvm::sys::path::begin(Header),
|
||||||
|
HeaderE = llvm::sys::path::end(Header), I = FileI;
|
||||||
|
HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
|
||||||
|
++Segments;
|
||||||
|
}
|
||||||
|
MaxSegments = std::max(Segments, MaxSegments);
|
||||||
}
|
}
|
||||||
|
return MaxSegments;
|
||||||
|
}
|
||||||
|
|
||||||
// Sort by the gathered popularities. Use file name as a tie breaker so we can
|
static void rank(std::vector<SymbolInfo> &Symbols,
|
||||||
|
llvm::StringRef FileName) {
|
||||||
|
llvm::DenseMap<llvm::StringRef, double> Score;
|
||||||
|
for (const SymbolInfo &Symbol : Symbols) {
|
||||||
|
// Calculate a score from the similarity of the header the symbol is in
|
||||||
|
// with the current file and the popularity of the symbol.
|
||||||
|
double NewScore = similarityScore(FileName, Symbol.getFilePath()) *
|
||||||
|
(1.0 + std::log2(1 + Symbol.getNumOccurrences()));
|
||||||
|
double &S = Score[Symbol.getFilePath()];
|
||||||
|
S = std::max(S, NewScore);
|
||||||
|
}
|
||||||
|
// Sort by the gathered scores. Use file name as a tie breaker so we can
|
||||||
// deduplicate.
|
// deduplicate.
|
||||||
std::sort(Symbols.begin(), Symbols.end(),
|
std::sort(Symbols.begin(), Symbols.end(),
|
||||||
[&](const SymbolInfo &A, const SymbolInfo &B) {
|
[&](const SymbolInfo &A, const SymbolInfo &B) {
|
||||||
auto APop = HeaderPopularity[A.getFilePath()];
|
auto AS = Score[A.getFilePath()];
|
||||||
auto BPop = HeaderPopularity[B.getFilePath()];
|
auto BS = Score[B.getFilePath()];
|
||||||
if (APop != BPop)
|
if (AS != BS)
|
||||||
return APop > BPop;
|
return AS > BS;
|
||||||
return A.getFilePath() < B.getFilePath();
|
return A.getFilePath() < B.getFilePath();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<find_all_symbols::SymbolInfo>
|
std::vector<find_all_symbols::SymbolInfo>
|
||||||
SymbolIndexManager::search(llvm::StringRef Identifier,
|
SymbolIndexManager::search(llvm::StringRef Identifier,
|
||||||
bool IsNestedSearch) const {
|
bool IsNestedSearch,
|
||||||
|
llvm::StringRef FileName) const {
|
||||||
// The identifier may be fully qualified, so split it and get all the context
|
// The identifier may be fully qualified, so split it and get all the context
|
||||||
// names.
|
// names.
|
||||||
llvm::SmallVector<llvm::StringRef, 8> Names;
|
llvm::SmallVector<llvm::StringRef, 8> Names;
|
||||||
|
|
@ -119,7 +147,7 @@ SymbolIndexManager::search(llvm::StringRef Identifier,
|
||||||
TookPrefix = true;
|
TookPrefix = true;
|
||||||
} while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
|
} while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
|
||||||
|
|
||||||
rankByPopularity(MatchedSymbols);
|
rank(MatchedSymbols, FileName);
|
||||||
return MatchedSymbols;
|
return MatchedSymbols;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,8 @@ public:
|
||||||
///
|
///
|
||||||
/// \returns A list of symbol candidates.
|
/// \returns A list of symbol candidates.
|
||||||
std::vector<find_all_symbols::SymbolInfo>
|
std::vector<find_all_symbols::SymbolInfo>
|
||||||
search(llvm::StringRef Identifier, bool IsNestedSearch = true) const;
|
search(llvm::StringRef Identifier, bool IsNestedSearch = true,
|
||||||
|
llvm::StringRef FileName = "") const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<std::shared_future<std::unique_ptr<SymbolIndex>>> SymbolIndices;
|
std::vector<std::shared_future<std::unique_ptr<SymbolIndex>>> SymbolIndices;
|
||||||
|
|
|
||||||
|
|
@ -332,7 +332,8 @@ int includeFixerMain(int argc, const char **argv) {
|
||||||
|
|
||||||
// Query symbol mode.
|
// Query symbol mode.
|
||||||
if (!QuerySymbol.empty()) {
|
if (!QuerySymbol.empty()) {
|
||||||
auto MatchedSymbols = SymbolIndexMgr->search(QuerySymbol);
|
auto MatchedSymbols = SymbolIndexMgr->search(
|
||||||
|
QuerySymbol, /*IsNestedSearch=*/true, SourceFilePath);
|
||||||
for (auto &Symbol : MatchedSymbols) {
|
for (auto &Symbol : MatchedSymbols) {
|
||||||
std::string HeaderPath = Symbol.getFilePath().str();
|
std::string HeaderPath = Symbol.getFilePath().str();
|
||||||
Symbol.SetFilePath(((HeaderPath[0] == '"' || HeaderPath[0] == '<')
|
Symbol.SetFilePath(((HeaderPath[0] == '"' || HeaderPath[0] == '<')
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ FilePath: foo.h
|
||||||
LineNumber: 1
|
LineNumber: 1
|
||||||
Type: Class
|
Type: Class
|
||||||
NumOccurrences: 1
|
NumOccurrences: 1
|
||||||
...
|
|
||||||
---
|
---
|
||||||
Name: bar
|
Name: bar
|
||||||
Contexts:
|
Contexts:
|
||||||
|
|
@ -21,7 +20,7 @@ FilePath: ../include/bar.h
|
||||||
LineNumber: 1
|
LineNumber: 1
|
||||||
Type: Class
|
Type: Class
|
||||||
NumOccurrences: 1
|
NumOccurrences: 1
|
||||||
...
|
---
|
||||||
Name: bar
|
Name: bar
|
||||||
Contexts:
|
Contexts:
|
||||||
- ContextType: Namespace
|
- ContextType: Namespace
|
||||||
|
|
@ -32,7 +31,7 @@ FilePath: ../include/bar.h
|
||||||
LineNumber: 2
|
LineNumber: 2
|
||||||
Type: Class
|
Type: Class
|
||||||
NumOccurrences: 3
|
NumOccurrences: 3
|
||||||
...
|
---
|
||||||
Name: bar
|
Name: bar
|
||||||
Contexts:
|
Contexts:
|
||||||
- ContextType: Namespace
|
- ContextType: Namespace
|
||||||
|
|
@ -50,4 +49,12 @@ FilePath: var.h
|
||||||
LineNumber: 1
|
LineNumber: 1
|
||||||
Type: Variable
|
Type: Variable
|
||||||
NumOccurrences: 1
|
NumOccurrences: 1
|
||||||
...
|
---
|
||||||
|
Name: bar
|
||||||
|
Contexts:
|
||||||
|
- ContextType: Namespace
|
||||||
|
ContextName: c
|
||||||
|
FilePath: test/include-fixer/baz.h
|
||||||
|
LineNumber: 1
|
||||||
|
Type: Class
|
||||||
|
NumOccurrences: 1
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,9 @@
|
||||||
// RUN: clang-include-fixer -db=yaml -input=%S/Inputs/fake_yaml_db.yaml -output-headers %s -- | FileCheck %s
|
// RUN: clang-include-fixer -db=yaml -input=%S/Inputs/fake_yaml_db.yaml -output-headers %s -- | FileCheck %s
|
||||||
|
// RUN: clang-include-fixer -query-symbol bar -db=yaml -input=%S/Inputs/fake_yaml_db.yaml -output-headers %s -- | FileCheck %s
|
||||||
|
|
||||||
// CHECK: "HeaderInfos": [
|
// CHECK: "HeaderInfos": [
|
||||||
|
// CHECK-NEXT: {"Header": "\"test/include-fixer/baz.h\"",
|
||||||
|
// CHECK-NEXT: "QualifiedName": "c::bar"},
|
||||||
// CHECK-NEXT: {"Header": "\"../include/bar.h\"",
|
// CHECK-NEXT: {"Header": "\"../include/bar.h\"",
|
||||||
// CHECK-NEXT: "QualifiedName": "b::a::bar"},
|
// CHECK-NEXT: "QualifiedName": "b::a::bar"},
|
||||||
// CHECK-NEXT: {"Header": "\"../include/zbar.h\"",
|
// CHECK-NEXT: {"Header": "\"../include/zbar.h\"",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue