159 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			159 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			C++
		
	
	
	
//===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "SymbolIndexManager.h"
 | 
						|
#include "find-all-symbols/SymbolInfo.h"
 | 
						|
#include "llvm/ADT/DenseMap.h"
 | 
						|
#include "llvm/ADT/SmallVector.h"
 | 
						|
#include "llvm/Support/Debug.h"
 | 
						|
#include "llvm/Support/Path.h"
 | 
						|
 | 
						|
#define DEBUG_TYPE "clang-include-fixer"
 | 
						|
 | 
						|
namespace clang {
 | 
						|
namespace include_fixer {
 | 
						|
 | 
						|
using find_all_symbols::SymbolInfo;
 | 
						|
using find_all_symbols::SymbolAndSignals;
 | 
						|
 | 
						|
// Calculate a score based on whether we think the given header is closely
 | 
						|
// related to the given source file.
 | 
						|
static double similarityScore(llvm::StringRef FileName,
 | 
						|
                              llvm::StringRef Header) {
 | 
						|
  // Compute the maximum number of common path segments between Header and
 | 
						|
  // a suffix of FileName.
 | 
						|
  // We do not do a full longest common substring computation, as Header
 | 
						|
  // specifies the path we would directly #include, so we assume it is rooted
 | 
						|
  // relatively to a subproject of the repository.
 | 
						|
  int MaxSegments = 1;
 | 
						|
  for (auto FileI = llvm::sys::path::begin(FileName),
 | 
						|
            FileE = llvm::sys::path::end(FileName);
 | 
						|
       FileI != FileE; ++FileI) {
 | 
						|
    int Segments = 0;
 | 
						|
    for (auto HeaderI = llvm::sys::path::begin(Header),
 | 
						|
              HeaderE = llvm::sys::path::end(Header), I = FileI;
 | 
						|
         HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
 | 
						|
      ++Segments;
 | 
						|
    }
 | 
						|
    MaxSegments = std::max(Segments, MaxSegments);
 | 
						|
  }
 | 
						|
  return MaxSegments;
 | 
						|
}
 | 
						|
 | 
						|
static void rank(std::vector<SymbolAndSignals> &Symbols,
 | 
						|
                 llvm::StringRef FileName) {
 | 
						|
  llvm::DenseMap<llvm::StringRef, double> Score;
 | 
						|
  for (const auto &Symbol : Symbols) {
 | 
						|
    // Calculate a score from the similarity of the header the symbol is in
 | 
						|
    // with the current file and the popularity of the symbol.
 | 
						|
    double NewScore = similarityScore(FileName, Symbol.Symbol.getFilePath()) *
 | 
						|
                      (1.0 + std::log2(1 + Symbol.Signals.Seen));
 | 
						|
    double &S = Score[Symbol.Symbol.getFilePath()];
 | 
						|
    S = std::max(S, NewScore);
 | 
						|
  }
 | 
						|
  // Sort by the gathered scores. Use file name as a tie breaker so we can
 | 
						|
  // deduplicate.
 | 
						|
  std::sort(Symbols.begin(), Symbols.end(),
 | 
						|
            [&](const SymbolAndSignals &A, const SymbolAndSignals &B) {
 | 
						|
              auto AS = Score[A.Symbol.getFilePath()];
 | 
						|
              auto BS = Score[B.Symbol.getFilePath()];
 | 
						|
              if (AS != BS)
 | 
						|
                return AS > BS;
 | 
						|
              return A.Symbol.getFilePath() < B.Symbol.getFilePath();
 | 
						|
            });
 | 
						|
}
 | 
						|
 | 
						|
std::vector<find_all_symbols::SymbolInfo>
 | 
						|
SymbolIndexManager::search(llvm::StringRef Identifier,
 | 
						|
                           bool IsNestedSearch,
 | 
						|
                           llvm::StringRef FileName) const {
 | 
						|
  // The identifier may be fully qualified, so split it and get all the context
 | 
						|
  // names.
 | 
						|
  llvm::SmallVector<llvm::StringRef, 8> Names;
 | 
						|
  Identifier.split(Names, "::");
 | 
						|
 | 
						|
  bool IsFullyQualified = false;
 | 
						|
  if (Identifier.startswith("::")) {
 | 
						|
    Names.erase(Names.begin()); // Drop first (empty) element.
 | 
						|
    IsFullyQualified = true;
 | 
						|
  }
 | 
						|
 | 
						|
  // As long as we don't find a result keep stripping name parts from the end.
 | 
						|
  // This is to support nested classes which aren't recorded in the database.
 | 
						|
  // Eventually we will either hit a class (namespaces aren't in the database
 | 
						|
  // either) and can report that result.
 | 
						|
  bool TookPrefix = false;
 | 
						|
  std::vector<SymbolAndSignals> MatchedSymbols;
 | 
						|
  do {
 | 
						|
    std::vector<SymbolAndSignals> Symbols;
 | 
						|
    for (const auto &DB : SymbolIndices) {
 | 
						|
      auto Res = DB.get()->search(Names.back());
 | 
						|
      Symbols.insert(Symbols.end(), Res.begin(), Res.end());
 | 
						|
    }
 | 
						|
 | 
						|
    LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got "
 | 
						|
                            << Symbols.size() << " results...\n");
 | 
						|
 | 
						|
    for (auto &SymAndSig : Symbols) {
 | 
						|
      const SymbolInfo &Symbol = SymAndSig.Symbol;
 | 
						|
      // Match the identifier name without qualifier.
 | 
						|
      bool IsMatched = true;
 | 
						|
      auto SymbolContext = Symbol.getContexts().begin();
 | 
						|
      auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name.
 | 
						|
      // Match the remaining context names.
 | 
						|
      while (IdentiferContext != Names.rend() &&
 | 
						|
             SymbolContext != Symbol.getContexts().end()) {
 | 
						|
        if (SymbolContext->second == *IdentiferContext) {
 | 
						|
          ++IdentiferContext;
 | 
						|
          ++SymbolContext;
 | 
						|
        } else if (SymbolContext->first ==
 | 
						|
                   find_all_symbols::SymbolInfo::ContextType::EnumDecl) {
 | 
						|
          // Skip non-scoped enum context.
 | 
						|
          ++SymbolContext;
 | 
						|
        } else {
 | 
						|
          IsMatched = false;
 | 
						|
          break;
 | 
						|
        }
 | 
						|
      }
 | 
						|
 | 
						|
      // If the name was qualified we only want to add results if we evaluated
 | 
						|
      // all contexts.
 | 
						|
      if (IsFullyQualified)
 | 
						|
        IsMatched &= (SymbolContext == Symbol.getContexts().end());
 | 
						|
 | 
						|
      // FIXME: Support full match. At this point, we only find symbols in
 | 
						|
      // database which end with the same contexts with the identifier.
 | 
						|
      if (IsMatched && IdentiferContext == Names.rend()) {
 | 
						|
        // If we're in a situation where we took a prefix but the thing we
 | 
						|
        // found couldn't possibly have a nested member ignore it.
 | 
						|
        if (TookPrefix &&
 | 
						|
            (Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function ||
 | 
						|
             Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable ||
 | 
						|
             Symbol.getSymbolKind() ==
 | 
						|
                 SymbolInfo::SymbolKind::EnumConstantDecl ||
 | 
						|
             Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro))
 | 
						|
          continue;
 | 
						|
 | 
						|
        MatchedSymbols.push_back(std::move(SymAndSig));
 | 
						|
      }
 | 
						|
    }
 | 
						|
    Names.pop_back();
 | 
						|
    TookPrefix = true;
 | 
						|
  } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
 | 
						|
 | 
						|
  rank(MatchedSymbols, FileName);
 | 
						|
  // Strip signals, they are no longer needed.
 | 
						|
  std::vector<SymbolInfo> Res;
 | 
						|
  for (auto &SymAndSig : MatchedSymbols)
 | 
						|
    Res.push_back(std::move(SymAndSig.Symbol));
 | 
						|
  return Res;
 | 
						|
}
 | 
						|
 | 
						|
} // namespace include_fixer
 | 
						|
} // namespace clang
 |