llvm-project/clang-tools-extra/clang-include-fixer/SymbolIndexManager.cpp

//===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SymbolIndexManager.h"
#include "find-all-symbols/SymbolInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "clang-include-fixer"

namespace clang {
namespace include_fixer {

using find_all_symbols::SymbolInfo;
using find_all_symbols::SymbolAndSignals;

// Calculate a score based on whether we think the given header is closely
// related to the given source file.
static double similarityScore(llvm::StringRef FileName,
                              llvm::StringRef Header) {
  // Compute the maximum number of common path segments between Header and
  // a suffix of FileName.
  // We do not do a full longest common substring computation, as Header
  // specifies the path we would directly #include, so we assume it is rooted
  // relatively to a subproject of the repository.
  int MaxSegments = 1;
  for (auto FileI = llvm::sys::path::begin(FileName),
            FileE = llvm::sys::path::end(FileName);
       FileI != FileE; ++FileI) {
    int Segments = 0;
    for (auto HeaderI = llvm::sys::path::begin(Header),
              HeaderE = llvm::sys::path::end(Header), I = FileI;
         HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
      ++Segments;
    }
    MaxSegments = std::max(Segments, MaxSegments);
  }
  return MaxSegments;
}

static void rank(std::vector<SymbolAndSignals> &Symbols,
                 llvm::StringRef FileName) {
  llvm::DenseMap<llvm::StringRef, double> Score;
  for (const auto &Symbol : Symbols) {
    // Calculate a score from the similarity of the header the symbol is in
    // with the current file and the popularity of the symbol.
    double NewScore = similarityScore(FileName, Symbol.Symbol.getFilePath()) *
                      (1.0 + std::log2(1 + Symbol.Signals.Seen));
    double &S = Score[Symbol.Symbol.getFilePath()];
    S = std::max(S, NewScore);
  }
  // Sort by the gathered scores. Use file name as a tie breaker so we can
  // deduplicate.
  std::sort(Symbols.begin(), Symbols.end(),
            [&](const SymbolAndSignals &A, const SymbolAndSignals &B) {
              auto AS = Score[A.Symbol.getFilePath()];
              auto BS = Score[B.Symbol.getFilePath()];
              if (AS != BS)
                return AS > BS;
              return A.Symbol.getFilePath() < B.Symbol.getFilePath();
            });
}

std::vector<find_all_symbols::SymbolInfo>
SymbolIndexManager::search(llvm::StringRef Identifier,
                           bool IsNestedSearch,
                           llvm::StringRef FileName) const {
  // The identifier may be fully qualified, so split it and get all the context
  // names.
  llvm::SmallVector<llvm::StringRef, 8> Names;
  Identifier.split(Names, "::");

  bool IsFullyQualified = false;
  if (Identifier.startswith("::")) {
    Names.erase(Names.begin()); // Drop first (empty) element.
    IsFullyQualified = true;
  }

  // As long as we don't find a result keep stripping name parts from the end.
  // This is to support nested classes which aren't recorded in the database.
  // Eventually we will either hit a class (namespaces aren't in the database
  // either) and can report that result.
  bool TookPrefix = false;
  std::vector<SymbolAndSignals> MatchedSymbols;
  do {
    std::vector<SymbolAndSignals> Symbols;
    for (const auto &DB : SymbolIndices) {
      auto Res = DB.get()->search(Names.back());
      Symbols.insert(Symbols.end(), Res.begin(), Res.end());
    }

    LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got "
                            << Symbols.size() << " results...\n");

    for (auto &SymAndSig : Symbols) {
      const SymbolInfo &Symbol = SymAndSig.Symbol;
      // Match the identifier name without qualifier.
      bool IsMatched = true;
      auto SymbolContext = Symbol.getContexts().begin();
      auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name.
      // Match the remaining context names.
      while (IdentiferContext != Names.rend() &&
             SymbolContext != Symbol.getContexts().end()) {
        if (SymbolContext->second == *IdentiferContext) {
          ++IdentiferContext;
          ++SymbolContext;
        } else if (SymbolContext->first ==
                   find_all_symbols::SymbolInfo::ContextType::EnumDecl) {
          // Skip non-scoped enum context.
          ++SymbolContext;
        } else {
          IsMatched = false;
          break;
        }
      }

      // If the name was qualified we only want to add results if we evaluated
      // all contexts.
      if (IsFullyQualified)
        IsMatched &= (SymbolContext == Symbol.getContexts().end());

      // FIXME: Support full match. At this point, we only find symbols in
      // database which end with the same contexts with the identifier.
      if (IsMatched && IdentiferContext == Names.rend()) {
        // If we're in a situation where we took a prefix but the thing we
        // found couldn't possibly have a nested member ignore it.
        if (TookPrefix &&
            (Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function ||
             Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable ||
             Symbol.getSymbolKind() ==
                 SymbolInfo::SymbolKind::EnumConstantDecl ||
             Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro))
          continue;

        MatchedSymbols.push_back(std::move(SymAndSig));
      }
    }
    Names.pop_back();
    TookPrefix = true;
  } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);

  rank(MatchedSymbols, FileName);
  // Strip signals, they are no longer needed.
  std::vector<SymbolInfo> Res;
  for (auto &SymAndSig : MatchedSymbols)
    Res.push_back(std::move(SymAndSig.Symbol));
  return Res;
}

} // namespace include_fixer
} // namespace clang