143 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			143 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			C++
		
	
	
	
//===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
 | 
						|
#include "FuzzySymbolIndex.h"
 | 
						|
#include "llvm/Support/Regex.h"
 | 
						|
 | 
						|
using clang::find_all_symbols::SymbolAndSignals;
 | 
						|
using llvm::StringRef;
 | 
						|
 | 
						|
namespace clang {
 | 
						|
namespace include_fixer {
 | 
						|
namespace {
 | 
						|
 | 
						|
class MemSymbolIndex : public FuzzySymbolIndex {
 | 
						|
public:
 | 
						|
  MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
 | 
						|
    for (auto &Symbol : Symbols) {
 | 
						|
      auto Tokens = tokenize(Symbol.Symbol.getName());
 | 
						|
      this->Symbols.emplace_back(
 | 
						|
          StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
 | 
						|
          std::move(Symbol));
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  std::vector<SymbolAndSignals> search(StringRef Query) override {
 | 
						|
    auto Tokens = tokenize(Query);
 | 
						|
    llvm::Regex Pattern("^" + queryRegexp(Tokens));
 | 
						|
    std::vector<SymbolAndSignals> Results;
 | 
						|
    for (const Entry &E : Symbols)
 | 
						|
      if (Pattern.match(E.first))
 | 
						|
        Results.push_back(E.second);
 | 
						|
    return Results;
 | 
						|
  }
 | 
						|
 | 
						|
private:
 | 
						|
  using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
 | 
						|
  std::vector<Entry> Symbols;
 | 
						|
};
 | 
						|
 | 
						|
// Helpers for the tokenize() state machine.

// What kind of token, if any, is currently being accumulated.
enum TokenizeState {
  EMPTY,      // Nothing is pending.
  ONE_BIG,    // Exactly one uppercase letter seen: may become WORD or Word.
  BIG_WORD,   // Inside an all-uppercase WORD.
  SMALL_WORD, // Inside a lowercase word (possibly with a leading capital).
  NUMBER      // Inside a run of digits.
};
 | 
						|
 | 
						|
// Coarse character classes that drive the tokenize() state machine.
enum CharType { UPPER, LOWER, DIGIT, MISC };

// Maps a character to its CharType: uppercase letter, lowercase letter,
// decimal digit, or MISC for everything else.
CharType classify(char c) {
  // The <cctype> predicates have undefined behavior when given a negative
  // value other than EOF. Plain char may be signed, so bytes >= 0x80 (e.g.
  // from UTF-8 text) must be converted to unsigned char first.
  const unsigned char UC = static_cast<unsigned char>(c);
  if (isupper(UC))
    return UPPER;
  if (islower(UC))
    return LOWER;
  if (isdigit(UC))
    return DIGIT;
  return MISC;
}
 | 
						|
 | 
						|
} // namespace
 | 
						|
 | 
						|
// Splits Text into lowercased word tokens. Token boundaries fall at
// non-alphanumeric characters, at changes between letters and digits, and at
// case changes (e.g. "FOOBar" becomes "foo", "bar").
std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
  std::vector<std::string> Tokens;
  // Current describes how the characters in [Begin, Pos) are being treated.
  TokenizeState Current = EMPTY;
  size_t Begin = 0;

  // Emits the pending token [Begin, End), if any, and restarts at End.
  auto Emit = [&](size_t End) {
    if (Current != EMPTY)
      Tokens.push_back(Text.substr(Begin, End - Begin).lower());
    Current = EMPTY;
    Begin = End;
  };
  // Emits whatever is pending, then begins a new token at Pos in state Next.
  auto StartToken = [&](size_t Pos, TokenizeState Next) {
    Emit(Pos);
    Current = Next;
  };

  const size_t Size = Text.size();
  for (size_t Pos = 0; Pos != Size; ++Pos) {
    switch (classify(Text[Pos])) {
    case MISC:
      // Separators end the pending token and are never part of one.
      Emit(Pos);
      break;
    case LOWER:
      if (Current == BIG_WORD) {
        // FOOBar: the first token is FOO, not FOOB. The last capital belongs
        // to the word that starts here.
        Emit(Pos - 1);
        Current = SMALL_WORD;
      } else if (Current == ONE_BIG) {
        // A single capital followed by lowercase is an ordinary Word.
        Current = SMALL_WORD;
      } else if (Current != SMALL_WORD) {
        StartToken(Pos, SMALL_WORD);
      }
      break;
    case UPPER:
      if (Current == ONE_BIG)
        Current = BIG_WORD; // Second capital in a row: this is a WORD.
      else if (Current != BIG_WORD)
        StartToken(Pos, ONE_BIG);
      break;
    case DIGIT:
      if (Current != NUMBER)
        StartToken(Pos, NUMBER);
      break;
    }
  }
  Emit(Size);
  return Tokens;
}
 | 
						|
 | 
						|
std::string
 | 
						|
FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
 | 
						|
  std::string Result;
 | 
						|
  for (size_t I = 0; I < Tokens.size(); ++I) {
 | 
						|
    if (I)
 | 
						|
      Result.append("[[:alnum:]]* ");
 | 
						|
    for (size_t J = 0; J < Tokens[I].size(); ++J) {
 | 
						|
      if (J)
 | 
						|
        Result.append("([[:alnum:]]* )?");
 | 
						|
      Result.push_back(Tokens[I][J]);
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return Result;
 | 
						|
}
 | 
						|
 | 
						|
// Reads the file at FilePath and builds an in-memory index from the symbols
// parsed out of its YAML contents. Returns the file-reading error, converted
// to llvm::Error, if the file cannot be loaded.
llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
  auto FileOrErr = llvm::MemoryBuffer::getFile(FilePath);
  if (auto EC = FileOrErr.getError())
    return llvm::errorCodeToError(EC);

  StringRef YAML = (*FileOrErr)->getBuffer();
  auto Parsed = find_all_symbols::ReadSymbolInfosFromYAML(YAML);
  return std::make_unique<MemSymbolIndex>(std::move(Parsed));
}
 | 
						|
 | 
						|
} // namespace include_fixer
 | 
						|
} // namespace clang
 |