forked from OSchip/llvm-project
				
			
		
			
				
	
	
		
			143 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			143 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| #include "FuzzySymbolIndex.h"
 | |
| #include "llvm/Support/Regex.h"
 | |
| 
 | |
| using clang::find_all_symbols::SymbolAndSignals;
 | |
| using llvm::StringRef;
 | |
| 
 | |
| namespace clang {
 | |
| namespace include_fixer {
 | |
| namespace {
 | |
| 
 | |
| class MemSymbolIndex : public FuzzySymbolIndex {
 | |
| public:
 | |
|   MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
 | |
|     for (auto &Symbol : Symbols) {
 | |
|       auto Tokens = tokenize(Symbol.Symbol.getName());
 | |
|       this->Symbols.emplace_back(
 | |
|           StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
 | |
|           std::move(Symbol));
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   std::vector<SymbolAndSignals> search(StringRef Query) override {
 | |
|     auto Tokens = tokenize(Query);
 | |
|     llvm::Regex Pattern("^" + queryRegexp(Tokens));
 | |
|     std::vector<SymbolAndSignals> Results;
 | |
|     for (const Entry &E : Symbols)
 | |
|       if (Pattern.match(E.first))
 | |
|         Results.push_back(E.second);
 | |
|     return Results;
 | |
|   }
 | |
| 
 | |
| private:
 | |
|   using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
 | |
|   std::vector<Entry> Symbols;
 | |
| };
 | |
| 
 | |
| // Helpers for tokenize state machine.
 | |
// What the tokenizer has accumulated since the last flushed token.
enum TokenizeState {
  EMPTY = 0,      // Nothing is pending.
  ONE_BIG = 1,    // Exactly one uppercase letter so far: WORD or Word?
  BIG_WORD = 2,   // In the middle of an all-uppercase WORD.
  SMALL_WORD = 3, // In the middle of a lowercase word.
  NUMBER = 4      // In the middle of a run of digits.
};
 | |
| 
 | |
// Character classes that drive the tokenizer's state transitions.
enum CharType { UPPER, LOWER, DIGIT, MISC };

// Classifies a single character as an uppercase letter, a lowercase letter,
// a digit, or anything else (MISC).
//
// The <cctype> predicates have undefined behavior when given a negative
// value other than EOF, which is what a non-ASCII byte becomes when plain
// `char` is signed. Convert to unsigned char first so every byte is safe.
CharType classify(char c) {
  const unsigned char UC = static_cast<unsigned char>(c);
  if (isupper(UC))
    return UPPER;
  if (islower(UC))
    return LOWER;
  if (isdigit(UC))
    return DIGIT;
  return MISC;
}
 | |
| 
 | |
| } // namespace
 | |
| 
 | |
| std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
 | |
|   std::vector<std::string> Result;
 | |
|   // State describes the treatment of text from Start to I.
 | |
|   // Once text is Flush()ed into Result, we're done with it and advance Start.
 | |
|   TokenizeState State = EMPTY;
 | |
|   size_t Start = 0;
 | |
|   auto Flush = [&](size_t End) {
 | |
|     if (State != EMPTY) {
 | |
|       Result.push_back(Text.substr(Start, End - Start).lower());
 | |
|       State = EMPTY;
 | |
|     }
 | |
|     Start = End;
 | |
|   };
 | |
|   for (size_t I = 0; I < Text.size(); ++I) {
 | |
|     CharType Type = classify(Text[I]);
 | |
|     if (Type == MISC)
 | |
|       Flush(I);
 | |
|     else if (Type == LOWER)
 | |
|       switch (State) {
 | |
|       case BIG_WORD:
 | |
|         Flush(I - 1); // FOOBar: first token is FOO, not FOOB.
 | |
|         LLVM_FALLTHROUGH;
 | |
|       case ONE_BIG:
 | |
|         State = SMALL_WORD;
 | |
|         LLVM_FALLTHROUGH;
 | |
|       case SMALL_WORD:
 | |
|         break;
 | |
|       default:
 | |
|         Flush(I);
 | |
|         State = SMALL_WORD;
 | |
|       }
 | |
|     else if (Type == UPPER)
 | |
|       switch (State) {
 | |
|       case ONE_BIG:
 | |
|         State = BIG_WORD;
 | |
|         LLVM_FALLTHROUGH;
 | |
|       case BIG_WORD:
 | |
|         break;
 | |
|       default:
 | |
|         Flush(I);
 | |
|         State = ONE_BIG;
 | |
|       }
 | |
|     else if (Type == DIGIT && State != NUMBER) {
 | |
|       Flush(I);
 | |
|       State = NUMBER;
 | |
|     }
 | |
|   }
 | |
|   Flush(Text.size());
 | |
|   return Result;
 | |
| }
 | |
| 
 | |
| std::string
 | |
| FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
 | |
|   std::string Result;
 | |
|   for (size_t I = 0; I < Tokens.size(); ++I) {
 | |
|     if (I)
 | |
|       Result.append("[[:alnum:]]* ");
 | |
|     for (size_t J = 0; J < Tokens[I].size(); ++J) {
 | |
|       if (J)
 | |
|         Result.append("([[:alnum:]]* )?");
 | |
|       Result.push_back(Tokens[I][J]);
 | |
|     }
 | |
|   }
 | |
|   return Result;
 | |
| }
 | |
| 
 | |
| llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
 | |
| FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
 | |
|   auto Buffer = llvm::MemoryBuffer::getFile(FilePath);
 | |
|   if (!Buffer)
 | |
|     return llvm::errorCodeToError(Buffer.getError());
 | |
|   return std::make_unique<MemSymbolIndex>(
 | |
|       find_all_symbols::ReadSymbolInfosFromYAML(Buffer.get()->getBuffer()));
 | |
| }
 | |
| 
 | |
| } // namespace include_fixer
 | |
| } // namespace clang
 |