[clangd] Symbol index interfaces and an in-memory index implementation.
Summary: o Index interfaces to support using different index sources (e.g. AST index, global index) for code completion, cross-reference finding etc. This patch focuses on code completion. The following changes in the original patch has been split out. o Implement an AST-based index. o Add an option to replace sema code completion for qualified-id with index-based completion. o Implement an initial naive code completion index which matches symbols that have the query string as substring. Reviewers: malaperle, sammccall Reviewed By: sammccall Subscribers: hokein, klimek, malaperle, mgorny, ilya-biryukov, cfe-commits Differential Revision: https://reviews.llvm.org/D40548 llvm-svn: 320688
This commit is contained in:
parent
83bcc68afa
commit
3732cadc73
|
|
@ -19,6 +19,7 @@ add_clang_library(clangDaemon
|
|||
Protocol.cpp
|
||||
ProtocolHandlers.cpp
|
||||
Trace.cpp
|
||||
index/MemIndex.cpp
|
||||
index/Index.cpp
|
||||
index/SymbolCollector.cpp
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
|
||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
|
||||
|
||||
#include "../Context.h"
|
||||
#include "clang/Index/IndexSymbol.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/Hashing.h"
|
||||
|
|
@ -110,6 +111,34 @@ private:
|
|||
llvm::DenseMap<SymbolID, Symbol> Symbols;
|
||||
};
|
||||
|
||||
struct FuzzyFindRequest {
|
||||
/// \brief A query string for the fuzzy find. This is matched against symbols'
|
||||
/// qualfified names.
|
||||
std::string Query;
|
||||
/// \brief The maxinum number of candidates to return.
|
||||
size_t MaxCandidateCount = UINT_MAX;
|
||||
};
|
||||
|
||||
/// \brief Interface for symbol indexes that can be used for searching or
|
||||
/// matching symbols among a set of symbols based on names or unique IDs.
|
||||
class SymbolIndex {
|
||||
public:
|
||||
virtual ~SymbolIndex() = default;
|
||||
|
||||
/// \brief Matches symbols in the index fuzzily and applies \p Callback on
|
||||
/// each matched symbol before returning.
|
||||
///
|
||||
/// Returns true if the result list is complete, false if it was truncated due
|
||||
/// to MaxCandidateCount
|
||||
virtual bool
|
||||
fuzzyFind(Context &Ctx, const FuzzyFindRequest &Req,
|
||||
std::function<void(const Symbol &)> Callback) const = 0;
|
||||
|
||||
// FIXME: add interfaces for more index use cases:
|
||||
// - Symbol getSymbolInfo(SymbolID);
|
||||
// - getAllOccurrences(SymbolID);
|
||||
};
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
//===--- MemIndex.cpp - Dynamic in-memory symbol index. ----------*- C++-*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===-------------------------------------------------------------------===//
|
||||
|
||||
#include "MemIndex.h"
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
|
||||
llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
|
||||
for (const Symbol *Sym : *Syms)
|
||||
TempIndex[Sym->ID] = Sym;
|
||||
|
||||
// Swap out the old symbols and index.
|
||||
{
|
||||
std::lock_guard<std::mutex> Lock(Mutex);
|
||||
Index = std::move(TempIndex);
|
||||
Symbols = std::move(Syms); // Relase old symbols.
|
||||
}
|
||||
}
|
||||
|
||||
bool MemIndex::fuzzyFind(Context & /*Ctx*/, const FuzzyFindRequest &Req,
|
||||
std::function<void(const Symbol &)> Callback) const {
|
||||
std::string LoweredQuery = llvm::StringRef(Req.Query).lower();
|
||||
unsigned Matched = 0;
|
||||
{
|
||||
std::lock_guard<std::mutex> Lock(Mutex);
|
||||
for (const auto Pair : Index) {
|
||||
const Symbol *Sym = Pair.second;
|
||||
// Find all symbols that contain the query, igoring cases.
|
||||
// FIXME: consider matching chunks in qualified names instead the whole
|
||||
// string.
|
||||
// FIXME: use better matching algorithm, e.g. fuzzy matcher.
|
||||
if (StringRef(StringRef(Sym->QualifiedName).lower())
|
||||
.contains(LoweredQuery)) {
|
||||
if (++Matched > Req.MaxCandidateCount)
|
||||
return false;
|
||||
Callback(*Sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
//===--- MemIndex.h - Dynamic in-memory symbol index. -------------- C++-*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
|
||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
|
||||
|
||||
#include "Index.h"
|
||||
#include <mutex>
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
/// \brief This implements an index for a (relatively small) set of symbols that
|
||||
/// can be easily managed in memory.
|
||||
class MemIndex : public SymbolIndex {
|
||||
public:
|
||||
/// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
|
||||
/// accessible as long as `Symbols` is kept alive.
|
||||
void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
|
||||
|
||||
bool fuzzyFind(Context &Ctx, const FuzzyFindRequest &Req,
|
||||
std::function<void(const Symbol &)> Callback) const override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<std::vector<const Symbol *>> Symbols;
|
||||
// Index is a set of symbols that are deduplicated by symbol IDs.
|
||||
// FIXME: build smarter index structure.
|
||||
llvm::DenseMap<SymbolID, const Symbol *> Index;
|
||||
mutable std::mutex Mutex;
|
||||
};
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
||||
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
|
||||
|
|
@ -13,6 +13,7 @@ add_extra_unittest(ClangdTests
|
|||
CodeCompleteTests.cpp
|
||||
ContextTests.cpp
|
||||
FuzzyMatchTests.cpp
|
||||
IndexTests.cpp
|
||||
JSONExprTests.cpp
|
||||
TestFS.cpp
|
||||
TraceTests.cpp
|
||||
|
|
|
|||
|
|
@ -0,0 +1,114 @@
|
|||
//===-- IndexTests.cpp -------------------------------*- C++ -*-----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "index/Index.h"
|
||||
#include "index/MemIndex.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using testing::UnorderedElementsAre;
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
namespace {
|
||||
|
||||
Symbol symbol(llvm::StringRef ID) {
|
||||
Symbol Sym;
|
||||
Sym.ID = SymbolID(ID);
|
||||
Sym.QualifiedName = ID;
|
||||
return Sym;
|
||||
}
|
||||
|
||||
struct SlabAndPointers {
|
||||
SymbolSlab Slab;
|
||||
std::vector<const Symbol *> Pointers;
|
||||
};
|
||||
|
||||
// Create a slab of symbols with IDs and names [Begin, End]. The life time of
|
||||
// the slab is managed by the returned shared pointer. If \p WeakSymbols is
|
||||
// provided, it will be pointed to the managed object in the returned shared
|
||||
// pointer.
|
||||
std::shared_ptr<std::vector<const Symbol *>>
|
||||
generateNumSymbols(int Begin, int End,
|
||||
std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
|
||||
auto Slab = std::make_shared<SlabAndPointers>();
|
||||
if (WeakSymbols)
|
||||
*WeakSymbols = Slab;
|
||||
|
||||
for (int i = Begin; i <= End; i++)
|
||||
Slab->Slab.insert(symbol(std::to_string(i)));
|
||||
|
||||
for (const auto &Sym : Slab->Slab)
|
||||
Slab->Pointers.push_back(&Sym.second);
|
||||
|
||||
return {std::move(Slab), &Slab->Pointers};
|
||||
}
|
||||
|
||||
std::vector<std::string> match(const SymbolIndex &I,
|
||||
const FuzzyFindRequest &Req) {
|
||||
std::vector<std::string> Matches;
|
||||
auto Ctx = Context::empty();
|
||||
I.fuzzyFind(Ctx, Req,
|
||||
[&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); });
|
||||
return Matches;
|
||||
}
|
||||
|
||||
TEST(MemIndexTest, MemIndexSymbolsRecycled) {
|
||||
MemIndex I;
|
||||
std::weak_ptr<SlabAndPointers> Symbols;
|
||||
I.build(generateNumSymbols(0, 10, &Symbols));
|
||||
FuzzyFindRequest Req;
|
||||
Req.Query = "7";
|
||||
EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
|
||||
|
||||
EXPECT_FALSE(Symbols.expired());
|
||||
// Release old symbols.
|
||||
I.build(generateNumSymbols(0, 0));
|
||||
EXPECT_TRUE(Symbols.expired());
|
||||
}
|
||||
|
||||
TEST(MemIndexTest, MemIndexMatchSubstring) {
|
||||
MemIndex I;
|
||||
I.build(generateNumSymbols(5, 25));
|
||||
FuzzyFindRequest Req;
|
||||
Req.Query = "5";
|
||||
EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25"));
|
||||
}
|
||||
|
||||
TEST(MemIndexTest, MemIndexDeduplicate) {
|
||||
auto Symbols = generateNumSymbols(0, 10);
|
||||
|
||||
// Inject some duplicates and make sure we only match the same symbol once.
|
||||
auto Sym = symbol("7");
|
||||
Symbols->push_back(&Sym);
|
||||
Symbols->push_back(&Sym);
|
||||
Symbols->push_back(&Sym);
|
||||
|
||||
FuzzyFindRequest Req;
|
||||
Req.Query = "7";
|
||||
MemIndex I;
|
||||
I.build(std::move(Symbols));
|
||||
auto Matches = match(I, Req);
|
||||
EXPECT_EQ(Matches.size(), 1u);
|
||||
}
|
||||
|
||||
TEST(MemIndexTest, MemIndexLimitedNumMatches) {
|
||||
MemIndex I;
|
||||
I.build(generateNumSymbols(0, 100));
|
||||
FuzzyFindRequest Req;
|
||||
Req.Query = "5";
|
||||
Req.MaxCandidateCount = 3;
|
||||
auto Matches = match(I, Req);
|
||||
EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
Loading…
Reference in New Issue