[clangd] Symbol index interfaces and an in-memory index implementation.

Summary:
o Index interfaces to support using different index sources (e.g. AST index, global index) for code completion, cross-reference finding etc. This patch focuses on code completion.

The following changes in the original patch have been split out.
o Implement an AST-based index.
o Add an option to replace sema code completion for qualified-id with index-based completion.
o Implement an initial naive code completion index which matches symbols that have the query string as substring.

Reviewers: malaperle, sammccall

Reviewed By: sammccall

Subscribers: hokein, klimek, malaperle, mgorny, ilya-biryukov, cfe-commits

Differential Revision: https://reviews.llvm.org/D40548

llvm-svn: 320688
This commit is contained in:
Eric Liu 2017-12-14 11:25:49 +00:00
parent 83bcc68afa
commit 3732cadc73
6 changed files with 238 additions and 0 deletions

View File

@ -19,6 +19,7 @@ add_clang_library(clangDaemon
Protocol.cpp
ProtocolHandlers.cpp
Trace.cpp
index/MemIndex.cpp
index/Index.cpp
index/SymbolCollector.cpp

View File

@ -10,6 +10,7 @@
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
#include "../Context.h"
#include "clang/Index/IndexSymbol.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
@ -110,6 +111,34 @@ private:
llvm::DenseMap<SymbolID, Symbol> Symbols;
};
struct FuzzyFindRequest {
  /// \brief The text to search for. It is matched against the qualified names
  /// of symbols.
  std::string Query;
  /// \brief Upper bound on the number of candidates to return. Defaults to
  /// UINT_MAX.
  size_t MaxCandidateCount{UINT_MAX};
};
/// \brief Interface for symbol indexes that can be used for searching or
/// matching symbols among a set of symbols based on names or unique IDs.
///
/// The class only declares pure virtual query methods; concrete indexes
/// derive from it and override them.
class SymbolIndex {
public:
// Virtual so that an implementation can be destroyed through a SymbolIndex
// pointer or reference.
virtual ~SymbolIndex() = default;
/// \brief Matches symbols in the index fuzzily and applies \p Callback on
/// each matched symbol before returning.
///
/// \param Ctx Context the request runs in. NOTE(review): how implementations
/// are expected to use it is not specified here; the MemIndex implementation
/// leaves it unused.
/// \param Req The query string and the limit on the number of candidates.
/// \param Callback Called with each matched symbol.
///
/// Returns true if the result list is complete, false if it was truncated due
/// to MaxCandidateCount.
virtual bool
fuzzyFind(Context &Ctx, const FuzzyFindRequest &Req,
std::function<void(const Symbol &)> Callback) const = 0;
// FIXME: add interfaces for more index use cases:
// - Symbol getSymbolInfo(SymbolID);
// - getAllOccurrences(SymbolID);
};
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,52 @@
//===--- MemIndex.cpp - Dynamic in-memory symbol index. ----------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===-------------------------------------------------------------------===//
#include "MemIndex.h"
namespace clang {
namespace clangd {
/// \brief Replaces the contents of the index with \p Syms.
///
/// The lookup table is built before the mutex is taken, and the previous
/// index and symbols are released after it has been dropped, so the critical
/// section only covers the swap itself. A null \p Syms is treated as an empty
/// set of symbols.
void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
  // Deduplicate by symbol ID; when an ID occurs more than once, the last
  // pointer in the vector wins.
  llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
  if (Syms)
    for (const Symbol *Sym : *Syms)
      TempIndex[Sym->ID] = Sym;
  // Swap out the old symbols and index. Swapping (instead of move-assigning)
  // parks the old state in the locals, so it is not destroyed under the lock.
  {
    std::lock_guard<std::mutex> Lock(Mutex);
    Index.swap(TempIndex);
    Symbols.swap(Syms);
  }
  // The old index (now in TempIndex) and the old symbols (now in Syms) are
  // released when the function returns, outside the critical section.
}
/// \brief Reports every indexed symbol whose qualified name contains
/// Req.Query as a substring, ignoring case.
///
/// \p Callback is invoked while the index mutex is held, so it must not call
/// build() or fuzzyFind() on this index. Symbols are reported in the iteration
/// order of the internal map.
///
/// Returns false as soon as one more match than Req.MaxCandidateCount is
/// found (that extra match is not reported), true otherwise.
bool MemIndex::fuzzyFind(Context & /*Ctx*/, const FuzzyFindRequest &Req,
                         std::function<void(const Symbol &)> Callback) const {
  const std::string LoweredQuery = llvm::StringRef(Req.Query).lower();
  // Same type as Req.MaxCandidateCount, so the comparison below does not mix
  // integer widths.
  size_t Matched = 0;
  std::lock_guard<std::mutex> Lock(Mutex);
  // Iterate by reference: each entry holds a SymbolID and a pointer, and there
  // is no need to copy it per iteration.
  for (const auto &Pair : Index) {
    const Symbol *Sym = Pair.second;
    // Find all symbols that contain the query, ignoring case.
    // FIXME: consider matching chunks in qualified names instead the whole
    // string.
    // FIXME: use better matching algorithm, e.g. fuzzy matcher.
    const std::string LoweredName = llvm::StringRef(Sym->QualifiedName).lower();
    if (!llvm::StringRef(LoweredName).contains(LoweredQuery))
      continue;
    if (++Matched > Req.MaxCandidateCount)
      return false;
    Callback(*Sym);
  }
  return true;
}
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,41 @@
//===--- MemIndex.h - Dynamic in-memory symbol index. -------------- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
#include "Index.h"
#include <mutex>
namespace clang {
namespace clangd {
/// \brief This implements an index for a (relatively small) set of symbols that
/// can be easily managed in memory.
///
/// build() and fuzzyFind() lock the same mutex, so rebuilding the index and
/// querying it are serialized against each other.
class MemIndex : public SymbolIndex {
public:
/// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
/// accessible as long as `Symbols` is kept alive.
///
/// The index stores the shared pointer and drops the one it held before.
void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
/// \brief Applies \p Callback to every indexed symbol whose qualified name
/// contains Req.Query as a substring, ignoring case.
///
/// \p Callback runs while the index mutex is held. Returns false if the
/// matches were cut off by Req.MaxCandidateCount, true otherwise.
bool fuzzyFind(Context &Ctx, const FuzzyFindRequest &Req,
std::function<void(const Symbol &)> Callback) const override;
private:
// The symbol list handed to the last build() call; the pointers stored in
// Index come from this list.
std::shared_ptr<std::vector<const Symbol *>> Symbols;
// Index is a set of symbols that are deduplicated by symbol IDs.
// FIXME: build smarter index structure.
llvm::DenseMap<SymbolID, const Symbol *> Index;
// Guards Symbols and Index. Mutable so that the const fuzzyFind() can lock it.
mutable std::mutex Mutex;
};
} // namespace clangd
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H

View File

@ -13,6 +13,7 @@ add_extra_unittest(ClangdTests
CodeCompleteTests.cpp
ContextTests.cpp
FuzzyMatchTests.cpp
IndexTests.cpp
JSONExprTests.cpp
TestFS.cpp
TraceTests.cpp

View File

@ -0,0 +1,114 @@
//===-- IndexTests.cpp -------------------------------*- C++ -*-----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "index/Index.h"
#include "index/MemIndex.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
using testing::UnorderedElementsAre;
namespace clang {
namespace clangd {
namespace {
// Builds a test symbol: its ID is constructed from \p ID and its qualified
// name is \p ID itself.
Symbol symbol(llvm::StringRef ID) {
  Symbol Result;
  Result.QualifiedName = ID;
  Result.ID = SymbolID(ID);
  return Result;
}
// Bundles a symbol slab with a list of pointers to symbols, so that both can
// be kept alive by a single shared pointer.
struct SlabAndPointers {
// Storage for the symbols themselves.
SymbolSlab Slab;
// Pointers to symbols; generateNumSymbols() fills this with the addresses of
// the symbols stored in Slab.
std::vector<const Symbol *> Pointers;
};
// Create a slab of symbols with IDs and names [Begin, End]. The life time of
// the slab is managed by the returned shared pointer. If \p WeakSymbols is
// provided, it will be pointed to the managed object in the returned shared
// pointer.
//
// The returned pointer aliases the managed SlabAndPointers: it points at the
// Pointers member but shares ownership of the whole object.
std::shared_ptr<std::vector<const Symbol *>>
generateNumSymbols(int Begin, int End,
                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
  auto Slab = std::make_shared<SlabAndPointers>();
  if (WeakSymbols)
    *WeakSymbols = Slab;

  for (int i = Begin; i <= End; i++)
    Slab->Slab.insert(symbol(std::to_string(i)));

  for (const auto &Sym : Slab->Slab)
    Slab->Pointers.push_back(&Sym.second);

  // Take the address before Slab is moved, so the return statement does not
  // read a variable it is moving from.
  auto *Pointers = &Slab->Pointers;
  return {std::move(Slab), Pointers};
}
// Runs a fuzzy find for \p Req on \p I and returns the qualified names of the
// symbols that were reported. The completeness flag returned by fuzzyFind is
// ignored.
std::vector<std::string> match(const SymbolIndex &I,
                               const FuzzyFindRequest &Req) {
  std::vector<std::string> Names;
  auto CollectName = [&Names](const Symbol &Sym) {
    Names.push_back(Sym.QualifiedName);
  };
  auto Ctx = Context::empty();
  I.fuzzyFind(Ctx, Req, CollectName);
  return Names;
}
// Rebuilding the index must drop its reference to the previous symbols.
TEST(MemIndexTest, MemIndexSymbolsRecycled) {
  MemIndex Idx;
  std::weak_ptr<SlabAndPointers> OldSymbols;
  Idx.build(generateNumSymbols(0, 10, &OldSymbols));

  FuzzyFindRequest Req;
  Req.Query = "7";
  EXPECT_THAT(match(Idx, Req), UnorderedElementsAre("7"));
  // The first batch is still referenced by the index.
  EXPECT_FALSE(OldSymbols.expired());

  // Release old symbols.
  Idx.build(generateNumSymbols(0, 0));
  EXPECT_TRUE(OldSymbols.expired());
}
// A query matches every symbol whose name contains it as a substring.
TEST(MemIndexTest, MemIndexMatchSubstring) {
  FuzzyFindRequest Req;
  Req.Query = "5";

  MemIndex Idx;
  Idx.build(generateNumSymbols(5, 25));

  EXPECT_THAT(match(Idx, Req), UnorderedElementsAre("5", "15", "25"));
}
// Symbols that share an ID must be reported only once.
TEST(MemIndexTest, MemIndexDeduplicate) {
  auto Symbols = generateNumSymbols(0, 10);

  // Inject some duplicates and make sure we only match the same symbol once.
  auto Duplicate = symbol("7");
  for (int Copy = 0; Copy < 3; ++Copy)
    Symbols->push_back(&Duplicate);

  FuzzyFindRequest Req;
  Req.Query = "7";
  MemIndex Idx;
  Idx.build(std::move(Symbols));

  auto Matches = match(Idx, Req);
  EXPECT_EQ(Matches.size(), 1u);
}
// No more than MaxCandidateCount symbols are reported, even when more match.
TEST(MemIndexTest, MemIndexLimitedNumMatches) {
  FuzzyFindRequest Req;
  Req.Query = "5";
  Req.MaxCandidateCount = 3;

  MemIndex Idx;
  Idx.build(generateNumSymbols(0, 100));

  auto Matches = match(Idx, Req);
  EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
}
} // namespace
} // namespace clangd
} // namespace clang