[clangd] Introduce a "Symbol" class.
Summary: * The "Symbol" class represents a C++ symbol in the codebase, containing all the information of a C++ symbol needed by clangd. clangd will use it in clangd's AST/dynamic index and global/static index (code completion and code navigation). * The SymbolCollector (another IndexAction) will be used to recollect the symbols when the source file is changed (for ASTIndex), or to generate all C++ symbols for the whole project. In the long term (when index-while-building is ready), clangd should share the same "Symbol" structure and IndexAction with index-while-building, but for now we want to have some stuff working in clangd. Reviewers: ioeric, sammccall, ilya-biryukov, malaperle Reviewed By: sammccall Subscribers: malaperle, klimek, mgorny, cfe-commits Differential Revision: https://reviews.llvm.org/D40897 llvm-svn: 320486
This commit is contained in:
parent
ca2a8cea2f
commit
4c1394d67d
|
|
@ -19,6 +19,8 @@ add_clang_library(clangDaemon
|
|||
Protocol.cpp
|
||||
ProtocolHandlers.cpp
|
||||
Trace.cpp
|
||||
index/Index.cpp
|
||||
index/SymbolCollector.cpp
|
||||
|
||||
LINK_LIBS
|
||||
clangAST
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
//===--- Index.cpp -----------------------------------------------*- C++-*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Index.h"
|
||||
|
||||
#include "llvm/Support/SHA1.h"
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
namespace {
|
||||
// Reinterprets the characters of S as a sequence of bytes. The returned view
// refers to the same memory as S; nothing is copied.
ArrayRef<uint8_t> toArrayRef(StringRef S) {
  const auto *Bytes = reinterpret_cast<const uint8_t *>(S.data());
  return ArrayRef<uint8_t>(Bytes, S.size());
}
|
||||
} // namespace
|
||||
|
||||
// Builds the ID of a symbol from its USR by storing the SHA1 digest of the
// USR's bytes.
SymbolID::SymbolID(llvm::StringRef USR) {
  const ArrayRef<uint8_t> USRBytes = toArrayRef(USR);
  HashValue = llvm::SHA1::hash(USRBytes);
}
|
||||
|
||||
SymbolSlab::const_iterator SymbolSlab::begin() const {
|
||||
return Symbols.begin();
|
||||
}
|
||||
|
||||
SymbolSlab::const_iterator SymbolSlab::end() const {
|
||||
return Symbols.end();
|
||||
}
|
||||
|
||||
SymbolSlab::const_iterator SymbolSlab::find(const SymbolID& SymID) const {
|
||||
return Symbols.find(SymID);
|
||||
}
|
||||
|
||||
// Marks the slab as frozen; insert() asserts that the slab is not frozen.
void SymbolSlab::freeze() { Frozen = true; }
|
||||
|
||||
// Stores S under its own ID, replacing any symbol already stored under that
// ID. Must not be called once the slab has been frozen.
void SymbolSlab::insert(Symbol S) {
  assert(!Frozen &&
         "Can't insert a symbol after the slab has been frozen!");
  // Take the key before S is moved from.
  const SymbolID Key = S.ID;
  Symbols[Key] = std::move(S);
}
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
//===--- Index.h -------------------------------------------------*- C++-*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
|
||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
|
||||
|
||||
#include "clang/Index/IndexSymbol.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
// Describes where a symbol occurs: a source file plus a character range
// within that file.
struct SymbolLocation {
  // Absolute path of the source file in which the symbol occurs.
  std::string FilePath;
  // 0-based offset, counted from the beginning of the source file, of the
  // first character of the symbol.
  unsigned StartOffset;
  // 0-based offset, counted from the beginning of the source file, of the
  // last character of the symbol.
  unsigned EndOffset;
};
|
||||
|
||||
// The class identifies a particular C++ symbol (class, function, method, etc).
|
||||
//
|
||||
// As USRs (Unified Symbol Resolution) could be large, especially for functions
|
||||
// with long type arguments, SymbolID is using 160-bits SHA1(USR) values to
|
||||
// guarantee the uniqueness of symbols while using a relatively small amount of
|
||||
// memory (vs storing USRs directly).
|
||||
//
|
||||
// SymbolID can be used as key in the symbol indexes to lookup the symbol.
|
||||
class SymbolID {
|
||||
public:
|
||||
SymbolID() = default;
|
||||
SymbolID(llvm::StringRef USR);
|
||||
|
||||
bool operator==(const SymbolID& Sym) const {
|
||||
return HashValue == Sym.HashValue;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class llvm::DenseMapInfo<clang::clangd::SymbolID>;
|
||||
|
||||
std::array<uint8_t, 20> HashValue;
|
||||
};
|
||||
|
||||
// The class represents a C++ symbol, e.g. class, function.
|
||||
//
|
||||
// FIXME: instead of having own copy fields for each symbol, we can share
|
||||
// storage from SymbolSlab.
|
||||
struct Symbol {
|
||||
// The ID of the symbol.
|
||||
SymbolID ID;
|
||||
// The qualified name of the symbol, e.g. Foo::bar.
|
||||
std::string QualifiedName;
|
||||
// The symbol information, like symbol kind.
|
||||
index::SymbolInfo SymInfo;
|
||||
// The location of the canonical declaration of the symbol.
|
||||
//
|
||||
// A C++ symbol could have multiple declarations and one definition (e.g.
|
||||
// a function is declared in ".h" file, and is defined in ".cc" file).
|
||||
// * For classes, the canonical declaration is usually definition.
|
||||
// * For non-inline functions, the canonical declaration is a declaration
|
||||
// (not a definition), which is usually declared in ".h" file.
|
||||
SymbolLocation CanonicalDeclaration;
|
||||
|
||||
// FIXME: add definition location of the symbol.
|
||||
// FIXME: add all occurrences support.
|
||||
// FIXME: add extra fields for index scoring signals.
|
||||
// FIXME: add code completion information.
|
||||
};
|
||||
|
||||
// A symbol container that stores a set of symbols. The container will maintain
|
||||
// the lifetime of the symbols.
|
||||
//
|
||||
// FIXME: Use a space-efficient implementation, a lot of Symbol fields could
|
||||
// share the same storage.
|
||||
class SymbolSlab {
|
||||
public:
|
||||
using const_iterator = llvm::DenseMap<SymbolID, Symbol>::const_iterator;
|
||||
|
||||
SymbolSlab() = default;
|
||||
|
||||
const_iterator begin() const;
|
||||
const_iterator end() const;
|
||||
const_iterator find(const SymbolID& SymID) const;
|
||||
|
||||
// Once called, no more symbols would be added to the SymbolSlab. This
|
||||
// operation is irreversible.
|
||||
void freeze();
|
||||
|
||||
void insert(Symbol S);
|
||||
|
||||
private:
|
||||
bool Frozen = false;
|
||||
|
||||
llvm::DenseMap<SymbolID, Symbol> Symbols;
|
||||
};
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
||||
namespace llvm {
|
||||
|
||||
template <> struct DenseMapInfo<clang::clangd::SymbolID> {
|
||||
static inline clang::clangd::SymbolID getEmptyKey() {
|
||||
static clang::clangd::SymbolID EmptyKey("EMPTYKEY");
|
||||
return EmptyKey;
|
||||
}
|
||||
static inline clang::clangd::SymbolID getTombstoneKey() {
|
||||
static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY");
|
||||
return TombstoneKey;
|
||||
}
|
||||
static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
|
||||
return hash_value(
|
||||
ArrayRef<uint8_t>(Sym.HashValue.data(), Sym.HashValue.size()));
|
||||
}
|
||||
static bool isEqual(const clang::clangd::SymbolID &LHS,
|
||||
const clang::clangd::SymbolID &RHS) {
|
||||
return LHS == RHS;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "SymbolCollector.h"
|
||||
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/AST/Decl.h"
|
||||
#include "clang/AST/DeclCXX.h"
|
||||
#include "clang/Basic/SourceManager.h"
|
||||
#include "clang/Index/IndexSymbol.h"
|
||||
#include "clang/Index/USRGeneration.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
namespace {
|
||||
// Make the Path absolute using the current working directory of the given
|
||||
// SourceManager if the Path is not an absolute path.
|
||||
//
|
||||
// The Path can be a path relative to the build directory, or retrieved from
|
||||
// the SourceManager.
|
||||
std::string makeAbsolutePath(const SourceManager &SM, StringRef Path) {
|
||||
llvm::SmallString<128> AbsolutePath(Path);
|
||||
if (std::error_code EC =
|
||||
SM.getFileManager().getVirtualFileSystem()->makeAbsolute(
|
||||
AbsolutePath))
|
||||
llvm::errs() << "Warning: could not make absolute file: '" << EC.message()
|
||||
<< '\n';
|
||||
// Handle the symbolic link path case where the current working directory
|
||||
// (getCurrentWorkingDirectory) is a symlink./ We always want to the real
|
||||
// file path (instead of the symlink path) for the C++ symbols.
|
||||
//
|
||||
// Consider the following example:
|
||||
//
|
||||
// src dir: /project/src/foo.h
|
||||
// current working directory (symlink): /tmp/build -> /project/src/
|
||||
//
|
||||
// The file path of Symbol is "/project/src/foo.h" instead of
|
||||
// "/tmp/build/foo.h"
|
||||
const DirectoryEntry *Dir = SM.getFileManager().getDirectory(
|
||||
llvm::sys::path::parent_path(AbsolutePath.str()));
|
||||
if (Dir) {
|
||||
StringRef DirName = SM.getFileManager().getCanonicalName(Dir);
|
||||
SmallVector<char, 128> AbsoluteFilename;
|
||||
llvm::sys::path::append(AbsoluteFilename, DirName,
|
||||
llvm::sys::path::filename(AbsolutePath.str()));
|
||||
return llvm::StringRef(AbsoluteFilename.data(), AbsoluteFilename.size())
|
||||
.str();
|
||||
}
|
||||
return AbsolutePath.str();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Always return true to continue indexing.
|
||||
bool SymbolCollector::handleDeclOccurence(
|
||||
const Decl *D, index::SymbolRoleSet Roles,
|
||||
ArrayRef<index::SymbolRelation> Relations, FileID FID, unsigned Offset,
|
||||
index::IndexDataConsumer::ASTNodeInfo ASTNode) {
|
||||
// FIXME: collect all symbol references.
|
||||
if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
|
||||
Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
|
||||
return true;
|
||||
|
||||
if (const NamedDecl *ND = llvm::dyn_cast<NamedDecl>(D)) {
|
||||
// FIXME: Should we include the internal linkage symbols?
|
||||
if (!ND->hasExternalFormalLinkage() || ND->isInAnonymousNamespace())
|
||||
return true;
|
||||
|
||||
llvm::SmallVector<char, 128> Buff;
|
||||
if (index::generateUSRForDecl(ND, Buff))
|
||||
return true;
|
||||
|
||||
std::string USR(Buff.data(), Buff.size());
|
||||
auto ID = SymbolID(USR);
|
||||
if (Symbols.find(ID) != Symbols.end())
|
||||
return true;
|
||||
|
||||
auto &SM = ND->getASTContext().getSourceManager();
|
||||
SymbolLocation Location = {
|
||||
makeAbsolutePath(SM, SM.getFilename(D->getLocation())),
|
||||
SM.getFileOffset(D->getLocStart()), SM.getFileOffset(D->getLocEnd())};
|
||||
Symbols.insert({std::move(ID), ND->getQualifiedNameAsString(),
|
||||
index::getSymbolInfo(D), std::move(Location)});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Freezes the collected symbols, so that nothing more can be inserted.
void SymbolCollector::finish() { Symbols.freeze(); }
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
//===--- SymbolCollector.h ---------------------------------------*- C++-*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H

#include "Index.h"

#include "clang/Index/IndexDataConsumer.h"
#include "clang/Index/IndexSymbol.h"

#include <utility>

namespace clang {
namespace clangd {

// Collect all symbols from an AST.
//
// Clients (e.g. clangd) can use SymbolCollector together with
// index::indexTopLevelDecls to retrieve all symbols when the source file is
// changed.
class SymbolCollector : public index::IndexDataConsumer {
public:
  SymbolCollector() = default;

  // Called for each declaration occurrence found while indexing; stores a
  // Symbol for the occurrences the collector is interested in.
  bool
  handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles,
                      ArrayRef<index::SymbolRelation> Relations, FileID FID,
                      unsigned Offset,
                      index::IndexDataConsumer::ASTNodeInfo ASTNode) override;

  // Freezes the collected symbols.
  void finish() override;

  // Hands the collected symbols over to the caller. Deliberately non-const:
  // std::move applied to a member of a const object would select the copy
  // constructor and silently duplicate the whole slab. After this call the
  // collector's own slab is in a moved-from state.
  SymbolSlab takeSymbols() { return std::move(Symbols); }

private:
  // All Symbols collected from the AST.
  SymbolSlab Symbols;
};

} // namespace clangd
} // namespace clang

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_COLLECTOR_H
|
||||
|
|
@ -16,6 +16,7 @@ add_extra_unittest(ClangdTests
|
|||
JSONExprTests.cpp
|
||||
TestFS.cpp
|
||||
TraceTests.cpp
|
||||
SymbolCollectorTests.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(ClangdTests
|
||||
|
|
|
|||
|
|
@ -0,0 +1,110 @@
|
|||
//===-- SymbolCollectorTests.cpp -------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "index/SymbolCollector.h"
|
||||
#include "clang/Index/IndexingAction.h"
|
||||
#include "clang/Basic/FileManager.h"
|
||||
#include "clang/Basic/FileSystemOptions.h"
|
||||
#include "clang/Basic/VirtualFileSystem.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "clang/Tooling/Tooling.h"
|
||||
#include "llvm/ADT/IntrusiveRefCntPtr.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "gmock/gmock.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
using testing::UnorderedElementsAre;
|
||||
using testing::Eq;
|
||||
using testing::Field;
|
||||
|
||||
// GMock helpers for matching Symbol.
|
||||
// Matches a key/value entry whose value (`.second`) has a QualifiedName equal
// to Name.
MATCHER_P(QName, Name, "") { return arg.second.QualifiedName == Name; }
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
namespace {
|
||||
class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
|
||||
public:
|
||||
SymbolIndexActionFactory() = default;
|
||||
|
||||
clang::FrontendAction *create() override {
|
||||
index::IndexingOptions IndexOpts;
|
||||
IndexOpts.SystemSymbolFilter =
|
||||
index::IndexingOptions::SystemSymbolFilterKind::All;
|
||||
IndexOpts.IndexFunctionLocals = false;
|
||||
Collector = std::make_shared<SymbolCollector>();
|
||||
FrontendAction *Action =
|
||||
index::createIndexingAction(Collector, IndexOpts, nullptr).release();
|
||||
return Action;
|
||||
}
|
||||
|
||||
std::shared_ptr<SymbolCollector> Collector;
|
||||
};
|
||||
|
||||
class SymbolCollectorTest : public ::testing::Test {
|
||||
public:
|
||||
bool runSymbolCollector(StringRef HeaderCode, StringRef MainCode) {
|
||||
llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
|
||||
new vfs::InMemoryFileSystem);
|
||||
llvm::IntrusiveRefCntPtr<FileManager> Files(
|
||||
new FileManager(FileSystemOptions(), InMemoryFileSystem));
|
||||
|
||||
const std::string FileName = "symbol.cc";
|
||||
const std::string HeaderName = "symbols.h";
|
||||
auto Factory = llvm::make_unique<SymbolIndexActionFactory>();
|
||||
|
||||
tooling::ToolInvocation Invocation(
|
||||
{"symbol_collector", "-fsyntax-only", "-std=c++11", FileName},
|
||||
Factory->create(), Files.get(),
|
||||
std::make_shared<PCHContainerOperations>());
|
||||
|
||||
InMemoryFileSystem->addFile(HeaderName, 0,
|
||||
llvm::MemoryBuffer::getMemBuffer(HeaderCode));
|
||||
|
||||
std::string Content = "#include\"" + std::string(HeaderName) + "\"";
|
||||
Content += "\n" + MainCode.str();
|
||||
InMemoryFileSystem->addFile(FileName, 0,
|
||||
llvm::MemoryBuffer::getMemBuffer(Content));
|
||||
Invocation.run();
|
||||
Symbols = Factory->Collector->takeSymbols();
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
SymbolSlab Symbols;
|
||||
};
|
||||
|
||||
// Checks that the class, its method and both free functions declared in the
// header are collected, and that the function placed in an anonymous
// namespace in the main file is not.
TEST_F(SymbolCollectorTest, CollectSymbol) {
  const std::string HeaderSource = R"(
    class Foo {
      void f();
    };
    void f1();
    inline void f2() {}
  )";
  const std::string MainSource = R"(
    namespace {
    void ff() {} // ignore
    }
    void f1() {}
  )";
  runSymbolCollector(HeaderSource, MainSource);

  EXPECT_THAT(Symbols, UnorderedElementsAre(QName("Foo"), QName("Foo::f"),
                                            QName("f1"), QName("f2")));
}
|
||||
|
||||
} // namespace
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
Loading…
Reference in New Issue