From 89a7691fd9f6d431acadcca7146a90bdc6230ad6 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Thu, 15 Nov 2018 10:31:19 +0000 Subject: [PATCH] Address comments llvm-svn: 346940 --- clang-tools-extra/clangd/index/Background.cpp | 226 ++++++++++-------- clang-tools-extra/clangd/index/Background.h | 67 +++--- .../unittests/clangd/BackgroundIndexTests.cpp | 28 ++- 3 files changed, 179 insertions(+), 142 deletions(-) diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index a9724548e5ee..ef83abf29503 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -56,15 +56,77 @@ getShardPathFromFilePath(llvm::SmallString<128> ShardRoot, return ShardRoot; } +// Uses disk as a storage for index shards. Creates a directory called +// ".clangd-index/" under the path provided during initialize. +// Note: Requires initialize to be called before storing or retrieval. +// This class is thread-safe. +class DiskBackedIndexStorage : public BackgroundIndexStorage { + llvm::SmallString<128> DiskShardRoot; + +public: + llvm::Expected + retrieveShard(llvm::StringRef ShardIdentifier) const override { + auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + auto Buffer = MemoryBuffer::getFile(ShardPath); + if (!Buffer) { + elog("Couldn't retrieve {0}: {1}", ShardPath, + Buffer.getError().message()); + return llvm::make_error(Buffer.getError()); + } + // FIXME: Change readIndexFile to also look at Hash of the source that + // generated index and skip if there is a mismatch. + return readIndexFile(Buffer->get()->getBuffer()); + } + + bool storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const override { + auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + std::error_code EC; + llvm::raw_fd_ostream OS(ShardPath, EC); + if (EC) { + elog("Failed to open {0} for writing: {1}", ShardPath, EC.message()); + return false; + } + OS << Shard; + return true; + } + + // Initializes DiskShardRoot to (Directory + ".clangd-index/") which is the + // base directory for all shard files. After the initialization succeeds all + // subsequent calls are no-op. + DiskBackedIndexStorage(llvm::StringRef Directory) : DiskShardRoot(Directory) { + sys::path::append(DiskShardRoot, ".clangd-index/"); + if (!llvm::sys::fs::exists(DiskShardRoot)) { + std::error_code OK; + std::error_code EC = llvm::sys::fs::create_directory(DiskShardRoot); + if (EC != OK) { + elog("Failed to create {0}: {1}", DiskShardRoot, EC.message()); + } + } + } +}; + +SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { + SmallString<128> AbsolutePath; + if (sys::path::is_absolute(Cmd.Filename)) { + AbsolutePath = Cmd.Filename; + } else { + AbsolutePath = Cmd.Directory; + sys::path::append(AbsolutePath, Cmd.Filename); + } + return AbsolutePath; +} + } // namespace -BackgroundIndex::BackgroundIndex( - Context BackgroundContext, StringRef ResourceDir, - const FileSystemProvider &FSProvider, ArrayRef URISchemes, - std::unique_ptr IndexShardStorage, size_t ThreadPoolSize) +BackgroundIndex::BackgroundIndex(Context BackgroundContext, + StringRef ResourceDir, + const FileSystemProvider &FSProvider, + ArrayRef URISchemes, + size_t ThreadPoolSize) : SwapIndex(make_unique()), ResourceDir(ResourceDir), FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)), - URISchemes(URISchemes), IndexShardStorage(std::move(IndexShardStorage)) { + URISchemes(URISchemes) { assert(ThreadPoolSize > 0 && "Thread pool size can't be zero."); while (ThreadPoolSize--) { ThreadPool.emplace_back([this] { run(); }); @@ -123,19 +185,58 @@ void BackgroundIndex::blockUntilIdleForTest() { void BackgroundIndex::enqueue(StringRef Directory, tooling::CompileCommand Cmd) { + auto IndexStorage = BackgroundIndexStorage::getForDirectory(Directory); + if (IndexStorage) + loadShard(IndexStorage.get(), Cmd); + else + elog("No index storage for: {0}", Directory); { std::lock_guard Lock(QueueMu); - enqueueLocked(std::move(Cmd)); + enqueueLocked(std::move(Cmd), std::move(IndexStorage)); } QueueCV.notify_all(); } +void BackgroundIndex::loadShard(BackgroundIndexStorage *IndexStorage, + const tooling::CompileCommand &Cmd) { + assert(IndexStorage && "No index storage to load from?"); + auto AbsolutePath = getAbsolutePath(Cmd); + auto Shard = IndexStorage->retrieveShard(AbsolutePath); + if (Shard) { + // FIXME: Updated hashes once we have them in serialized format. + // IndexedFileDigests[AbsolutePath] = Hash; + IndexedSymbols.update(AbsolutePath, + make_unique(std::move(*Shard->Symbols)), + make_unique(std::move(*Shard->Refs))); + + vlog("Loaded {0} from storage", AbsolutePath); + } +} + +void BackgroundIndex::loadShards( + BackgroundIndexStorage *IndexStorage, + const std::vector &Cmds) { + assert(IndexStorage && "No index storage to load from?"); + for (const auto &Cmd : Cmds) + loadShard(IndexStorage, Cmd); + // FIXME: Maybe we should get rid of this one once we start building index + // periodically? Especially if we also offload this task onto the queue. + vlog("Rebuilding automatic index"); + reset(IndexedSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge, + URISchemes)); +} + void BackgroundIndex::enqueueAll(StringRef Directory, const tooling::CompilationDatabase &CDB) { trace::Span Tracer("BackgroundIndexEnqueueCDB"); // FIXME: this function may be slow. Perhaps enqueue a task to re-read the CDB // from disk and enqueue the commands asynchronously? auto Cmds = CDB.getAllCompileCommands(); + auto IndexStorage = BackgroundIndexStorage::getForDirectory(Directory); + if (IndexStorage) + loadShards(IndexStorage.get(), Cmds); + else + elog("No index storage for: {0}", Directory); SPAN_ATTACH(Tracer, "commands", int64_t(Cmds.size())); std::mt19937 Generator(std::random_device{}()); std::shuffle(Cmds.begin(), Cmds.end(), Generator); @@ -143,26 +244,23 @@ void BackgroundIndex::enqueueAll(StringRef Directory, { std::lock_guard Lock(QueueMu); for (auto &Cmd : Cmds) - enqueueLocked(std::move(Cmd)); + enqueueLocked(std::move(Cmd), IndexStorage); } QueueCV.notify_all(); } -void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) { - // Initialize storage to project root. Since Initialize is no-op for multiple - // calls we can simply call it for each file. - if (IndexShardStorage && !IndexShardStorage->initialize(Cmd.Directory)) { - elog("Failed to initialize shard storage"); - IndexShardStorage.reset(); - } +void BackgroundIndex::enqueueLocked( + tooling::CompileCommand Cmd, + std::shared_ptr IndexStorage) { Queue.push_back(Bind( - [this](tooling::CompileCommand Cmd) { + [this](tooling::CompileCommand Cmd, + std::shared_ptr IndexStorage) { std::string Filename = Cmd.Filename; Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir); - if (auto Error = index(std::move(Cmd))) + if (auto Error = index(std::move(Cmd), IndexStorage.get())) log("Indexing {0} failed: {1}", Filename, std::move(Error)); }, - std::move(Cmd))); + std::move(Cmd), std::move(IndexStorage))); } // Resolves URI to file paths with cache. @@ -198,7 +296,8 @@ private: /// Given index results from a TU, only update files in \p FilesToUpdate. void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, - const StringMap &FilesToUpdate) { + const StringMap &FilesToUpdate, + BackgroundIndexStorage *IndexStorage) { // Partition symbols/references into files. struct File { DenseSet Symbols; @@ -250,13 +349,14 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, auto RS = llvm::make_unique(std::move(Refs).build()); auto Hash = FilesToUpdate.lookup(Path); - // Put shards into storage for subsequent use. + // We need to store shards before updating the index, since the latter + // consumes slabs. // FIXME: Store Hash in the Shard. - if (IndexShardStorage) { + if (IndexStorage) { IndexFileOut Shard; Shard.Symbols = SS.get(); Shard.Refs = RS.get(); - IndexShardStorage->storeShard(Path, Shard); + IndexStorage->storeShard(Path, Shard); } std::lock_guard Lock(DigestsMu); @@ -270,7 +370,8 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, // Creates a filter to not collect index results from files with unchanged // digests. -// \p FileDigests contains file digests for the current indexed files, and all changed files will be added to \p FilesToUpdate. +// \p FileDigests contains file digests for the current indexed files, and all +// changed files will be added to \p FilesToUpdate. decltype(SymbolCollector::Options::FileFilter) createFileFilter( const llvm::StringMap &FileDigests, llvm::StringMap &FilesToUpdate) { @@ -299,16 +400,11 @@ decltype(SymbolCollector::Options::FileFilter) createFileFilter( }; } -Error BackgroundIndex::index(tooling::CompileCommand Cmd) { +Error BackgroundIndex::index(tooling::CompileCommand Cmd, + BackgroundIndexStorage *IndexStorage) { trace::Span Tracer("BackgroundIndex"); SPAN_ATTACH(Tracer, "file", Cmd.Filename); - SmallString<128> AbsolutePath; - if (sys::path::is_absolute(Cmd.Filename)) { - AbsolutePath = Cmd.Filename; - } else { - AbsolutePath = Cmd.Directory; - sys::path::append(AbsolutePath, Cmd.Filename); - } + SmallString<128> AbsolutePath = getAbsolutePath(Cmd); auto FS = FSProvider.getFileSystem(); auto Buf = FS->getBufferForFile(AbsolutePath); @@ -323,18 +419,6 @@ Error BackgroundIndex::index(tooling::CompileCommand Cmd) { if (IndexedFileDigests.lookup(AbsolutePath) == Hash) { vlog("No need to index {0}, already up to date", AbsolutePath); return Error::success(); - } else if (IndexShardStorage) { // Check if shard storage has the index. - auto Shard = IndexShardStorage->retrieveShard(AbsolutePath, Hash); - if (Shard) { - // FIXME: We might still want to re-index headers. - IndexedFileDigests[AbsolutePath] = Hash; - IndexedSymbols.update( - AbsolutePath, make_unique(std::move(*Shard->Symbols)), - make_unique(std::move(*Shard->Refs))); - - vlog("Loaded {0} from storage", AbsolutePath); - return Error::success(); - } } DigestsSnapshot = IndexedFileDigests; @@ -384,7 +468,8 @@ Error BackgroundIndex::index(tooling::CompileCommand Cmd) { Symbols.size(), Refs.numRefs()); SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs())); - update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate); + update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate, + IndexStorage); { // Make sure hash for the main file is always updated even if there is no // index data in it. @@ -401,59 +486,8 @@ Error BackgroundIndex::index(tooling::CompileCommand Cmd) { return Error::success(); } -llvm::Expected -DiskShardStorage::retrieveShard(llvm::StringRef ShardIdentifier, - FileDigest Hash) const { - assert(Initialized && "Not initialized?"); - llvm::SmallString<128> ShardPath; - { - std::lock_guard Lock(DiskShardRootMu); - ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); - } - auto Buffer = MemoryBuffer::getFile(ShardPath); - if (!Buffer) { - elog("Couldn't retrieve {0}: {1}", ShardPath, Buffer.getError().message()); - return llvm::make_error(Buffer.getError()); - } - // FIXME: Change readIndexFile to also look at Hash of the source that - // generated index and skip if there is a mismatch. - return readIndexFile(Buffer->get()->getBuffer()); -} - -bool DiskShardStorage::storeShard(llvm::StringRef ShardIdentifier, - IndexFileOut Shard) const { - assert(Initialized && "Not initialized?"); - llvm::SmallString<128> ShardPath; - { - std::lock_guard Lock(DiskShardRootMu); - ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); - } - std::error_code EC; - llvm::raw_fd_ostream OS(ShardPath, EC); - if (EC) { - elog("Failed to open {0} for writing: {1}", ShardPath, EC.message()); - return false; - } - OS << Shard; - return true; -} - -bool DiskShardStorage::initialize(llvm::StringRef Directory) { - if (Initialized) - return true; - std::lock_guard Lock(DiskShardRootMu); - DiskShardRoot = Directory; - sys::path::append(DiskShardRoot, ".clangd-index/"); - if (!llvm::sys::fs::exists(DiskShardRoot)) { - std::error_code OK; - std::error_code EC = llvm::sys::fs::create_directory(DiskShardRoot); - if (EC != OK) { - elog("Failed to create {0}: {1}", DiskShardRoot, EC.message()); - return Initialized = false; - } - } - return Initialized = true; -} +std::function(llvm::StringRef)> + BackgroundIndexStorage::Factory = nullptr; } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index bff630478556..a96e20e833a2 100644 --- a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -28,15 +28,35 @@ namespace clang { namespace clangd { -// Base class for Shard Storage operations. See DiskShardStorage for more info. -class ShardStorage { +// Handles storage and retrieval of index shards. +class BackgroundIndexStorage { +private: + static std::function(llvm::StringRef)> + Factory; + public: using FileDigest = decltype(llvm::SHA1::hash({})); + + // Stores given shard associationg with ShardIdentifier, which can be + // retrieved later on with the same identifier. virtual bool storeShard(llvm::StringRef ShardIdentifier, IndexFileOut Shard) const = 0; + + // Retrieves the shard if found, also returns hash for the source file that + // generated this shard. virtual llvm::Expected - retrieveShard(llvm::StringRef ShardIdentifier, FileDigest Hash) const = 0; - virtual bool initialize(llvm::StringRef Directory) = 0; + retrieveShard(llvm::StringRef ShardIdentifier) const = 0; + + template static void setStorageFactory(T Factory) { + BackgroundIndexStorage::Factory = Factory; + } + + static std::shared_ptr + getForDirectory(llvm::StringRef Directory) { + if (!Factory) + return nullptr; + return Factory(Directory); + } }; // Builds an in-memory index by by running the static indexer action over @@ -48,7 +68,6 @@ public: // FIXME: resource-dir injection should be hoisted somewhere common. BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir, const FileSystemProvider &, ArrayRef URISchemes, - std::unique_ptr IndexShardStorage = nullptr, size_t ThreadPoolSize = llvm::hardware_concurrency()); ~BackgroundIndex(); // Blocks while the current task finishes. @@ -72,17 +91,22 @@ public: private: /// Given index results from a TU, only update files in \p FilesToUpdate. void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, - const llvm::StringMap &FilesToUpdate); + const llvm::StringMap &FilesToUpdate, + BackgroundIndexStorage *IndexStorage); + void loadShard(BackgroundIndexStorage *IndexStorage, + const tooling::CompileCommand &Cmd); + void loadShards(BackgroundIndexStorage *IndexStorage, + const std::vector &Cmds); // configuration std::string ResourceDir; const FileSystemProvider &FSProvider; Context BackgroundContext; std::vector URISchemes; - std::unique_ptr IndexShardStorage; // index state - llvm::Error index(tooling::CompileCommand); + llvm::Error index(tooling::CompileCommand, + BackgroundIndexStorage *IndexStorage); FileSymbols IndexedSymbols; llvm::StringMap IndexedFileDigests; // Key is absolute file path. @@ -91,7 +115,8 @@ private: // queue management using Task = std::function; void run(); // Main loop executed by Thread. Runs tasks from Queue. - void enqueueLocked(tooling::CompileCommand Cmd); + void enqueueLocked(tooling::CompileCommand Cmd, + std::shared_ptr IndexStorage); std::mutex QueueMu; unsigned NumActiveTasks = 0; // Only idle when queue is empty *and* no tasks. std::condition_variable QueueCV; @@ -100,30 +125,6 @@ private: std::vector ThreadPool; // FIXME: Abstract this away. }; -// Handles storage and retrieval of index shards into disk. Requires Initialize -// to be called before storing or retrieval. Creates a directory called -// ".clangd-index/" under the path provided during initialize. This class is -// thread-safe. -class DiskShardStorage : public ShardStorage { - mutable std::mutex DiskShardRootMu; - llvm::SmallString<128> DiskShardRoot; - bool Initialized; - -public: - // Retrieves the shard if found and contents are consistent with the provided - // Hash. - llvm::Expected retrieveShard(llvm::StringRef ShardIdentifier, - FileDigest Hash) const; - - // Stores given shard with name ShardIdentifier under initialized directory. - bool storeShard(llvm::StringRef ShardIdentifier, IndexFileOut Shard) const; - - // Initializes DiskShardRoot to (Directory + ".clangd-index/") which is the - // base directory for all shard files. After the initialization succeeds all - // subsequent calls or no-op. - bool initialize(llvm::StringRef Directory); -}; - } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp index ee8aa67dd6db..eb3a19b2cbce 100644 --- a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp +++ b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp @@ -79,7 +79,7 @@ TEST(BackgroundIndexTest, IndexTwoFiles) { } TEST(BackgroundIndexTest, ShardStorageTest) { - class MemoryShardStorage : public ShardStorage { + class MemoryShardStorage : public BackgroundIndexStorage { mutable std::mutex StorageMu; llvm::StringMap &Storage; size_t &CacheHits; @@ -88,7 +88,8 @@ TEST(BackgroundIndexTest, ShardStorageTest) { MemoryShardStorage(llvm::StringMap &Storage, size_t &CacheHits) : Storage(Storage), CacheHits(CacheHits) {} - bool storeShard(llvm::StringRef ShardIdentifier, IndexFileOut Shard) const { + bool storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const override { std::lock_guard Lock(StorageMu); std::string &str = Storage[ShardIdentifier]; llvm::raw_string_ostream OS(str); @@ -96,8 +97,8 @@ TEST(BackgroundIndexTest, ShardStorageTest) { OS.flush(); return true; } - llvm::Expected retrieveShard(llvm::StringRef ShardIdentifier, - FileDigest Hash) const { + llvm::Expected + retrieveShard(llvm::StringRef ShardIdentifier) const override { std::lock_guard Lock(StorageMu); if (Storage.find(ShardIdentifier) == Storage.end()) return llvm::make_error( @@ -118,17 +119,21 @@ TEST(BackgroundIndexTest, ShardStorageTest) { )cpp"; FS.Files[testPath("root/A.cc")] = "#include \"A.h\"\nvoid g() { (void)common; }"; + llvm::StringMap Storage; size_t CacheHits = 0; + BackgroundIndexStorage::setStorageFactory( + [&Storage, &CacheHits](llvm::StringRef) { + return std::make_shared(Storage, CacheHits); + }); + tooling::CompileCommand Cmd; Cmd.Filename = testPath("root/A.cc"); Cmd.Directory = testPath("root"); Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; + // Check nothing is loaded from Storage, but A.cc and A.h has been stored. { - BackgroundIndex Idx( - Context::empty(), "", FS, /*URISchemes=*/{"unittest"}, - /*IndexShardStorage=*/ - llvm::make_unique(Storage, CacheHits)); + BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"}); Idx.enqueue(testPath("root"), Cmd); Idx.blockUntilIdleForTest(); } @@ -137,11 +142,9 @@ TEST(BackgroundIndexTest, ShardStorageTest) { EXPECT_NE(Storage.find(testPath("root/A.h")), Storage.end()); EXPECT_NE(Storage.find(testPath("root/A.cc")), Storage.end()); + // Check A.cc has been loaded from cache. { - BackgroundIndex Idx( - Context::empty(), "", FS, /*URISchemes=*/{"unittest"}, - /*IndexShardStorage=*/ - llvm::make_unique(Storage, CacheHits)); + BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"}); Idx.enqueue(testPath("root"), Cmd); Idx.blockUntilIdleForTest(); } @@ -149,7 +152,6 @@ TEST(BackgroundIndexTest, ShardStorageTest) { EXPECT_EQ(Storage.size(), 2U); EXPECT_NE(Storage.find(testPath("root/A.h")), Storage.end()); EXPECT_NE(Storage.find(testPath("root/A.cc")), Storage.end()); - // B_CC is dropped as we don't collect symbols from A.h in this compilation. } } // namespace clangd