240 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			240 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===--- Symbol.h ------------------------------------------------*- C++-*-===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
 | |
| #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
 | |
| 
 | |
| #include "SymbolID.h"
 | |
| #include "SymbolLocation.h"
 | |
| #include "SymbolOrigin.h"
 | |
| #include "clang/Index/IndexSymbol.h"
 | |
| #include "llvm/ADT/StringRef.h"
 | |
| #include "llvm/Support/StringSaver.h"
 | |
| 
 | |
| namespace clang {
 | |
| namespace clangd {
 | |
| 
 | |
| /// The class presents a C++ symbol, e.g. class, function.
 | |
| ///
 | |
| /// WARNING: Symbols do not own much of their underlying data - typically
 | |
| /// strings are owned by a SymbolSlab. They should be treated as non-owning
 | |
| /// references. Copies are shallow.
 | |
| ///
 | |
| /// When adding new unowned data fields to Symbol, remember to update:
 | |
| ///   - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage.
 | |
| ///   - mergeSymbol in Merge.cpp, to properly combine two Symbols.
 | |
| ///
 | |
| /// A fully documented symbol can be split as:
 | |
| /// size_type std::map<k, t>::count(const K& key) const
 | |
| /// | Return  |     Scope     |Name|    Signature     |
 | |
| /// We split up these components to allow display flexibility later.
 | |
| struct Symbol {
 | |
|   /// The ID of the symbol.
 | |
|   SymbolID ID;
 | |
|   /// The symbol information, like symbol kind.
 | |
|   index::SymbolInfo SymInfo = index::SymbolInfo();
 | |
|   /// The unqualified name of the symbol, e.g. "bar" (for ns::bar).
 | |
|   llvm::StringRef Name;
 | |
|   /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
 | |
|   llvm::StringRef Scope;
 | |
|   /// The location of the symbol's definition, if one was found.
 | |
|   /// This just covers the symbol name (e.g. without class/function body).
 | |
|   SymbolLocation Definition;
 | |
|   /// The location of the preferred declaration of the symbol.
 | |
|   /// This just covers the symbol name.
 | |
|   /// This may be the same as Definition.
 | |
|   ///
 | |
|   /// A C++ symbol may have multiple declarations, and we pick one to prefer.
 | |
|   ///   * For classes, the canonical declaration should be the definition.
 | |
|   ///   * For non-inline functions, the canonical declaration typically appears
 | |
|   ///     in the ".h" file corresponding to the definition.
 | |
|   SymbolLocation CanonicalDeclaration;
 | |
|   /// The number of translation units that reference this symbol from their main
 | |
|   /// file. This number is only meaningful if aggregated in an index.
 | |
|   unsigned References = 0;
 | |
|   /// Where this symbol came from. Usually an index provides a constant value.
 | |
|   SymbolOrigin Origin = SymbolOrigin::Unknown;
 | |
|   /// A brief description of the symbol that can be appended in the completion
 | |
|   /// candidate list. For example, "(X x, Y y) const" is a function signature.
 | |
|   /// Only set when the symbol is indexed for completion.
 | |
|   llvm::StringRef Signature;
 | |
|   /// Argument list in human-readable format, will be displayed to help
 | |
|   /// disambiguate between different specializations of a template. Empty for
 | |
|   /// non-specializations. Example: "<int, bool, 3>"
 | |
|   llvm::StringRef TemplateSpecializationArgs;
 | |
|   /// What to insert when completing this symbol, after the symbol name.
 | |
|   /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function).
 | |
|   /// (When snippets are disabled, the symbol name alone is used).
 | |
|   /// Only set when the symbol is indexed for completion.
 | |
|   llvm::StringRef CompletionSnippetSuffix;
 | |
|   /// Documentation including comment for the symbol declaration.
 | |
|   llvm::StringRef Documentation;
 | |
|   /// Type when this symbol is used in an expression. (Short display form).
 | |
|   /// e.g. return type of a function, or type of a variable.
 | |
|   /// Only set when the symbol is indexed for completion.
 | |
|   llvm::StringRef ReturnType;
 | |
| 
 | |
|   /// Raw representation of the OpaqueType of the symbol, used for scoring
 | |
|   /// purposes.
 | |
|   /// Only set when the symbol is indexed for completion.
 | |
|   llvm::StringRef Type;
 | |
| 
 | |
|   struct IncludeHeaderWithReferences {
 | |
|     IncludeHeaderWithReferences() = default;
 | |
| 
 | |
|     IncludeHeaderWithReferences(llvm::StringRef IncludeHeader,
 | |
|                                 unsigned References)
 | |
|         : IncludeHeader(IncludeHeader), References(References) {}
 | |
| 
 | |
|     /// This can be either a URI of the header to be #include'd
 | |
|     /// for this symbol, or a literal header quoted with <> or "" that is
 | |
|     /// suitable to be included directly. When it is a URI, the exact #include
 | |
|     /// path needs to be calculated according to the URI scheme.
 | |
|     ///
 | |
|     /// Note that the include header is a canonical include for the symbol and
 | |
|     /// can be different from FileURI in the CanonicalDeclaration.
 | |
|     llvm::StringRef IncludeHeader = "";
 | |
|     /// The number of translation units that reference this symbol and include
 | |
|     /// this header. This number is only meaningful if aggregated in an index.
 | |
|     unsigned References = 0;
 | |
|   };
 | |
|   /// One Symbol can potentially be included via different headers.
 | |
|   ///   - If we haven't seen a definition, this covers all declarations.
 | |
|   ///   - If we have seen a definition, this covers declarations visible from
 | |
|   ///   any definition.
 | |
|   /// Only set when the symbol is indexed for completion.
 | |
|   llvm::SmallVector<IncludeHeaderWithReferences, 1> IncludeHeaders;
 | |
| 
 | |
|   enum SymbolFlag : uint8_t {
 | |
|     None = 0,
 | |
|     /// Whether or not this symbol is meant to be used for the code completion.
 | |
|     /// See also isIndexedForCodeCompletion().
 | |
|     /// Note that we don't store completion information (signature, snippet,
 | |
|     /// type, includes) if the symbol is not indexed for code completion.
 | |
|     IndexedForCodeCompletion = 1 << 0,
 | |
|     /// Indicates if the symbol is deprecated.
 | |
|     Deprecated = 1 << 1,
 | |
|     /// Symbol is an implementation detail.
 | |
|     ImplementationDetail = 1 << 2,
 | |
|     /// Symbol is visible to other files (not e.g. a static helper function).
 | |
|     VisibleOutsideFile = 1 << 3,
 | |
|   };
 | |
| 
 | |
|   SymbolFlag Flags = SymbolFlag::None;
 | |
|   /// FIXME: also add deprecation message and fixit?
 | |
| };
 | |
| 
 | |
| inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A,
 | |
|                                     Symbol::SymbolFlag B) {
 | |
|   return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) |
 | |
|                                          static_cast<uint8_t>(B));
 | |
| }
 | |
| inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A,
 | |
|                                       Symbol::SymbolFlag B) {
 | |
|   return A = A | B;
 | |
| }
 | |
| 
 | |
| llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
 | |
| llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag);
 | |
| 
 | |
| /// Invokes Callback with each StringRef& contained in the Symbol.
 | |
| /// Useful for deduplicating backing strings.
 | |
| template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) {
 | |
|   CB(S.Name);
 | |
|   CB(S.Scope);
 | |
|   CB(S.TemplateSpecializationArgs);
 | |
|   CB(S.Signature);
 | |
|   CB(S.CompletionSnippetSuffix);
 | |
|   CB(S.Documentation);
 | |
|   CB(S.ReturnType);
 | |
|   CB(S.Type);
 | |
|   auto RawCharPointerCB = [&CB](const char *&P) {
 | |
|     llvm::StringRef S(P);
 | |
|     CB(S);
 | |
|     assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated");
 | |
|     P = S.data();
 | |
|   };
 | |
|   RawCharPointerCB(S.CanonicalDeclaration.FileURI);
 | |
|   RawCharPointerCB(S.Definition.FileURI);
 | |
| 
 | |
|   for (auto &Include : S.IncludeHeaders)
 | |
|     CB(Include.IncludeHeader);
 | |
| }
 | |
| 
 | |
| /// Computes query-independent quality score for a Symbol.
 | |
| /// This currently falls in the range [1, ln(#indexed documents)].
 | |
| /// FIXME: this should probably be split into symbol -> signals
 | |
| ///        and signals -> score, so it can be reused for Sema completions.
 | |
| float quality(const Symbol &S);
 | |
| 
 | |
| /// An immutable symbol container that stores a set of symbols.
 | |
| /// The container will maintain the lifetime of the symbols.
 | |
| class SymbolSlab {
 | |
| public:
 | |
|   using const_iterator = std::vector<Symbol>::const_iterator;
 | |
|   using iterator = const_iterator;
 | |
|   using value_type = Symbol;
 | |
| 
 | |
|   SymbolSlab() = default;
 | |
| 
 | |
|   const_iterator begin() const { return Symbols.begin(); }
 | |
|   const_iterator end() const { return Symbols.end(); }
 | |
|   const_iterator find(const SymbolID &SymID) const;
 | |
| 
 | |
|   using size_type = size_t;
 | |
|   size_type size() const { return Symbols.size(); }
 | |
|   bool empty() const { return Symbols.empty(); }
 | |
|   // Estimates the total memory usage.
 | |
|   size_t bytes() const {
 | |
|     return sizeof(*this) + Arena.getTotalMemory() +
 | |
|            Symbols.capacity() * sizeof(Symbol);
 | |
|   }
 | |
| 
 | |
|   /// SymbolSlab::Builder is a mutable container that can 'freeze' to
 | |
|   /// SymbolSlab. The frozen SymbolSlab will use less memory.
 | |
|   class Builder {
 | |
|   public:
 | |
|     Builder() : UniqueStrings(Arena) {}
 | |
| 
 | |
|     /// Adds a symbol, overwriting any existing one with the same ID.
 | |
|     /// This is a deep copy: underlying strings will be owned by the slab.
 | |
|     void insert(const Symbol &S);
 | |
| 
 | |
|     /// Removes the symbol with an ID, if it exists.
 | |
|     void erase(const SymbolID &ID) { Symbols.erase(ID); }
 | |
| 
 | |
|     /// Returns the symbol with an ID, if it exists. Valid until insert/remove.
 | |
|     const Symbol *find(const SymbolID &ID) {
 | |
|       auto I = Symbols.find(ID);
 | |
|       return I == Symbols.end() ? nullptr : &I->second;
 | |
|     }
 | |
| 
 | |
|     /// Consumes the builder to finalize the slab.
 | |
|     SymbolSlab build() &&;
 | |
| 
 | |
|   private:
 | |
|     llvm::BumpPtrAllocator Arena;
 | |
|     /// Intern table for strings. Contents are on the arena.
 | |
|     llvm::UniqueStringSaver UniqueStrings;
 | |
|     /// Values are indices into Symbols vector.
 | |
|     llvm::DenseMap<SymbolID, Symbol> Symbols;
 | |
|   };
 | |
| 
 | |
| private:
 | |
|   SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols)
 | |
|       : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {}
 | |
| 
 | |
|   llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not.
 | |
|   std::vector<Symbol> Symbols;  // Sorted by SymbolID to allow lookup.
 | |
| };
 | |
| 
 | |
| } // namespace clangd
 | |
| } // namespace clang
 | |
| 
 | |
| #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
 |