1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This defines Dex - a symbol index implementation based on query iterators
11 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
12 /// While consuming more memory and having longer build stage due to
13 /// preprocessing, Dex will have substantially lower latency. It will also allow
14 /// efficient symbol searching which is crucial for operations like code
15 /// completion, and can be very important for a number of different code
16 /// transformations which will be eventually supported by Clangd.
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
21 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22 
23 #include "Iterator.h"
24 #include "PostingList.h"
25 #include "Token.h"
26 #include "Trigram.h"
27 #include "index/Index.h"
28 #include "index/MemIndex.h"
29 #include "index/Relation.h"
30 #include "index/SymbolCollector.h"
31 
32 namespace clang {
33 namespace clangd {
34 namespace dex {
35 
36 /// In-memory Dex trigram-based index implementation.
37 // FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
38 // index so that it can be loaded from the disk. Since static index is not
39 // changed frequently, it's safe to assume that it has to be built only once
40 // (when the clangd process starts). Therefore, it can be easier to store built
41 // index on disk and then load it if available.
42 class Dex : public SymbolIndex {
43 public:
44   // All data must outlive this index.
45   template <typename SymbolRange, typename RefsRange, typename RelationsRange>
Dex(SymbolRange && Symbols,RefsRange && Refs,RelationsRange && Relations)46   Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
47       : Corpus(0) {
48     for (auto &&Sym : Symbols)
49       this->Symbols.push_back(&Sym);
50     for (auto &&Ref : Refs)
51       this->Refs.try_emplace(Ref.first, Ref.second);
52     for (auto &&Rel : Relations)
53       this->Relations[std::make_pair(Rel.Subject,
54                                      static_cast<uint8_t>(Rel.Predicate))]
55           .push_back(Rel.Object);
56     buildIndex();
57   }
58   // Symbols and Refs are owned by BackingData, Index takes ownership.
59   template <typename SymbolRange, typename RefsRange, typename RelationsRange,
60             typename Payload>
Dex(SymbolRange && Symbols,RefsRange && Refs,RelationsRange && Relations,Payload && BackingData,size_t BackingDataSize)61   Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
62       Payload &&BackingData, size_t BackingDataSize)
63       : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
64             std::forward<RelationsRange>(Relations)) {
65     KeepAlive = std::shared_ptr<void>(
66         std::make_shared<Payload>(std::move(BackingData)), nullptr);
67     this->BackingDataSize = BackingDataSize;
68   }
69 
70   /// Builds an index from slabs. The index takes ownership of the slab.
71   static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab);
72 
73   bool
74   fuzzyFind(const FuzzyFindRequest &Req,
75             llvm::function_ref<void(const Symbol &)> Callback) const override;
76 
77   void lookup(const LookupRequest &Req,
78               llvm::function_ref<void(const Symbol &)> Callback) const override;
79 
80   bool refs(const RefsRequest &Req,
81             llvm::function_ref<void(const Ref &)> Callback) const override;
82 
83   void relations(const RelationsRequest &Req,
84                  llvm::function_ref<void(const SymbolID &, const Symbol &)>
85                      Callback) const override;
86 
87   size_t estimateMemoryUsage() const override;
88 
89 private:
90   void buildIndex();
91   std::unique_ptr<Iterator> iterator(const Token &Tok) const;
92   std::unique_ptr<Iterator>
93   createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
94   std::unique_ptr<Iterator>
95   createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;
96 
97   /// Stores symbols sorted in the descending order of symbol quality..
98   std::vector<const Symbol *> Symbols;
99   /// SymbolQuality[I] is the quality of Symbols[I].
100   std::vector<float> SymbolQuality;
101   llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
102   /// Inverted index is a mapping from the search token to the posting list,
103   /// which contains all items which can be characterized by such search token.
104   /// For example, if the search token is scope "std::", the corresponding
105   /// posting list would contain all indices of symbols defined in namespace
106   /// std. Inverted index is used to retrieve posting lists which are processed
107   /// during the fuzzyFind process.
108   llvm::DenseMap<Token, PostingList> InvertedIndex;
109   dex::Corpus Corpus;
110   llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
111   static_assert(sizeof(RelationKind) == sizeof(uint8_t),
112                 "RelationKind should be of same size as a uint8_t");
113   llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
114   std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
115   // Size of memory retained by KeepAlive.
116   size_t BackingDataSize = 0;
117 };
118 
119 /// Returns Search Token for a number of parent directories of given Path.
120 /// Should be used within the index build process.
121 ///
122 /// This function is exposed for testing only.
123 std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
124 
125 } // namespace dex
126 } // namespace clangd
127 } // namespace clang
128 
129 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
130