1 /*
2  * Copyright 2006-2008 The FLWOR Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 #ifndef ZORBA_COMPILER_VALUE_INDEX
18 #define ZORBA_COMPILER_VALUE_INDEX
19 
20 #include <vector>
21 
22 #include "common/shared_types.h"
23 
24 #include "compiler/parser/query_loc.h"
25 #include "compiler/expression/expr_utils.h"
26 
27 
28 namespace zorba
29 {
30 
31 class ExprManager;
32 class dynamic_context;
33 class DocIndexer;
34 typedef rchandle<DocIndexer> DocIndexer_t;
35 
36 
37 /******************************************************************************
38 
39   An index is a map data structure whose contents are defined by a domain expr
40   and a number of key exprs. Informally, the domain expr is evaluated first.
41   The domain expr must return a sequence of nodes. Then, for each node D in
42   the result of that expr, the key exprs are evaluated with D serving as their
43   context node. Each key expr must return a single atomic value (after atomization
44   is applied). Thus, for each domain node D, a tuple K of N atomic values is
45   constructed, and an entry mapping K to D is inserted to the index data structure.
46 
47   Class IndexDecl represents an index declaration, which describes the index
48   properties and the domain and key expressions and their types. An instance
49   of IndexDecl is constructed by the translator for each DECLARE INDEX stmt
50   that appears in an imported data module. A (index_qname --> IndexDecl obj)
51   mapping is also registered in the static context during translation.
52 
53   The DECLARE INDEX syntax is the following:
54   -------------------------------------------
55 
56   IndexDecl ::= "declare" IndexPropertyList "index" QName
57                 "on" "nodes" IndexDomainExpr "by" IndexKeyList
58 
59   IndexPropertyList := ("unique" | "non" "unique" |
60                         "value" "range" | "value" "equality" |
61                         "general" "range" | "general" "equality" |
62                         "automatically" "maintained" | "manually" "maintained")*
63 
64   IndexDomainExpr := PathExpr
65 
66   IndexKeyList := IndexKeySpec+
67 
68   IndexKeySpec := PathExpr TypeDeclaration? ("collation" UriLiteral)?
69 
70 
71   Constraints:
72   ------------
73 
74   The domain expr must be deterministic and it must not reference any variables
75   that are not defined inside the expr itself.
76 
77   After atomization, the ExprSingle in each IndexKeySpec must return at most one
78   atomic value for each value returned by the domain expression.
79 
80 
81   Index-related functions:
82   ------------------------
83 
84     create(...)
85     create-internal-index(...)
86     delete(...)
87     refresh-index(...)
88     probe-index-point(...)
89     probe-index-range(...)
90 
91   See src/functions/Index.cpp for details.
92 
93 
94   Data members:
95   -------------
96 
97   theLocation:
98   ------------
99   The query location where the index declaration appears at.
100 
101   theSctx:
102   --------
103   The root static context of the data module containing the index declaration.
104 
105   theName:
106   --------
107   The qname that identifies the index.
108 
109   theIsGeneral:
110   -------------
111 
112   theIsUnique:
113   ------------
114   Whether it is a unique index or not.
115 
116   theIsTemp:
117   ----------
118   Whether it is a temp index or not. A temp index is an index that is created
119   on-the-fly to optimize a query by converting a nested-lopp join to a hashjoin
120   (see index_join_rule.cpp). Such an index is destroyed at the end of the query
121   that created it.
122 
123   theMaintenanceMode:
124   -------------------
125   If and how to maintain the index during/after each apply-updates or not.
126 
127   theContainerKind:
128   -----------------
129   The kind if container used to implement the index. Currently, there are 2
130   kinds: a tree-based container (std::map) for ordered indexes, or hash-based
131   container for unordered indexes.
132 
133   theDomainClause:
134   ----------------
135   A FOR-clause that associates the domain expr with a FOR var that is referenced
136   by the key exprs and acts as the domain node for those exprs. (We need this
137   so that the get_domain_expr() method can work on the FOR var). If dexpr is the
138   domain expr specified in the index declaration, the actual domain expr is:
139 
140   for value indexes   : domainExpr := dexpr treat as node()*
141   for general indexes : domainExpr := check-distinct-nodes(dexpr treat as node()*)
142 
143   theKeyExprs:
144   ------------
145   The key expressions of the index. If kexpr is a key expr specified in the
146   index declaration, the actual domain expr is:
147 
148   for value indexes   : keyExpr := fn:data(kexpr) treat as typeDecl
149   for general indexes : distinct-values(fn:data(kexpr) treat as typeDecl)
150 
151   theKeyTypes:
152   ------------
153   For each key expr, this vector contains the builtin atomic item that is the
154   base of the atomic type type T specified for that key expr. For value indexes,
155   their index declaration must always specify a type T, and the item type of T
156   may not be xs:anyAtomicType or xs:untypedAtomic. For general indexes, the key
157   type declaration may be absent, or its item type may be xs:anyAtomicType or
158   xs:untypedAtomic. If absent, the associated entry in theKeyTypes will be NULL,
159   but this is treated the same as xs:anyAtomicType. The translator makes sure
160   that each key value inserted into the index matches with T according to the
161   rules of sequence type matching.
162 
163   theOrderModifiers:
164   ------------------
165 
166   theSourceNames:
167   -----------------
168   The qnames of the collections referenced in the domain and key expressions.
169   Currently, it is required that the arg given to every xqddf:collection()
170   invocation within the domain expr or any of the key exprs is a constant expr.
171   This guarantees that the set of collections that participate in the index
172   definition is stable. This is required for efficient automatic index
173   maintenance, because if the collection set for an index changes between 2
174   applyUpdates (by potentially different queries), all we can do is to always
175   rebuild the index unconditionally. Furthermore, this restriction means that
176   the collection set for an index is known at compile time. As a result, the
177   compiler may be able, if the index definition is "simple enough", to generate
178   explicit pul primitives, thus making index maintenance even more efficient.
179 
180   Notice that if we eliminate this restriction, we may still enforce via other
181   means the more general restriction that the set of collections for an index
182   is stable. For example, we may say that it is the set of collection that were
183   accessed when the index was first created. For index maintenance, we must
184   still make sure that the store knows for each index its collection set.
185   With the above restriction, this is easy, because it is the compiler who
186   gives to the store this info. Without the restriction, the runtime must
187   determine the collection set of an index as the index is being created, and
188   then give this info to the store.
189 
190   theDomainSourceExprs:
191   ---------------------
192   A vector containing pointers to the xqddf:collection() exprs within the
193   domain expr.
194 
195   theBuildExpr:
196   -------------
197   The expr that computes the index entries. It is used to create or rebuild
198   the full index from scratch. The expr is a flwor expr of the following form,
199   for value and general indexes, respectively:
200 
201   for $$dot at $$pos in domainExpr
202   return value-index-entry-builder($$dot, fieldExpr1, ..., fieldExprN);
203 
204   for $$dot at $$pos in domainExpr
205   return general-index-entry-builder($$dot, fieldExpr);
206 
207   theBuildPlan:
208   -------------
209   The runtime plan corresponding to theBuildExpr. During runtime (see
210   CreateIndexIterator and RebuildIndexIterator), this plan is wrapper into a
211   PlanWrapper and then passed to the store, which uses it to create or
212   re-build the full index.
213 
214   theDocIndexerExpr:
215   ------------------
216   This is an expr that builds the index over a single xml tree, which is
217   provided as an external var to this expr. It is basically the same as
218   theBuildExpr except that a reference to a collection inside the domainExpr
219   is replaced with a reference to an external var that can be bound dynamically
220   to some xml tree.
221 
222   theDocIndexerVar:
223   -----------------
224   The external var appearing in theDocIndexerExpr.
225 
226   theDocIndexer:
227   --------------
228   If the index domain expr is a map over a collection C, then theDocIndexer obj
229   is not NULL and it provides a method that takes as input a document D in C
230   and produces all the index entries for D. theDocIndexer obj is passed to the
231   store during an apply-updates, so that the store can obtain the index entries
232   corresponding to a document that is being updated within collection C, and
233   using these entries, maintain the index appropriately.
234 ********************************************************************************/
235 class IndexDecl : public SimpleRCObject
236 {
237 public:
238   typedef enum
239   {
240     HASH,
241     TREE
242   } ContainerKind;
243 
244   typedef enum
245   {
246     MANUAL,
247     REBUILD,
248     DOC_MAP
249   } MaintenanceMode;
250 
251 
252 private:
253   CompilerCB                    * theCCB;
254 
255   QueryLoc                        theLocation;
256 
257   static_context                * theSctx;
258 
259   store::Item_t                   theName;
260 
261   bool                            theIsGeneral;
262   bool                            theIsUnique;
263   bool                            theIsTemp;
264   MaintenanceMode                 theMaintenanceMode;
265   ContainerKind                   theContainerKind;
266 
267   for_clause                    * theDomainClause;
268   expr                          * theDomainExpr;
269   var_expr                      * theDomainVar;
270   var_expr                      * theDomainPosVar;
271 
272   csize                           theNumKeyExprs;
273   std::vector<expr*>              theKeyExprs;
274   std::vector<xqtref_t>           theKeyTypes;
275   std::vector<OrderModifier>      theOrderModifiers;
276 
277   std::vector<store::Item*>       theSourceNames;
278 
279   std::vector<expr*>              theDomainSourceExprs;
280 
281   expr                          * theBuildExpr;
282   PlanIter_t                      theBuildPlan;
283 
284   expr                          * theDocIndexerExpr;
285   PlanIter_t                      theDocIndexerPlan;
286   DocIndexer_t                    theDocIndexer;
287 
288 public:
289   SERIALIZABLE_CLASS(IndexDecl)
290   IndexDecl(::zorba::serialization::Archiver& ar);
291   void serialize(::zorba::serialization::Archiver& ar);
292 
293 public:
294   IndexDecl(
295       static_context* sctx,
296       CompilerCB* ccb,
297       const QueryLoc& loc,
298       const store::Item_t& name);
299 
300   ~IndexDecl();
301 
getSctx()302   static_context* getSctx() const { return theSctx; }
303 
304   store::Item* getName() const;
305 
isGeneral()306   bool isGeneral() const { return theIsGeneral; }
307 
setGeneral(bool gen)308   void setGeneral(bool gen) { theIsGeneral = gen; }
309 
getUnique()310   bool getUnique() const { return theIsUnique; }
311 
setUnique(bool unique)312   void setUnique(bool unique) { theIsUnique = unique; }
313 
isTemp()314   bool isTemp() const { return theIsTemp; }
315 
setTemp(bool tmp)316   void setTemp(bool tmp) { theIsTemp = tmp; }
317 
getMaintenanceMode()318   MaintenanceMode getMaintenanceMode() const { return theMaintenanceMode; }
319 
setMaintenanceMode(MaintenanceMode v)320   void setMaintenanceMode(MaintenanceMode v) { theMaintenanceMode = v; }
321 
isAutomatic()322   bool isAutomatic() const { return theMaintenanceMode != MANUAL; }
323 
getMethod()324   ContainerKind getMethod() const { return theContainerKind; }
325 
setMethod(ContainerKind kind)326   void setMethod(ContainerKind kind) { theContainerKind = kind; }
327 
isOrdered()328   bool isOrdered() const { return theContainerKind == TREE; }
329 
330   expr* getDomainExpr() const;
331 
332   void setDomainExpr(expr* domainExpr);
333 
334   var_expr* getDomainVariable() const;
335 
336   void setDomainVariable(var_expr* domainVar);
337 
338   var_expr* getDomainPositionVariable() const;
339 
340   void setDomainPositionVariable(var_expr* domainPosVar);
341 
getNumKeyExprs()342   csize getNumKeyExprs() const { return theNumKeyExprs; }
343 
344   void setKeyExpressions(const std::vector<expr*>& keyExprs);
345 
346   const std::vector<xqtref_t>& getKeyTypes() const;
347 
348   void setKeyTypes(const std::vector<xqtref_t>& keyTypes);
349 
350   const std::vector<OrderModifier>& getOrderModifiers() const;
351 
352   void setOrderModifiers(const std::vector<OrderModifier>& modifiers);
353 
numSources()354   csize numSources() const { return theSourceNames.size(); }
355 
getSourceName(csize i)356   const store::Item* getSourceName(csize i) const { return theSourceNames[i]; }
357 
getDomainSourceExpr(csize i)358   expr* getDomainSourceExpr(csize i) const { return theDomainSourceExprs[i]; }
359 
360   void analyze(CompilerCB* ccb);
361 
362   expr* getBuildExpr(CompilerCB* ccb, const QueryLoc& loc);
363 
364   PlanIterator* getBuildPlan(CompilerCB* ccb, const QueryLoc& loc);
365 
366   DocIndexer* getDocIndexer(CompilerCB* ccb, const QueryLoc& loc);
367 
368   std::string toString();
369 
370 private:
371   void analyzeExprInternal(
372         expr* e,
373         std::vector</*const */store::Item*>& sourceNames,
374         std::vector<expr*>& sourceExprs,
375         std::vector<var_expr*>& varExprs,
376         expr* dotVar);
377 };
378 
379 
380 typedef rchandle<IndexDecl> IndexDecl_t;
381 
382 }
383 
384 #endif
385 
386 
387 /*
388  * Local variables:
389  * mode: c++
390  * End:
391  */
392 /* vim:set et sw=2 ts=2: */
393