1 /* 2 * Copyright 2006-2008 The FLWOR Foundation. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #pragma once 17 #ifndef ZORBA_COMPILER_VALUE_INDEX 18 #define ZORBA_COMPILER_VALUE_INDEX 19 20 #include <vector> 21 22 #include "common/shared_types.h" 23 24 #include "compiler/parser/query_loc.h" 25 #include "compiler/expression/expr_utils.h" 26 27 28 namespace zorba 29 { 30 31 class ExprManager; 32 class dynamic_context; 33 class DocIndexer; 34 typedef rchandle<DocIndexer> DocIndexer_t; 35 36 37 /****************************************************************************** 38 39 An index is a map data structure whose contents are defined by a domain expr 40 and a number of key exprs. Informally, the domain expr is evaluated first. 41 The domain expr must return a sequence of nodes. Then, for each node D in 42 the result of that expr, the key exprs are evaluated with D serving as their 43 context node. Each key expr must return a single atomic value (after atomization 44 is applied). Thus, for each domain node D, a tuple K of N atomic values is 45 constructed, and an entry mapping K to D is inserted to the index data structure. 46 47 Class IndexDecl represents an index declaration, which describes the index 48 properties and the domain and key expressions and their types. An instance 49 of IndexDecl is constructed by the translator for each DECLARE INDEX stmt 50 that appears in an imported data module. A (index_qname --> IndexDecl obj) 51 mapping is also registered in the static context during translation. 52 53 The DECLARE INDEX syntax is the following: 54 ------------------------------------------- 55 56 IndexDecl ::= "declare" IndexPropertyList "index" QName 57 "on" "nodes" IndexDomainExpr "by" IndexKeyList 58 59 IndexPropertyList := ("unique" | "non" "unique" | 60 "value" "range" | "value" "equality" | 61 "general" "range" | "general" "equality" | 62 "automatically" "maintained" | "manually" "maintained")* 63 64 IndexDomainExpr := PathExpr 65 66 IndexKeyList := IndexKeySpec+ 67 68 IndexKeySpec := PathExpr TypeDeclaration? ("collation" UriLiteral)? 69 70 71 Constraints: 72 ------------ 73 74 The domain expr must be deterministic and it must not reference any variables 75 that are not defined inside the expr itself. 76 77 After atomization, the ExprSingle in each IndexKeySpec must return at most one 78 atomic value for each value returned by the domain expression. 79 80 81 Index-related functions: 82 ------------------------ 83 84 create(...) 85 create-internal-index(...) 86 delete(...) 87 refresh-index(...) 88 probe-index-point(...) 89 probe-index-range(...) 90 91 See src/functions/Index.cpp for details. 92 93 94 Data members: 95 ------------- 96 97 theLocation: 98 ------------ 99 The query location where the index declaration appears at. 100 101 theSctx: 102 -------- 103 The root static context of the data module containing the index declaration. 104 105 theName: 106 -------- 107 The qname that identifies the index. 108 109 theIsGeneral: 110 ------------- 111 112 theIsUnique: 113 ------------ 114 Whether it is a unique index or not. 115 116 theIsTemp: 117 ---------- 118 Whether it is a temp index or not. A temp index is an index that is created 119 on-the-fly to optimize a query by converting a nested-lopp join to a hashjoin 120 (see index_join_rule.cpp). Such an index is destroyed at the end of the query 121 that created it. 122 123 theMaintenanceMode: 124 ------------------- 125 If and how to maintain the index during/after each apply-updates or not. 126 127 theContainerKind: 128 ----------------- 129 The kind if container used to implement the index. Currently, there are 2 130 kinds: a tree-based container (std::map) for ordered indexes, or hash-based 131 container for unordered indexes. 132 133 theDomainClause: 134 ---------------- 135 A FOR-clause that associates the domain expr with a FOR var that is referenced 136 by the key exprs and acts as the domain node for those exprs. (We need this 137 so that the get_domain_expr() method can work on the FOR var). If dexpr is the 138 domain expr specified in the index declaration, the actual domain expr is: 139 140 for value indexes : domainExpr := dexpr treat as node()* 141 for general indexes : domainExpr := check-distinct-nodes(dexpr treat as node()*) 142 143 theKeyExprs: 144 ------------ 145 The key expressions of the index. If kexpr is a key expr specified in the 146 index declaration, the actual domain expr is: 147 148 for value indexes : keyExpr := fn:data(kexpr) treat as typeDecl 149 for general indexes : distinct-values(fn:data(kexpr) treat as typeDecl) 150 151 theKeyTypes: 152 ------------ 153 For each key expr, this vector contains the builtin atomic item that is the 154 base of the atomic type type T specified for that key expr. For value indexes, 155 their index declaration must always specify a type T, and the item type of T 156 may not be xs:anyAtomicType or xs:untypedAtomic. For general indexes, the key 157 type declaration may be absent, or its item type may be xs:anyAtomicType or 158 xs:untypedAtomic. If absent, the associated entry in theKeyTypes will be NULL, 159 but this is treated the same as xs:anyAtomicType. The translator makes sure 160 that each key value inserted into the index matches with T according to the 161 rules of sequence type matching. 162 163 theOrderModifiers: 164 ------------------ 165 166 theSourceNames: 167 ----------------- 168 The qnames of the collections referenced in the domain and key expressions. 169 Currently, it is required that the arg given to every xqddf:collection() 170 invocation within the domain expr or any of the key exprs is a constant expr. 171 This guarantees that the set of collections that participate in the index 172 definition is stable. This is required for efficient automatic index 173 maintenance, because if the collection set for an index changes between 2 174 applyUpdates (by potentially different queries), all we can do is to always 175 rebuild the index unconditionally. Furthermore, this restriction means that 176 the collection set for an index is known at compile time. As a result, the 177 compiler may be able, if the index definition is "simple enough", to generate 178 explicit pul primitives, thus making index maintenance even more efficient. 179 180 Notice that if we eliminate this restriction, we may still enforce via other 181 means the more general restriction that the set of collections for an index 182 is stable. For example, we may say that it is the set of collection that were 183 accessed when the index was first created. For index maintenance, we must 184 still make sure that the store knows for each index its collection set. 185 With the above restriction, this is easy, because it is the compiler who 186 gives to the store this info. Without the restriction, the runtime must 187 determine the collection set of an index as the index is being created, and 188 then give this info to the store. 189 190 theDomainSourceExprs: 191 --------------------- 192 A vector containing pointers to the xqddf:collection() exprs within the 193 domain expr. 194 195 theBuildExpr: 196 ------------- 197 The expr that computes the index entries. It is used to create or rebuild 198 the full index from scratch. The expr is a flwor expr of the following form, 199 for value and general indexes, respectively: 200 201 for $$dot at $$pos in domainExpr 202 return value-index-entry-builder($$dot, fieldExpr1, ..., fieldExprN); 203 204 for $$dot at $$pos in domainExpr 205 return general-index-entry-builder($$dot, fieldExpr); 206 207 theBuildPlan: 208 ------------- 209 The runtime plan corresponding to theBuildExpr. During runtime (see 210 CreateIndexIterator and RebuildIndexIterator), this plan is wrapper into a 211 PlanWrapper and then passed to the store, which uses it to create or 212 re-build the full index. 213 214 theDocIndexerExpr: 215 ------------------ 216 This is an expr that builds the index over a single xml tree, which is 217 provided as an external var to this expr. It is basically the same as 218 theBuildExpr except that a reference to a collection inside the domainExpr 219 is replaced with a reference to an external var that can be bound dynamically 220 to some xml tree. 221 222 theDocIndexerVar: 223 ----------------- 224 The external var appearing in theDocIndexerExpr. 225 226 theDocIndexer: 227 -------------- 228 If the index domain expr is a map over a collection C, then theDocIndexer obj 229 is not NULL and it provides a method that takes as input a document D in C 230 and produces all the index entries for D. theDocIndexer obj is passed to the 231 store during an apply-updates, so that the store can obtain the index entries 232 corresponding to a document that is being updated within collection C, and 233 using these entries, maintain the index appropriately. 234 ********************************************************************************/ 235 class IndexDecl : public SimpleRCObject 236 { 237 public: 238 typedef enum 239 { 240 HASH, 241 TREE 242 } ContainerKind; 243 244 typedef enum 245 { 246 MANUAL, 247 REBUILD, 248 DOC_MAP 249 } MaintenanceMode; 250 251 252 private: 253 CompilerCB * theCCB; 254 255 QueryLoc theLocation; 256 257 static_context * theSctx; 258 259 store::Item_t theName; 260 261 bool theIsGeneral; 262 bool theIsUnique; 263 bool theIsTemp; 264 MaintenanceMode theMaintenanceMode; 265 ContainerKind theContainerKind; 266 267 for_clause * theDomainClause; 268 expr * theDomainExpr; 269 var_expr * theDomainVar; 270 var_expr * theDomainPosVar; 271 272 csize theNumKeyExprs; 273 std::vector<expr*> theKeyExprs; 274 std::vector<xqtref_t> theKeyTypes; 275 std::vector<OrderModifier> theOrderModifiers; 276 277 std::vector<store::Item*> theSourceNames; 278 279 std::vector<expr*> theDomainSourceExprs; 280 281 expr * theBuildExpr; 282 PlanIter_t theBuildPlan; 283 284 expr * theDocIndexerExpr; 285 PlanIter_t theDocIndexerPlan; 286 DocIndexer_t theDocIndexer; 287 288 public: 289 SERIALIZABLE_CLASS(IndexDecl) 290 IndexDecl(::zorba::serialization::Archiver& ar); 291 void serialize(::zorba::serialization::Archiver& ar); 292 293 public: 294 IndexDecl( 295 static_context* sctx, 296 CompilerCB* ccb, 297 const QueryLoc& loc, 298 const store::Item_t& name); 299 300 ~IndexDecl(); 301 getSctx()302 static_context* getSctx() const { return theSctx; } 303 304 store::Item* getName() const; 305 isGeneral()306 bool isGeneral() const { return theIsGeneral; } 307 setGeneral(bool gen)308 void setGeneral(bool gen) { theIsGeneral = gen; } 309 getUnique()310 bool getUnique() const { return theIsUnique; } 311 setUnique(bool unique)312 void setUnique(bool unique) { theIsUnique = unique; } 313 isTemp()314 bool isTemp() const { return theIsTemp; } 315 setTemp(bool tmp)316 void setTemp(bool tmp) { theIsTemp = tmp; } 317 getMaintenanceMode()318 MaintenanceMode getMaintenanceMode() const { return theMaintenanceMode; } 319 setMaintenanceMode(MaintenanceMode v)320 void setMaintenanceMode(MaintenanceMode v) { theMaintenanceMode = v; } 321 isAutomatic()322 bool isAutomatic() const { return theMaintenanceMode != MANUAL; } 323 getMethod()324 ContainerKind getMethod() const { return theContainerKind; } 325 setMethod(ContainerKind kind)326 void setMethod(ContainerKind kind) { theContainerKind = kind; } 327 isOrdered()328 bool isOrdered() const { return theContainerKind == TREE; } 329 330 expr* getDomainExpr() const; 331 332 void setDomainExpr(expr* domainExpr); 333 334 var_expr* getDomainVariable() const; 335 336 void setDomainVariable(var_expr* domainVar); 337 338 var_expr* getDomainPositionVariable() const; 339 340 void setDomainPositionVariable(var_expr* domainPosVar); 341 getNumKeyExprs()342 csize getNumKeyExprs() const { return theNumKeyExprs; } 343 344 void setKeyExpressions(const std::vector<expr*>& keyExprs); 345 346 const std::vector<xqtref_t>& getKeyTypes() const; 347 348 void setKeyTypes(const std::vector<xqtref_t>& keyTypes); 349 350 const std::vector<OrderModifier>& getOrderModifiers() const; 351 352 void setOrderModifiers(const std::vector<OrderModifier>& modifiers); 353 numSources()354 csize numSources() const { return theSourceNames.size(); } 355 getSourceName(csize i)356 const store::Item* getSourceName(csize i) const { return theSourceNames[i]; } 357 getDomainSourceExpr(csize i)358 expr* getDomainSourceExpr(csize i) const { return theDomainSourceExprs[i]; } 359 360 void analyze(CompilerCB* ccb); 361 362 expr* getBuildExpr(CompilerCB* ccb, const QueryLoc& loc); 363 364 PlanIterator* getBuildPlan(CompilerCB* ccb, const QueryLoc& loc); 365 366 DocIndexer* getDocIndexer(CompilerCB* ccb, const QueryLoc& loc); 367 368 std::string toString(); 369 370 private: 371 void analyzeExprInternal( 372 expr* e, 373 std::vector</*const */store::Item*>& sourceNames, 374 std::vector<expr*>& sourceExprs, 375 std::vector<var_expr*>& varExprs, 376 expr* dotVar); 377 }; 378 379 380 typedef rchandle<IndexDecl> IndexDecl_t; 381 382 } 383 384 #endif 385 386 387 /* 388 * Local variables: 389 * mode: c++ 390 * End: 391 */ 392 /* vim:set et sw=2 ts=2: */ 393