1 /* 2 * Copyright 2006-2008 The FLWOR Foundation. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef ZORBA_STORE_INDEX 17 #define ZORBA_STORE_INDEX 18 19 #include "store/api/shared_types.h" 20 #include "store/util/item_vector.h" 21 22 namespace zorba 23 { 24 25 class XQPCollator; 26 27 28 namespace store 29 { 30 31 class IndexSpecification; 32 class IndexEntryCreator; 33 class Iterator; 34 35 typedef rchandle<IndexEntryCreator> IndexEntryCreator_t; 36 37 38 /***************************************************************************//** 39 Specification for creating a value or general index. 40 41 theNumKeyColumns: 42 ----------------- 43 The number of columns in each key. 44 45 theKeyTypes: 46 ------------ 47 The data types of the key columns. Each type must be a builtin atomic type, 48 and for sorted indices, it must have an ordering. 49 50 theCollations: 51 -------------- 52 The names (uris) of the collations to use when comparing the string columns 53 of two keys. The size of this vector is equal to theNumKeyColumns; if a type 54 of a key column is not string, the associated entry in theCollations is the 55 empty string. 56 57 theTimezone: 58 ------------ 59 The timezone is needed to compare date/time key values. 60 61 theIsGeneral: 62 ------------- 63 Whether the index is "general" or not. 64 65 theIsUnique: 66 ------------ 67 Whether the index is unique, i.e., there is exactly one value associated with 68 each key. 69 70 theIsSorted: 71 ------------ 72 Whether the index is sorted by its key values or not. 73 74 theIsTemp: 75 ---------- 76 Whether the index is temporary or not. 77 78 theIsThreadSafe: 79 ---------------- 80 Whether the index can be shared among multiple threads or not. 81 82 theIsAutomatic: 83 --------------- 84 Whether the index must be maintained during/after each apply- updates or not. 85 86 theSources: 87 ----------- 88 The qnames of the collections accessed by the defining exprs of this index. 89 ********************************************************************************/ 90 class IndexSpecification 91 { 92 public: 93 csize theNumKeyColumns; 94 std::vector<store::Item_t> theKeyTypes; 95 std::vector<std::string> theCollations; 96 long theTimezone; 97 98 bool theIsGeneral; 99 bool theIsUnique; 100 bool theIsSorted; 101 bool theIsTemp; 102 bool theIsThreadSafe; 103 bool theIsAutomatic; 104 105 std::vector<store::Item_t> theSources; 106 107 public: IndexSpecification()108 IndexSpecification() 109 : 110 theNumKeyColumns(0), 111 theTimezone(0), 112 theIsGeneral(false), 113 theIsUnique(false), 114 theIsSorted(false), 115 theIsTemp(false), 116 theIsThreadSafe(false) 117 { 118 } 119 clear()120 void clear() 121 { 122 theNumKeyColumns = 0; 123 theKeyTypes.clear(); 124 theCollations.clear(); 125 theTimezone = 0; 126 theIsGeneral = theIsUnique = theIsSorted = theIsTemp = theIsThreadSafe = false; 127 } 128 resize(csize numColumns)129 void resize(csize numColumns) 130 { 131 theNumKeyColumns = numColumns; 132 theKeyTypes.resize(numColumns); 133 theCollations.resize(numColumns); 134 } 135 getNumColumns()136 csize getNumColumns() const { return theNumKeyColumns; } 137 getTimezone()138 long getTimezone() const { return theTimezone; } 139 }; 140 141 142 /**************************************************************************//** 143 Class IndexKey represents an index key as a vector of item handles. 144 *******************************************************************************/ 145 class IndexKey : public ItemVector 146 { 147 public: ItemVector(size)148 IndexKey(csize size = 0) : ItemVector(size) {} 149 }; 150 151 152 /**************************************************************************//** 153 A index delta is a set of [domain-node, associated-key(s)] pairs. 154 *******************************************************************************/ 155 class IndexDelta 156 { 157 public: 158 typedef std::pair<Item_t, IndexKey*> ValuePair; 159 160 typedef std::vector<ValuePair> ValueDelta; 161 162 typedef std::pair<Item_t, Item_t> GeneralPair; 163 164 typedef std::vector<GeneralPair> GeneralDelta; 165 166 protected: 167 ValueDelta theValueDelta; 168 GeneralDelta theGeneralDelta; 169 170 public: 171 void addValuePair(Item_t& node, IndexKey* key); 172 173 void addGeneralPair(Item_t& node, Item_t& key); 174 175 void clear(); 176 177 protected: IndexDelta()178 IndexDelta() {} 179 }; 180 181 182 /***************************************************************************//** 183 184 Class IndexCondition represents a search condition on the keys of an index. 185 An instance of IndexCondition is given as a parameter to the init() method 186 of an IndexProbeIterator (see iterator.h), which can then iterate over the 187 items in the value of each index key that satisfies the condition. 188 189 There are 4 kinds of index conditions: 190 191 POINT_VALUE : 192 ------------- 193 194 It represents a condition that is satisfied by at most one index key tuple, 195 namely the index key tuple K (if it exists) that is equal, according to the 196 rules of value equality, to a user specified search key tuple. If any of 197 the domain items associated with K is also associated with another key tuples, 198 an error os raised. 199 200 POINT_GENERAL: 201 -------------- 202 203 It represents a condition that is satisfied by all index keys that are equal, 204 according to the rules of general equality, to a user specified search key. 205 This condition is applicable to general indexes only. 206 207 208 BOX_VALUE : 209 ----------- 210 211 It represents a condition that is satisfied by the index keys inside a 212 user-specified "box". 213 214 Let M be the number of key columns. Then, an M-dimensional box is defined as 215 a conjuction of M range conditions on columns 0 to M-1. Each range condition 216 specifies a range of acceptable values for some key column. Specifically, a 217 range is defined as the set of all key values K such that 218 219 lower_bound <? K <? upper_bound, where <? is either the lt or the le operator. 220 221 The lower bound may be -INFINITY and the upper bound may be +INFINTY. 222 223 BOX_GENERAL : 224 ------------- 225 226 It represents the following condition: 227 228 bound op? K, where 229 230 (a) op? is one of <, <=, >, or >=, 231 (b) K is a key value, and 232 (c) bound is either an atomic item or -INFINITY or +INFINITY. 233 234 ********************************************************************************/ 235 class IndexCondition : public SimpleRCObject 236 { 237 public: 238 typedef enum 239 { 240 POINT_VALUE, 241 POINT_GENERAL, 242 BOX_VALUE, 243 BOX_GENERAL 244 } Kind; 245 246 public: ~IndexCondition()247 virtual ~IndexCondition() {} 248 249 /** 250 * Clear the internal data of this condition object, so that it can be 251 * rebuilt and reused. 252 */ 253 virtual void clear() = 0; 254 255 /** 256 * Return the kind of the condition. 257 */ 258 virtual Kind getKind() const = 0; 259 260 /** 261 * Return the kind of the condition as a string. 262 */ 263 virtual std::string getKindString() const = 0; 264 265 /** 266 * This method applies to POINT_VALUE and POINT_GENERAL conditions only. 267 * The key associated with such conditions is built one item at a time using 268 * this method. 269 */ 270 virtual void pushItem(Item_t& item) = 0; 271 272 /** 273 * This method applies to BOX_VALUE conditions only. 274 * The box associated with this condition is built one range at a time using 275 * the pushRange() method. 276 * 277 * @param lower The lower bound of the range. May be NULL, which indicates 278 * either the empty sequence or -INFINITY. The haveLower parameter is 279 * used to distinguish between these two cases. 280 * @param upper The upper bound of the range. May be NULL, which indicates 281 * either the empty sequence or +INFINITY. The haveUpper parameter is 282 * used to distinguish between these two cases. 283 * @param haveLower False if the lower bound is -INFINITY. True otherwise. 284 * @param haveUpper False if the upper bound is +INFINITY. True otherwise. 285 * @param lowerIncl True if the lower bound is included in the range. False 286 * otherwise. 287 * @param upperIncl True if the upper bound is included in the range. False 288 * otherwise. 289 */ 290 virtual void pushRange( 291 Item_t& lower, 292 Item_t& upper, 293 bool haveLower, 294 bool haveUpper, 295 bool lowerIncl, 296 bool upperIncl) = 0; 297 298 /** 299 * This method applies to BOX_GENERAL conditions only. 300 * 301 * @param bound An item that constitutes either a lower or an upper bound 302 * for the key values. 303 * @param isLower True if the bound is a lower one; false if it is an upper 304 * one. 305 * @param boundIncl True if the boundary search key is included in the 306 * search. False otherwise. 307 */ 308 virtual void pushBound(Item_t& bound, bool isLower, bool boundIncl) = 0; 309 310 /** 311 * Serialize this condition. 312 */ 313 virtual std::string toString() const = 0; 314 }; 315 316 317 /**************************************************************************//** 318 319 Abstract index class. It represents both "value" and "general" indexes. 320 321 Value Indexes: 322 -------------- 323 324 From the store's point of view, a "value index" is a container that "stores" 325 a relation between tuples of atomic items (called key tuples) and items 326 (called domain items). The relationship is a function on the domain items, 327 i.e., for each domain item there is exactly one key tuple. In general, the 328 function is N:1, that is, several domain items may have the same key tuple. 329 330 The key tuples must satisfy the following constraints: 331 332 1. All key tuples in a value index have the same fixed number of items, say M. 333 Given this constraint, we can define the i-th "key column" of a value index 334 as the set of items that appear in the i-th position of each key tuple. If 335 an index has N tuples, then there are M key columns, each containing N items. 336 337 2. The items in each key column must be comparable with each other using the 338 Item::equals() method and/or the Item::compare() method. This implies that 339 all items in a key column must belong to the same branch of the XMLSchema 340 type hierarchy. Furthermore, no key item may have type xs:untypedAtomic or 341 xs:anyAtomicType. 342 343 General Indexes: 344 ---------------- 345 346 From the store's point of view, a "general index" is a container that "stores" 347 a relation between tuples of atomic items (called key tuples) and items 348 (called domain items). The relationship is N:M, that is, each domain item 349 may have more than one associated key tuples and several domain items may 350 be associated with the same key tuple. 351 352 Let D be a domain item and K an associated key tuple. In the case of value 353 indexes, if K contains a key item whose type is xs:untypedAtomic, an error 354 is raised. In contrast, a general index casts the xs:untyped key item to 355 every other primitive xml-schema type and for each such successful cast, 356 creates a new key tuple by replacing the xs:untypedAtomic item with the 357 result of the cast. D is then associated with each of these new tuples. The 358 process is repeated until there are no xs:untypedAtomic key items in any 359 of the key tuples. Finally, the associations between D and the transformed 360 key tuples are stored in the index container. 361 362 After the above transformations, the key tuples must satisfy the same 363 constraints as the key tuples of value indexes. 364 365 366 It is expected that for both value and general indexes, the index container 367 is organized in a way that makes it efficient to find all the domain items 368 whose associated key tuple(s) satisfy a given search condition (see class 369 IndexCondition below). 370 371 *******************************************************************************/ 372 class Index : public RCObject 373 { 374 protected: SYNC_CODE(mutable RCLock theRCLock;)375 SYNC_CODE(mutable RCLock theRCLock;) 376 377 public: 378 class KeyIterator : virtual public SimpleRCObject 379 { 380 public: 381 virtual void open() = 0; 382 383 virtual bool next(IndexKey&) = 0; 384 385 virtual void close() = 0; 386 387 virtual ~KeyIterator() {} 388 }; 389 390 typedef rchandle<KeyIterator> KeyIterator_t; 391 392 393 public: SYNC_CODE(RCLock * getRCLock ()const{ return &theRCLock; })394 SYNC_CODE(RCLock* getRCLock() const { return &theRCLock; }) 395 396 long* getSharedRefCounter() const { return NULL; } 397 398 public: 399 ~Index()400 virtual ~Index() {} 401 402 /** 403 * Return the qname of the index. 404 */ 405 virtual Item* getName() const = 0; 406 407 /** 408 * Return a reference to the specification object that describes this index. 409 */ 410 virtual const IndexSpecification& getSpecification() const = 0; 411 412 /** 413 * Return the number of columns in the jeys of this index. 414 */ 415 virtual csize getNumColumns() const = 0; 416 417 /** 418 * Return the timezone that is used when comparing date-time related items 419 */ 420 virtual long getTimezone() const = 0; 421 422 /** 423 * Return pointer to the collator used by this index when comparing items at 424 * its i-th column (return NULL if no collator is used for the i-th column). 425 */ 426 virtual const XQPCollator* getCollator(csize i) const = 0; 427 428 /** 429 * Create an index condition (see class IndexCondition below) 430 */ 431 virtual IndexCondition_t createCondition(IndexCondition::Kind k) = 0; 432 433 /** 434 * Returns the number of entries in the index 435 */ 436 virtual csize size() const = 0; 437 438 /** 439 * Returns all keys stored in this index 440 */ 441 virtual KeyIterator_t keys() const = 0; 442 443 /** 444 * Insert the given item in the value set of the given key. If the key is not 445 * in the index already, then the key itself is inserted as well. Return true 446 * if the key was already in the index, false otherwise 447 * The index wil take the ownership of the key if it was not already in the 448 * index. 449 * 450 * NOTE: this method is needed here because it is invoked from the 451 * UDFunctionCallIterator to implement the function cache. 452 * 453 * @error ZDDY0035 if a key with more than one item is inserted into 454 * a general index 455 */ 456 virtual bool insert(store::IndexKey*& key, store::Item_t& item) = 0; 457 458 virtual bool remove( 459 const store::IndexKey* key, 460 const store::Item_t& item, 461 bool all = false) = 0; 462 }; 463 464 465 466 /******************************************************************************* 467 An abstract class that provides a callback method for the store to call during 468 index maintenance. The method computes [domain_node, associated-key] pairs for 469 a given node that has some relationship to the domain expression. 470 471 Instances of IndexEntryCreator are created by the ApplyIterator for each 472 incrementally-maintenable index that needs maintenance. Such an instance is 473 stored inside the IndexDecl of the associated index, so that it can be 474 reused every time the index needs maintenance. 475 476 A concrete implementation of this class is provided in 477 runtime/index/doc_indexer.h 478 ********************************************************************************/ 479 class IndexEntryCreator : public SimpleRCObject 480 { 481 public: ~IndexEntryCreator()482 virtual ~IndexEntryCreator() { } 483 484 virtual void createIndexEntries(Item* item, IndexDelta& delta) = 0; 485 }; 486 487 488 } 489 } 490 491 #endif 492 493 494 /* 495 * Local variables: 496 * mode: c++ 497 * End: 498 */ 499 /* vim:set et sw=2 ts=2: */ 500