1 /*
2  * Copyright 2006-2008 The FLWOR Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef ZORBA_STORE_INDEX
17 #define ZORBA_STORE_INDEX
18 
19 #include "store/api/shared_types.h"
20 #include "store/util/item_vector.h"
21 
22 namespace zorba
23 {
24 
25 class XQPCollator;
26 
27 
28 namespace store
29 {
30 
31 class IndexSpecification;
32 class IndexEntryCreator;
33 class Iterator;
34 
35 typedef rchandle<IndexEntryCreator> IndexEntryCreator_t;
36 
37 
38 /***************************************************************************//**
39   Specification for creating a value or general index.
40 
41   theNumKeyColumns:
42   -----------------
43   The number of columns in each key.
44 
45   theKeyTypes:
46   ------------
47   The data types of the key columns. Each type must be a builtin atomic type,
48   and for sorted indices, it must have an ordering.
49 
50   theCollations:
51   --------------
52   The names (uris) of the collations to use when comparing the string columns
53   of two keys. The size of this vector is equal to theNumKeyColumns; if a type
54   of a key column is not string, the associated entry in theCollations is the
55   empty string.
56 
57   theTimezone:
58   ------------
59   The timezone is needed to compare date/time key values.
60 
61   theIsGeneral:
62   -------------
63   Whether the index is "general" or not.
64 
65   theIsUnique:
66   ------------
67   Whether the index is unique, i.e., there is exactly one value associated with
68   each key.
69 
70   theIsSorted:
71   ------------
72   Whether the index is sorted by its key values or not.
73 
74   theIsTemp:
75   ----------
76   Whether the index is temporary or not.
77 
78   theIsThreadSafe:
79   ----------------
80   Whether the index can be shared among multiple threads or not.
81 
82   theIsAutomatic:
83   ---------------
84   Whether the index must be maintained during/after each apply- updates or not.
85 
86   theSources:
87   -----------
88   The qnames of the collections accessed by the defining exprs of this index.
89 ********************************************************************************/
90 class IndexSpecification
91 {
92 public:
93   csize                          theNumKeyColumns;
94   std::vector<store::Item_t>     theKeyTypes;
95   std::vector<std::string>       theCollations;
96   long                           theTimezone;
97 
98   bool                           theIsGeneral;
99   bool                           theIsUnique;
100   bool                           theIsSorted;
101   bool                           theIsTemp;
102   bool                           theIsThreadSafe;
103   bool                           theIsAutomatic;
104 
105   std::vector<store::Item_t>     theSources;
106 
107 public:
IndexSpecification()108   IndexSpecification()
109     :
110     theNumKeyColumns(0),
111     theTimezone(0),
112     theIsGeneral(false),
113     theIsUnique(false),
114     theIsSorted(false),
115     theIsTemp(false),
116     theIsThreadSafe(false)
117   {
118   }
119 
clear()120   void clear()
121   {
122     theNumKeyColumns = 0;
123     theKeyTypes.clear();
124     theCollations.clear();
125     theTimezone = 0;
126     theIsGeneral = theIsUnique = theIsSorted = theIsTemp = theIsThreadSafe = false;
127   }
128 
resize(csize numColumns)129   void resize(csize numColumns)
130   {
131     theNumKeyColumns = numColumns;
132     theKeyTypes.resize(numColumns);
133     theCollations.resize(numColumns);
134   }
135 
getNumColumns()136   csize getNumColumns() const { return theNumKeyColumns; }
137 
getTimezone()138   long getTimezone() const { return theTimezone; }
139 };
140 
141 
142 /**************************************************************************//**
143   Class IndexKey represents an index key as a vector of item handles.
144 *******************************************************************************/
145 class IndexKey : public ItemVector
146 {
147 public:
ItemVector(size)148   IndexKey(csize size = 0) : ItemVector(size) {}
149 };
150 
151 
152 /**************************************************************************//**
153   A index delta is a set of [domain-node, associated-key(s)] pairs.
154 *******************************************************************************/
155 class IndexDelta
156 {
157 public:
158   typedef std::pair<Item_t, IndexKey*> ValuePair;
159 
160   typedef std::vector<ValuePair> ValueDelta;
161 
162   typedef std::pair<Item_t, Item_t> GeneralPair;
163 
164   typedef std::vector<GeneralPair> GeneralDelta;
165 
166 protected:
167   ValueDelta       theValueDelta;
168   GeneralDelta     theGeneralDelta;
169 
170 public:
171   void addValuePair(Item_t& node, IndexKey* key);
172 
173   void addGeneralPair(Item_t& node, Item_t& key);
174 
175   void clear();
176 
177 protected:
IndexDelta()178   IndexDelta() {}
179 };
180 
181 
182 /***************************************************************************//**
183 
184   Class IndexCondition represents a search condition on the keys of an index.
185   An instance of IndexCondition is given as a parameter to the init() method
186   of an IndexProbeIterator (see iterator.h), which can then iterate over the
187   items in the value of each index key that satisfies the condition.
188 
189   There are 4 kinds of index conditions:
190 
191   POINT_VALUE :
192   -------------
193 
194   It represents a condition that is satisfied by at most one index key tuple,
195   namely the index key tuple K (if it exists) that is equal, according to the
196   rules of value equality, to a user specified search key tuple. If any of
197   the domain items associated with K is also associated with another key tuples,
198   an error os raised.
199 
200   POINT_GENERAL:
201   --------------
202 
203   It represents a condition that is satisfied by all index keys that are equal,
204   according to the rules of general equality, to a user specified search key.
205   This condition is applicable to general indexes only.
206 
207 
208   BOX_VALUE :
209   -----------
210 
211   It represents a condition that is satisfied by the index keys inside a
212   user-specified "box".
213 
214   Let M be the number of key columns. Then, an M-dimensional box is defined as
215   a conjuction of M range conditions on columns 0 to M-1. Each range condition
216   specifies a range of acceptable values for some key column. Specifically, a
217   range is defined as the set of all key values K such that
218 
219   lower_bound <? K <? upper_bound, where <? is either the lt or the le operator.
220 
221   The lower bound may be -INFINITY and the upper bound may be +INFINTY.
222 
223   BOX_GENERAL :
224   -------------
225 
226   It represents the following condition:
227 
228   bound op? K, where
229 
230   (a) op? is one of <, <=, >, or >=,
231   (b) K is a key value, and
232   (c) bound is either an atomic item or -INFINITY or +INFINITY.
233 
234 ********************************************************************************/
235 class IndexCondition : public SimpleRCObject
236 {
237 public:
238   typedef enum
239   {
240     POINT_VALUE,
241     POINT_GENERAL,
242     BOX_VALUE,
243     BOX_GENERAL
244   } Kind;
245 
246 public:
~IndexCondition()247   virtual ~IndexCondition() {}
248 
249   /**
250    * Clear the internal data of this condition object, so that it can be
251    * rebuilt and reused.
252    */
253   virtual void clear() = 0;
254 
255   /**
256    *  Return the kind of the condition.
257    */
258   virtual Kind getKind() const = 0;
259 
260   /**
261    *  Return the kind of the condition as a string.
262    */
263   virtual std::string getKindString() const = 0;
264 
265   /**
266    * This method applies to POINT_VALUE and POINT_GENERAL conditions only.
267    * The key associated with such conditions is built one item at a time using
268    * this method.
269    */
270   virtual void pushItem(Item_t& item) = 0;
271 
272   /**
273    * This method applies to BOX_VALUE conditions only.
274    * The box associated with this condition is built one range at a time using
275    * the pushRange() method.
276    *
277    * @param lower The lower bound of the range. May be NULL, which indicates
278    *        either the empty sequence or -INFINITY. The haveLower parameter is
279    *        used to distinguish between these two cases.
280    * @param upper The upper bound of the range. May be NULL, which indicates
281    *        either the empty sequence or +INFINITY. The haveUpper parameter is
282    *        used to distinguish between these two cases.
283    * @param haveLower False if the lower bound is -INFINITY. True otherwise.
284    * @param haveUpper False if the upper bound is +INFINITY. True otherwise.
285    * @param lowerIncl True if the lower bound is included in the range. False
286    *        otherwise.
287    * @param upperIncl True if the upper bound is included in the range. False
288    *        otherwise.
289    */
290   virtual void pushRange(
291         Item_t& lower,
292         Item_t& upper,
293         bool haveLower,
294         bool haveUpper,
295         bool lowerIncl,
296         bool upperIncl) = 0;
297 
298   /**
299    * This method applies to BOX_GENERAL conditions only.
300    *
301    * @param bound An item that constitutes either a lower or an upper bound
302    *        for the key values.
303    * @param isLower True if the bound is a lower one; false if it is an upper
304    *        one.
305    * @param boundIncl True if the boundary search key is included in the
306    *        search. False otherwise.
307    */
308   virtual void pushBound(Item_t& bound, bool isLower, bool boundIncl) = 0;
309 
310   /**
311    *  Serialize this condition.
312    */
313   virtual std::string toString() const = 0;
314 };
315 
316 
317 /**************************************************************************//**
318 
319   Abstract index class. It represents both "value" and "general" indexes.
320 
321   Value Indexes:
322   --------------
323 
324   From the store's point of view, a "value index" is a container that "stores"
325   a relation between tuples of atomic items (called key tuples) and items
326   (called domain items). The relationship is a function on the domain items,
327   i.e., for each domain item there is exactly one key tuple. In general, the
328   function is N:1, that is, several domain items may have the same key tuple.
329 
330   The key tuples must satisfy the following constraints:
331 
332   1. All key tuples in a value index have the same fixed number of items, say M.
333      Given this constraint, we can define the i-th "key column" of a value index
334      as the set of items that appear in the i-th position of each key tuple. If
335      an index has N tuples, then there are M key columns, each containing N items.
336 
337   2. The items in each key column must be comparable with each other using the
338      Item::equals() method and/or the Item::compare() method. This implies that
339      all items in a key column must belong to the same branch of the XMLSchema
340      type hierarchy. Furthermore, no key item may have type xs:untypedAtomic or
341      xs:anyAtomicType.
342 
343   General Indexes:
344   ----------------
345 
346   From the store's point of view, a "general index" is a container that "stores"
347   a relation between tuples of atomic items (called key tuples) and items
348   (called domain items). The relationship is N:M, that is, each domain item
349   may have more than one associated key tuples and several domain items may
350   be associated with the same key tuple.
351 
352   Let D be a domain item and K an associated key tuple. In the case of value
353   indexes, if K contains a key item whose type is xs:untypedAtomic, an error
354   is raised. In contrast, a general index casts the xs:untyped key item to
355   every other primitive xml-schema type and for each such successful cast,
356   creates a new key tuple by replacing the xs:untypedAtomic item with the
357   result of the cast. D is then associated with each of these new tuples. The
358   process is repeated until there are no xs:untypedAtomic key items in any
359   of the key tuples. Finally, the associations between D and the transformed
360   key tuples are stored in the index container.
361 
362   After the above transformations, the key tuples must satisfy the same
363   constraints as the key tuples of value indexes.
364 
365 
366   It is expected that for both value and general indexes, the index container
367   is organized in a way that makes it efficient to find all the domain items
368   whose associated key tuple(s) satisfy a given search condition (see class
369   IndexCondition below).
370 
371 *******************************************************************************/
372 class Index : public RCObject
373 {
374 protected:
SYNC_CODE(mutable RCLock theRCLock;)375   SYNC_CODE(mutable RCLock theRCLock;)
376 
377 public:
378   class KeyIterator : virtual public SimpleRCObject
379   {
380   public:
381     virtual void open() = 0;
382 
383     virtual bool next(IndexKey&) = 0;
384 
385     virtual void close() = 0;
386 
387     virtual ~KeyIterator() {}
388   };
389 
390   typedef rchandle<KeyIterator> KeyIterator_t;
391 
392 
393 public:
SYNC_CODE(RCLock * getRCLock ()const{ return &theRCLock; })394   SYNC_CODE(RCLock* getRCLock() const { return &theRCLock; })
395 
396   long* getSharedRefCounter() const { return NULL; }
397 
398 public:
399 
~Index()400   virtual ~Index() {}
401 
402   /**
403    * Return the qname of the index.
404    */
405   virtual Item* getName() const = 0;
406 
407   /**
408    *  Return a reference to the specification object that describes this index.
409    */
410   virtual const IndexSpecification& getSpecification() const = 0;
411 
412   /**
413    *  Return the number of columns in the jeys of this index.
414    */
415   virtual csize getNumColumns() const = 0;
416 
417   /**
418    * Return the timezone that is used when comparing date-time related items
419    */
420   virtual long getTimezone() const = 0;
421 
422   /**
423    *  Return pointer to the collator used by this index when comparing items at
424    *  its i-th column (return NULL if no collator is used for the i-th column).
425    */
426   virtual const XQPCollator* getCollator(csize i) const = 0;
427 
428   /**
429    * Create an index condition (see class IndexCondition below)
430    */
431   virtual IndexCondition_t createCondition(IndexCondition::Kind k) = 0;
432 
433   /**
434    * Returns the number of entries in the index
435    */
436   virtual csize size() const = 0;
437 
438   /**
439    * Returns all keys stored in this index
440    */
441   virtual KeyIterator_t keys() const = 0;
442 
443   /**
444    * Insert the given item in the value set of the given key. If the key is not
445    * in the index already, then the key itself is inserted as well. Return true
446    * if the key was already in the index, false otherwise
447    * The index wil take the ownership of the key if it was not already in the
448    * index.
449    *
450    * NOTE: this method is needed here because it is invoked from the
451    * UDFunctionCallIterator to implement the function cache.
452    *
453    * @error ZDDY0035 if a key with more than one item is inserted into
454    *  a general index
455    */
456   virtual bool insert(store::IndexKey*& key, store::Item_t& item) = 0;
457 
458   virtual bool remove(
459       const store::IndexKey* key,
460       const store::Item_t& item,
461       bool all = false) = 0;
462 };
463 
464 
465 
466 /*******************************************************************************
467   An abstract class that provides a callback method for the store to call during
468   index maintenance. The method computes [domain_node, associated-key] pairs for
469   a given node that has some relationship to the domain expression.
470 
471   Instances of IndexEntryCreator are created by the ApplyIterator for each
472   incrementally-maintenable index that needs maintenance. Such an instance is
473   stored inside the IndexDecl of the associated index, so that it can be
474   reused every time the index needs maintenance.
475 
476   A concrete implementation of this class is provided in
477   runtime/index/doc_indexer.h
478 ********************************************************************************/
479 class IndexEntryCreator : public SimpleRCObject
480 {
481 public:
~IndexEntryCreator()482   virtual ~IndexEntryCreator() { }
483 
484   virtual void createIndexEntries(Item* item, IndexDelta& delta) = 0;
485 };
486 
487 
488 }
489 }
490 
491 #endif
492 
493 
494 /*
495  * Local variables:
496  * mode: c++
497  * End:
498  */
499 /* vim:set et sw=2 ts=2: */
500