1 // WordKey.h
2 //
3 // NAME
4 // inverted index key.
5 //
6 // SYNOPSIS
7 //
8 // #include <WordKey.h>
9 //
10 // #define WORD_KEY_DOCID    1
11 // #define WORD_KEY_LOCATION 2
12 //
13 // WordList* words = ...;
14 // WordKey key = words->Key("word 100 20");
15 // WordKey searchKey;
16 // words->Dict()->SerialExists("dog", searchKey.Get(WORD_KEY_WORD));
17 // searchKey.Set(WORD_KEY_LOCATION, 5);
18 // WordCursor* cursor = words->Key(searchKey);
19 //
20 // DESCRIPTION
21 //
// Describes the key used to store an entry in the inverted index.
// Each field in the key has a bit in the <b>setbits</b>
// member that says whether it is set or not. This bit makes it possible
// to mark a particular field as <i>undefined</i> regardless of
// the actual value stored. The methods
// <b>IsDefined, SetDefined</b> and <b>Undefined</b> are used to manipulate
// the <i>defined</i> status of a field. The <b>Pack</b> and <b>Unpack</b>
// methods are used to convert to and from the disk storage representation
// of the key.
31 //
// Although constructors may be used, the preferred way to create a
// WordKey object is by using the <b>WordContext::Key</b> method.
34 //
35 // The following constants are defined:
36 // <dl>
37 // <dt> WORD_KEY_WORD
// <dd> the index of the word identifier within the key, for use with
// the Set and Get methods.
40 // <dt> WORD_KEY_VALUE_INVALID
41 // <dd> a value that is invalid for any field of the key.
42 // </dl>
43 //
44 // ASCII FORMAT
45 //
46 // The ASCII description is a string with fields separated by tabs or
47 // white space.
48 // <pre>
49 // Example: 200 <UNDEF> 1 4 2
50 // Field 1: The word identifier or <UNDEF> if not defined
51 // Field 2 to the end: numerical value of the field or <UNDEF> if
52 //                     not defined
53 //
54 // </pre>
55 //
56 // END
57 //
58 // Part of the ht://Dig package   <http://www.htdig.org/>
59 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
60 // For copyright details, see the file COPYING in your distribution
61 // or the GNU General Public License version 2 or later
62 // <http://www.gnu.org/copyleft/gpl.html>
63 //
64 //
65 
66 #ifndef _WordKey_h_
67 #define _WordKey_h_
68 
69 #ifndef SWIG
70 #include "db.h"
71 #include "htString.h"
72 #include "StringList.h"
73 #include "WordContext.h"
74 #endif /* SWIG */
75 
//
// Possible return values of Outbound/Overflow/Underflow methods
//
#define WORD_INBOUND	0
#define WORD_OVERFLOW	1
#define WORD_UNDERFLOW	2

//
// Possible return values of SetToFollowing
//
#define WORD_FOLLOWING_ATEND	0x0001
//
// Default value for position argument of SetToFollowing
// meaning NFields() - 1
// (parenthesized so the negative literal always expands as one operand)
//
#define WORD_FOLLOWING_MAX	(-1)

//
// Value that is invalid for any field of a key: no value in a key may be 0
//
#define WORD_KEY_VALUE_INVALID 0

//
// Unknown field position
// (parenthesized so the negative literal always expands as one operand)
//
#define WORD_KEY_UNKNOWN_POSITION	(-1)

//
// Index of the word identifier within the key
//
#define WORD_KEY_WORD	0
107 
108 #ifndef SWIG
109 //
110 // C comparison function interface for Berkeley DB (bt_compare)
111 //
112 int word_db_cmp(const DBT *a, const DBT *b);
113 #endif /* SWIG */
114 
115 #ifndef SWIG
116 #include"WordKeyInfo.h"
117 #endif /* SWIG */
118 
119 //
120 // Describe a word occurrence
121 //
122 class WordKey
123 {
124  public:
125   //
126   // Constructors, destructors, copy and clear
127   //
128   //-
129   // Constructor. Build an empty key.
130   // The <b>ncontext</b> argument must be a pointer to a valid
131   // WordContext object.
132   //
WordKey(WordContext * ncontext)133   WordKey(WordContext* ncontext) {
134     context = ncontext;
135     Clear();
136   }
137 #ifndef SWIG
138   //-
139   // Constructor. Initialize from an ASCII description of a key.
140   // See <i>ASCII FORMAT</i> section.
141   // The <b>ncontext</b> argument must be a pointer to a valid
142   // WordContext object.
143   //
WordKey(WordContext * ncontext,const String & desc)144   WordKey(WordContext* ncontext, const String& desc) {
145     context = ncontext;
146     Set(desc);
147   }
148  public:
149 #endif /* SWIG */
150   //-
151   // Reset to empty key.
152   //
Clear()153   void	Clear() {
154     setbits = 0;
155     for(int i = 0; i < NFields(); i++) {
156       values[i] = 0;
157     }
158   }
159 
160   //-
161   // Convenience functions to access the total number of fields
162   // in a key (see <i>WordKeyInfo(3)</i>).
163   //
NFields()164   inline int 	           NFields() const { return context->GetKeyInfo().nfields; }
165   //-
166   // Convenience functions to access the
167   // maximum possible value for field at <b>position.</b>
168   // in a key (see <i>WordKeyInfo(3)</i>).
169   //
MaxValue(int position)170   inline WordKeyNum         MaxValue(int position) { return context->GetKeyInfo().MaxValue(position); }
171 
172   //
173   // Accessors
174   //
175   //-
176   // Return a pointer to the WordContext object used to create
177   // this instance.
178   //
GetContext()179   inline WordContext* GetContext() { return context; }
180 #ifndef SWIG
181   //-
182   // Return a pointer to the WordContext object used to create
183   // this instance as a const.
184   //
GetContext()185   inline const WordContext* GetContext() const { return context; }
186 #endif /* SWIG */
187 
188   //
189   // Get/Set fields
190   //
191   //-
192   // Return value of numerical field at <b>position</b> as const.
193   //
Get(int position)194   inline WordKeyNum Get(int position) const {
195     return(values[position]);
196   }
197 #ifndef SWIG
198   //-
199   // Return value of numerical field at <b>position.</b>
200   //
Get(int position)201   inline WordKeyNum& Get(int position) {
202     return(values[position]);
203   }
204   //-
205   // Return value of numerical field at <b>position</b> as const.
206   //
207   inline const WordKeyNum &      operator[] (int position) const  { return(values[position]); }
208   //-
209   // Return value of numerical field at <b>position.</b>
210   //
211   inline       WordKeyNum &      operator[] (int position)        { return(values[position]); }
212 #endif /* SWIG */
213   //-
214   // Set value of numerical field at <b>position</b> to <b>val.</b>
215   //
Set(int position,WordKeyNum val)216   inline void Set(int position, WordKeyNum val) {
217     SetDefined(position);
218     values[position] = val;
219   }
220 
221   //
222   // Key field value existenz. Defined means the value of the field contains
223   // a valid value. Undefined means the value of the field is not valid.
224   //
225   //-
226   // Returns true if field at <b>position</b> is <i>defined</i>, false
227   // otherwise.
228   //
IsDefined(int position)229   int	IsDefined(int position) const { return setbits & (1 << position); }
230   //-
231   // Value in field <b>position</b> becomes <i>defined.</i> A bit
232   // is set in the bit field describing the defined/undefined state
233   // of the value and the actual value of the field is not modified.
234   //
SetDefined(int position)235   void	SetDefined(int position)      { setbits |= (1 << position); }
236   //-
237   // Value in field <b>position</b> becomes <i>undefined.</i> A bit
238   // is set in the bit field describing the defined/undefined state
239   // of the value and the actual value of the field is not modified.
240   //
Undefined(int position)241   void	Undefined(int position)       { setbits &= ~(1 << position); }
242 
243 #ifndef SWIG
244   //
245   // Set and Get the whole structure from/to ASCII description
246   //-
247   // Set the whole structure from ASCII string in <b>bufferin.</b>
248   // See <i>ASCII FORMAT</i> section.
249   // Return OK if successfull, NOTOK otherwise.
250   //
251   int Set(const String& bufferin);
252   int SetList(StringList& fields);
253   //-
254   // Convert the whole structure to an ASCII string description
255   // in <b>bufferout.</b>
256   // See <i>ASCII FORMAT</i> section.
257   // Return OK if successfull, NOTOK otherwise.
258   //
259   int Get(String& bufferout) const;
260   //-
261   // Convert the whole structure to an ASCII string description
262   // and return it.
263   // See <i>ASCII FORMAT</i> section.
264   //
265   String Get() const;
266 #endif /* SWIG */
267 
268   //
269   // Storage format conversion
270   //
271 #ifndef SWIG
272   //-
273   // Set structure from disk storage format as found in
274   // <b>string</b> buffer or length <b>length.</b>
275   // Return OK if successfull, NOTOK otherwise.
276   //
277   int 		Unpack(const char* string, int length);
278   //
279   //-
280   // Set structure from disk storage format as found in
281   // <b>data</b> string.
282   // Return OK if successfull, NOTOK otherwise.
283   //
Unpack(const String & data)284   inline int    Unpack(const String& data) { return(Unpack(data,data.length())); }
285   //
286   //-
287   // Convert object into disk storage format as found in
288   // and place the result in <b>data</b> string.
289   // Return OK if successfull, NOTOK otherwise.
290   //
291   int 		Pack(String& data) const;
292 #endif /* SWIG */
293 
294   //
295   // Transformations
296   //
297   //-
298   // Copy each <i>defined</i> field from other into the object, if
299   // the corresponding field of the object is not defined.
300   // Return OK if successfull, NOTOK otherwise.
301   //
302   int		Merge(const WordKey& other);
303   //-
304   // Undefine all fields found after the first undefined field. The
305   // resulting key has a set of defined fields followed by undefined fields.
306   // Returns NOTOK if the word is not defined because the resulting key would
307   // be empty and this is considered an error. Returns OK on success.
308   //
309   int		PrefixOnly();
310 #ifndef SWIG
311   //-
312   // Implement ++ on a key.
313   //
314   // It behaves like arithmetic but follows these rules:
315   // <pre>
316   // . Increment starts at field <position>
317   // . If a field value overflows, increment field <b>position</b> - 1
318   // . Undefined fields are ignored and their value untouched
319   // . When a field is incremented all fields to the left are set to 0
320   // </pre>
321   // If position is not specified it is equivalent to NFields() - 1.
322   // It returns OK if successfull, NOTOK if <b>position</b> out of range or
323   // WORD_FOLLOWING_ATEND if the maximum possible value was reached.
324   //
325   int           SetToFollowing(int position = WORD_FOLLOWING_MAX);
326 #endif /* SWIG */
327 
328   //
329   // Predicates
330   //
331   //-
332   // Return true if all the fields are <i>defined</i>, false otherwise.
333   //
Filled()334   int		Filled() const { return setbits == (unsigned int) (((1 << NFields()) - 1)); }
335   //-
336   // Return true if no fields are <i>defined</i>, false otherwise.
337   //
Empty()338   int		Empty() const  { return setbits == 0; }
339   //-
340   // Return true if the object and <b>other</b> are equal.
341   // Only fields defined in both keys are compared.
342   //
343   int 		Equal(const WordKey& other) const;
344   //-
345   // Return true if the object and <b>other</b> are equal.
346   // All fields are compared. If a field is defined in <b>object</b>
347   // and not defined in the object, the key are not considered
348   // equal.
349   //
ExactEqual(const WordKey & other)350   int 		ExactEqual(const WordKey& other) const { return(Equal(other) && other.setbits == setbits); }
351   //-
352   // Compare <b>object</b> and <b>other</b> as in strcmp. Undefined
353   // fields are ignored. Returns a positive number if <b>object</b> is
354   // greater than <b>other</b>, zero if they are equal, a negative
355   // number if <b>object</b> is lower than <b>other.</b>
356   //
357   int		Cmp(const WordKey& other) const;
358 #ifndef SWIG
359   //-
360   // Return true if the object and <b>other</b> are equal.
361   // The packed string are compared. An <i>undefined</i> numerical field
362   // will be 0 and therefore undistinguishable from a <i>defined</i> field
363   // whose value is 0.
364   //
365   int 		PackEqual(const WordKey& other) const;
366   //-
367   // Return true if adding <b>increment</b> in field at <b>position</b> makes
368   // it overflow or underflow, false if it fits.
369   //
Outbound(int position,int increment)370   int		Outbound(int position, int increment) {
371     if(increment < 0) return Underflow(position, increment);
372     else if(increment > 0) return Overflow(position, increment);
373     else return WORD_INBOUND;
374   }
375   //-
376   // Return true if adding positive <b>increment</b> to field at
377   // <b>position</b> makes it overflow, false if it fits.
378   //
Overflow(int position,int increment)379   int		Overflow(int position, int increment) {
380     return MaxValue(position) - Get(position) < (WordKeyNum)increment ? WORD_OVERFLOW : WORD_INBOUND;
381   }
382   //-
383   // Return true if subtracting positive <b>increment</b> to field
384   // at <b>position</b> makes it underflow, false if it fits.
385   //
Underflow(int position,int increment)386   int		Underflow(int position, int increment) {
387     return Get(position) < (WordKeyNum)(-increment) ? WORD_UNDERFLOW : WORD_INBOUND;
388   }
389 #endif /* SWIG */
390   //-
391   // Return OK if the key may be used as a prefix for search.
392   // In other words return OK if the fields set in the key
393   // are all contiguous, starting from the first field.
394   // Otherwise returns NOTOK
395   //
396   int		Prefix() const;
397 
398 #ifndef SWIG
399   //-
400   // Compare <b>a</b> and <b>b</b> in the Berkeley DB fashion.
401   // <b>a</b> and <b>b</b> are packed keys. The semantics of the
402   // returned int is as of strcmp and is driven by the key description
403   // found in <i>WordKeyInfo.</i> Returns a positive number if <b>a</b> is
404   // greater than <b>b</b>, zero if they are equal, a negative number
405   // if <b>a</b> is lower than <b>b.</b>
406   //
407   static int 	    Compare(WordContext* context, const String& a, const String& b);
408   //-
409   // Compare <b>a</b> and <b>b</b> in the Berkeley DB fashion.
410   // <b>a</b> and <b>b</b> are packed keys. The semantics of the
411   // returned int is as of strcmp and is driven by the key description
412   // found in <i>WordKeyInfo.</i> Returns a positive number if <b>a</b> is
413   // greater than <b>b</b>, zero if they are equal, a negative number
414   // if <b>a</b> is lower than <b>b.</b>
415   //
416   static int        Compare(WordContext* context, const unsigned char *a, int a_length, const unsigned char *b, int b_length);
417   //-
418   // Compare object defined fields with <b>other</b> key defined fields only,
419   // ignore fields that are not defined in object or <b>other.</b>
420   // Return 1 if different 0 if equal.
421   // If different, <b>position</b> is set to the field number that differ,
422   // <b>lower</b> is set to 1 if Get(<b>position</b>) is lower than
423   // other.Get(<b>position</b>) otherwise lower is set to 0.
424   //
425   int               Diff(const WordKey& other, int& position, int& lower);
426 
427   //-
428   // Print object in ASCII form on <b>f</b> (uses <i>Get</i> method).
429   // See <i>ASCII FORMAT</i> section.
430   //
431   int Write(FILE* f) const;
432 #endif /* SWIG */
433   //-
434   // Print object in ASCII form on <b>stdout</b> (uses <i>Get</i> method).
435   // See <i>ASCII FORMAT</i> section.
436   //
437   void Print() const;
438 
439   //
440   // Direct access to values array. Only use if you know what you're
441   // doing.
442   //
Values()443   WordKeyNum* Values() { return values; }
Values()444   const WordKeyNum* Values() const { return values; }
445 #ifndef SWIG
446 
447 private:
448 
449   //
450   // Data members
451   //
452   //
453   // Bit field for defined/undefined status of each key field
454   //
455   unsigned int setbits;
456   //
457   // Holds the numerical values of the key fields
458   //
459   WordKeyNum   values[WORD_KEY_MAX_NFIELDS];
460 
461   WordContext  *context;
462 #endif /* SWIG */
463 };
464 
465 #endif /* _WordKey_h */
466