// WordKey.h
//
// NAME
// inverted index key.
//
// SYNOPSIS
//
// #include <WordKey.h>
//
// #define WORD_KEY_DOCID    1
// #define WORD_KEY_LOCATION 2
//
// WordList* words = ...;
// WordKey key = words->Key("word 100 20");
// WordKey searchKey;
// words->Dict()->SerialExists("dog", searchKey.Get(WORD_KEY_WORD));
// searchKey.Set(WORD_KEY_LOCATION, 5);
// WordCursor* cursor = words->Key(searchKey);
//
// DESCRIPTION
//
// Describes the key used to store an entry in the inverted index.
// Each field in the key has a bit in the <b>setbits</b>
// member that says if it is set or not. This bit makes it possible to
// mark a particular field as <i>undefined</i> regardless of
// the actual value stored. The methods
// <b>IsDefined, SetDefined</b> and <b>Undefined</b> are used to manipulate
// the <i>defined</i> status of a field. The <b>Pack</b> and <b>Unpack</b>
// methods are used to convert to and from the disk storage representation
// of the key.
//
// Although constructors may be used, the preferred way to create a
// WordKey object is by using the <b>WordContext::Key</b> method.
//
// The following constants are defined:
// <dl>
// <dt> WORD_KEY_WORD
// <dd> the index of the word identifier within the key for Set and Get
//      methods.
// <dt> WORD_KEY_VALUE_INVALID
// <dd> a value that is invalid for any field of the key.
// </dl>
//
// ASCII FORMAT
//
// The ASCII description is a string with fields separated by tabs or
// white space.
48 // <pre> 49 // Example: 200 <UNDEF> 1 4 2 50 // Field 1: The word identifier or <UNDEF> if not defined 51 // Field 2 to the end: numerical value of the field or <UNDEF> if 52 // not defined 53 // 54 // </pre> 55 // 56 // END 57 // 58 // Part of the ht://Dig package <http://www.htdig.org/> 59 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group 60 // For copyright details, see the file COPYING in your distribution 61 // or the GNU General Public License version 2 or later 62 // <http://www.gnu.org/copyleft/gpl.html> 63 // 64 // 65 66 #ifndef _WordKey_h_ 67 #define _WordKey_h_ 68 69 #ifndef SWIG 70 #include "db.h" 71 #include "htString.h" 72 #include "StringList.h" 73 #include "WordContext.h" 74 #endif /* SWIG */ 75 76 // 77 // Possible return values of Outbound/Overflow/Underflow methods 78 // 79 #define WORD_INBOUND 0 80 #define WORD_OVERFLOW 1 81 #define WORD_UNDERFLOW 2 82 83 // 84 // Possible return values of SetToFollowing 85 // 86 #define WORD_FOLLOWING_ATEND 0x0001 87 // 88 // Default value for position argument of SetToFollowing 89 // meaning NFields() - 1 90 // 91 #define WORD_FOLLOWING_MAX -1 92 93 // 94 // No value in a key may be 0 95 // 96 #define WORD_KEY_VALUE_INVALID 0 97 98 // 99 // Unknown field position 100 // 101 #define WORD_KEY_UNKNOWN_POSITION -1 102 103 // 104 // Index of the word identifier within the key 105 // 106 #define WORD_KEY_WORD 0 107 108 #ifndef SWIG 109 // 110 // C comparison function interface for Berkeley DB (bt_compare) 111 // 112 int word_db_cmp(const DBT *a, const DBT *b); 113 #endif /* SWIG */ 114 115 #ifndef SWIG 116 #include"WordKeyInfo.h" 117 #endif /* SWIG */ 118 119 // 120 // Describe a word occurrence 121 // 122 class WordKey 123 { 124 public: 125 // 126 // Constructors, destructors, copy and clear 127 // 128 //- 129 // Constructor. Build an empty key. 130 // The <b>ncontext</b> argument must be a pointer to a valid 131 // WordContext object. 
132 // WordKey(WordContext * ncontext)133 WordKey(WordContext* ncontext) { 134 context = ncontext; 135 Clear(); 136 } 137 #ifndef SWIG 138 //- 139 // Constructor. Initialize from an ASCII description of a key. 140 // See <i>ASCII FORMAT</i> section. 141 // The <b>ncontext</b> argument must be a pointer to a valid 142 // WordContext object. 143 // WordKey(WordContext * ncontext,const String & desc)144 WordKey(WordContext* ncontext, const String& desc) { 145 context = ncontext; 146 Set(desc); 147 } 148 public: 149 #endif /* SWIG */ 150 //- 151 // Reset to empty key. 152 // Clear()153 void Clear() { 154 setbits = 0; 155 for(int i = 0; i < NFields(); i++) { 156 values[i] = 0; 157 } 158 } 159 160 //- 161 // Convenience functions to access the total number of fields 162 // in a key (see <i>WordKeyInfo(3)</i>). 163 // NFields()164 inline int NFields() const { return context->GetKeyInfo().nfields; } 165 //- 166 // Convenience functions to access the 167 // maximum possible value for field at <b>position.</b> 168 // in a key (see <i>WordKeyInfo(3)</i>). 169 // MaxValue(int position)170 inline WordKeyNum MaxValue(int position) { return context->GetKeyInfo().MaxValue(position); } 171 172 // 173 // Accessors 174 // 175 //- 176 // Return a pointer to the WordContext object used to create 177 // this instance. 178 // GetContext()179 inline WordContext* GetContext() { return context; } 180 #ifndef SWIG 181 //- 182 // Return a pointer to the WordContext object used to create 183 // this instance as a const. 184 // GetContext()185 inline const WordContext* GetContext() const { return context; } 186 #endif /* SWIG */ 187 188 // 189 // Get/Set fields 190 // 191 //- 192 // Return value of numerical field at <b>position</b> as const. 
193 // Get(int position)194 inline WordKeyNum Get(int position) const { 195 return(values[position]); 196 } 197 #ifndef SWIG 198 //- 199 // Return value of numerical field at <b>position.</b> 200 // Get(int position)201 inline WordKeyNum& Get(int position) { 202 return(values[position]); 203 } 204 //- 205 // Return value of numerical field at <b>position</b> as const. 206 // 207 inline const WordKeyNum & operator[] (int position) const { return(values[position]); } 208 //- 209 // Return value of numerical field at <b>position.</b> 210 // 211 inline WordKeyNum & operator[] (int position) { return(values[position]); } 212 #endif /* SWIG */ 213 //- 214 // Set value of numerical field at <b>position</b> to <b>val.</b> 215 // Set(int position,WordKeyNum val)216 inline void Set(int position, WordKeyNum val) { 217 SetDefined(position); 218 values[position] = val; 219 } 220 221 // 222 // Key field value existenz. Defined means the value of the field contains 223 // a valid value. Undefined means the value of the field is not valid. 224 // 225 //- 226 // Returns true if field at <b>position</b> is <i>defined</i>, false 227 // otherwise. 228 // IsDefined(int position)229 int IsDefined(int position) const { return setbits & (1 << position); } 230 //- 231 // Value in field <b>position</b> becomes <i>defined.</i> A bit 232 // is set in the bit field describing the defined/undefined state 233 // of the value and the actual value of the field is not modified. 234 // SetDefined(int position)235 void SetDefined(int position) { setbits |= (1 << position); } 236 //- 237 // Value in field <b>position</b> becomes <i>undefined.</i> A bit 238 // is set in the bit field describing the defined/undefined state 239 // of the value and the actual value of the field is not modified. 
240 // Undefined(int position)241 void Undefined(int position) { setbits &= ~(1 << position); } 242 243 #ifndef SWIG 244 // 245 // Set and Get the whole structure from/to ASCII description 246 //- 247 // Set the whole structure from ASCII string in <b>bufferin.</b> 248 // See <i>ASCII FORMAT</i> section. 249 // Return OK if successfull, NOTOK otherwise. 250 // 251 int Set(const String& bufferin); 252 int SetList(StringList& fields); 253 //- 254 // Convert the whole structure to an ASCII string description 255 // in <b>bufferout.</b> 256 // See <i>ASCII FORMAT</i> section. 257 // Return OK if successfull, NOTOK otherwise. 258 // 259 int Get(String& bufferout) const; 260 //- 261 // Convert the whole structure to an ASCII string description 262 // and return it. 263 // See <i>ASCII FORMAT</i> section. 264 // 265 String Get() const; 266 #endif /* SWIG */ 267 268 // 269 // Storage format conversion 270 // 271 #ifndef SWIG 272 //- 273 // Set structure from disk storage format as found in 274 // <b>string</b> buffer or length <b>length.</b> 275 // Return OK if successfull, NOTOK otherwise. 276 // 277 int Unpack(const char* string, int length); 278 // 279 //- 280 // Set structure from disk storage format as found in 281 // <b>data</b> string. 282 // Return OK if successfull, NOTOK otherwise. 283 // Unpack(const String & data)284 inline int Unpack(const String& data) { return(Unpack(data,data.length())); } 285 // 286 //- 287 // Convert object into disk storage format as found in 288 // and place the result in <b>data</b> string. 289 // Return OK if successfull, NOTOK otherwise. 290 // 291 int Pack(String& data) const; 292 #endif /* SWIG */ 293 294 // 295 // Transformations 296 // 297 //- 298 // Copy each <i>defined</i> field from other into the object, if 299 // the corresponding field of the object is not defined. 300 // Return OK if successfull, NOTOK otherwise. 
301 // 302 int Merge(const WordKey& other); 303 //- 304 // Undefine all fields found after the first undefined field. The 305 // resulting key has a set of defined fields followed by undefined fields. 306 // Returns NOTOK if the word is not defined because the resulting key would 307 // be empty and this is considered an error. Returns OK on success. 308 // 309 int PrefixOnly(); 310 #ifndef SWIG 311 //- 312 // Implement ++ on a key. 313 // 314 // It behaves like arithmetic but follows these rules: 315 // <pre> 316 // . Increment starts at field <position> 317 // . If a field value overflows, increment field <b>position</b> - 1 318 // . Undefined fields are ignored and their value untouched 319 // . When a field is incremented all fields to the left are set to 0 320 // </pre> 321 // If position is not specified it is equivalent to NFields() - 1. 322 // It returns OK if successfull, NOTOK if <b>position</b> out of range or 323 // WORD_FOLLOWING_ATEND if the maximum possible value was reached. 324 // 325 int SetToFollowing(int position = WORD_FOLLOWING_MAX); 326 #endif /* SWIG */ 327 328 // 329 // Predicates 330 // 331 //- 332 // Return true if all the fields are <i>defined</i>, false otherwise. 333 // Filled()334 int Filled() const { return setbits == (unsigned int) (((1 << NFields()) - 1)); } 335 //- 336 // Return true if no fields are <i>defined</i>, false otherwise. 337 // Empty()338 int Empty() const { return setbits == 0; } 339 //- 340 // Return true if the object and <b>other</b> are equal. 341 // Only fields defined in both keys are compared. 342 // 343 int Equal(const WordKey& other) const; 344 //- 345 // Return true if the object and <b>other</b> are equal. 346 // All fields are compared. If a field is defined in <b>object</b> 347 // and not defined in the object, the key are not considered 348 // equal. 
349 // ExactEqual(const WordKey & other)350 int ExactEqual(const WordKey& other) const { return(Equal(other) && other.setbits == setbits); } 351 //- 352 // Compare <b>object</b> and <b>other</b> as in strcmp. Undefined 353 // fields are ignored. Returns a positive number if <b>object</b> is 354 // greater than <b>other</b>, zero if they are equal, a negative 355 // number if <b>object</b> is lower than <b>other.</b> 356 // 357 int Cmp(const WordKey& other) const; 358 #ifndef SWIG 359 //- 360 // Return true if the object and <b>other</b> are equal. 361 // The packed string are compared. An <i>undefined</i> numerical field 362 // will be 0 and therefore undistinguishable from a <i>defined</i> field 363 // whose value is 0. 364 // 365 int PackEqual(const WordKey& other) const; 366 //- 367 // Return true if adding <b>increment</b> in field at <b>position</b> makes 368 // it overflow or underflow, false if it fits. 369 // Outbound(int position,int increment)370 int Outbound(int position, int increment) { 371 if(increment < 0) return Underflow(position, increment); 372 else if(increment > 0) return Overflow(position, increment); 373 else return WORD_INBOUND; 374 } 375 //- 376 // Return true if adding positive <b>increment</b> to field at 377 // <b>position</b> makes it overflow, false if it fits. 378 // Overflow(int position,int increment)379 int Overflow(int position, int increment) { 380 return MaxValue(position) - Get(position) < (WordKeyNum)increment ? WORD_OVERFLOW : WORD_INBOUND; 381 } 382 //- 383 // Return true if subtracting positive <b>increment</b> to field 384 // at <b>position</b> makes it underflow, false if it fits. 385 // Underflow(int position,int increment)386 int Underflow(int position, int increment) { 387 return Get(position) < (WordKeyNum)(-increment) ? WORD_UNDERFLOW : WORD_INBOUND; 388 } 389 #endif /* SWIG */ 390 //- 391 // Return OK if the key may be used as a prefix for search. 
392 // In other words return OK if the fields set in the key 393 // are all contiguous, starting from the first field. 394 // Otherwise returns NOTOK 395 // 396 int Prefix() const; 397 398 #ifndef SWIG 399 //- 400 // Compare <b>a</b> and <b>b</b> in the Berkeley DB fashion. 401 // <b>a</b> and <b>b</b> are packed keys. The semantics of the 402 // returned int is as of strcmp and is driven by the key description 403 // found in <i>WordKeyInfo.</i> Returns a positive number if <b>a</b> is 404 // greater than <b>b</b>, zero if they are equal, a negative number 405 // if <b>a</b> is lower than <b>b.</b> 406 // 407 static int Compare(WordContext* context, const String& a, const String& b); 408 //- 409 // Compare <b>a</b> and <b>b</b> in the Berkeley DB fashion. 410 // <b>a</b> and <b>b</b> are packed keys. The semantics of the 411 // returned int is as of strcmp and is driven by the key description 412 // found in <i>WordKeyInfo.</i> Returns a positive number if <b>a</b> is 413 // greater than <b>b</b>, zero if they are equal, a negative number 414 // if <b>a</b> is lower than <b>b.</b> 415 // 416 static int Compare(WordContext* context, const unsigned char *a, int a_length, const unsigned char *b, int b_length); 417 //- 418 // Compare object defined fields with <b>other</b> key defined fields only, 419 // ignore fields that are not defined in object or <b>other.</b> 420 // Return 1 if different 0 if equal. 421 // If different, <b>position</b> is set to the field number that differ, 422 // <b>lower</b> is set to 1 if Get(<b>position</b>) is lower than 423 // other.Get(<b>position</b>) otherwise lower is set to 0. 424 // 425 int Diff(const WordKey& other, int& position, int& lower); 426 427 //- 428 // Print object in ASCII form on <b>f</b> (uses <i>Get</i> method). 429 // See <i>ASCII FORMAT</i> section. 430 // 431 int Write(FILE* f) const; 432 #endif /* SWIG */ 433 //- 434 // Print object in ASCII form on <b>stdout</b> (uses <i>Get</i> method). 
435 // See <i>ASCII FORMAT</i> section. 436 // 437 void Print() const; 438 439 // 440 // Direct access to values array. Only use if you know what you're 441 // doing. 442 // Values()443 WordKeyNum* Values() { return values; } Values()444 const WordKeyNum* Values() const { return values; } 445 #ifndef SWIG 446 447 private: 448 449 // 450 // Data members 451 // 452 // 453 // Bit field for defined/undefined status of each key field 454 // 455 unsigned int setbits; 456 // 457 // Holds the numerical values of the key fields 458 // 459 WordKeyNum values[WORD_KEY_MAX_NFIELDS]; 460 461 WordContext *context; 462 #endif /* SWIG */ 463 }; 464 465 #endif /* _WordKey_h */ 466