1 /** @file 2 * @brief External sources of posting information 3 */ 4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts 5 * Copyright (C) 2008,2009 Lemur Consulting Ltd 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22 #ifndef XAPIAN_INCLUDED_POSTINGSOURCE_H 23 #define XAPIAN_INCLUDED_POSTINGSOURCE_H 24 25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD 26 # error Never use <xapian/postingsource.h> directly; include <xapian.h> instead. 27 #endif 28 29 #include <xapian/attributes.h> 30 #include <xapian/database.h> 31 #include <xapian/deprecated.h> 32 #include <xapian/intrusive_ptr.h> 33 #include <xapian/postingiterator.h> 34 #include <xapian/types.h> 35 #include <xapian/valueiterator.h> 36 #include <xapian/visibility.h> 37 38 #include <string> 39 #include <map> 40 41 namespace Xapian { 42 43 class Registry; 44 45 /** Base class which provides an "external" source of postings. 46 */ 47 class XAPIAN_VISIBILITY_DEFAULT PostingSource 48 : public Xapian::Internal::opt_intrusive_base { 49 /// Don't allow assignment. 50 void operator=(const PostingSource &); 51 52 /// Don't allow copying. 53 PostingSource(const PostingSource &); 54 55 /// The current upper bound on what get_weight() can return. 56 double max_weight_; 57 58 /** The object to inform of maxweight changes. 59 * 60 * We store this as a (void*) to avoid needing to declare an internal 61 * type in an external header. It's actually (MultiMatch *). 62 */ 63 void * matcher_; 64 65 public: 66 /// Allow subclasses to be instantiated. XAPIAN_NOTHROW(PostingSource ())67 XAPIAN_NOTHROW(PostingSource()) 68 : max_weight_(0), matcher_(NULL) { } 69 70 /** @private @internal Set the object to inform of maxweight changes. 71 * 72 * This method is for internal use only - it would be private except that 73 * would force us to forward declare an internal class in an external API 74 * header just to make it a friend. 75 */ register_matcher_(void * matcher)76 void register_matcher_(void * matcher) { matcher_ = matcher; } 77 78 // Destructor. 79 virtual ~PostingSource(); 80 81 /** A lower bound on the number of documents this object can return. 82 * 83 * Xapian will always call init() on a PostingSource before calling this 84 * for the first time. 85 */ 86 virtual Xapian::doccount get_termfreq_min() const = 0; 87 88 /** An estimate of the number of documents this object can return. 89 * 90 * It must always be true that: 91 * 92 * get_termfreq_min() <= get_termfreq_est() <= get_termfreq_max() 93 * 94 * Xapian will always call init() on a PostingSource before calling this 95 * for the first time. 96 */ 97 virtual Xapian::doccount get_termfreq_est() const = 0; 98 99 /** An upper bound on the number of documents this object can return. 100 * 101 * Xapian will always call init() on a PostingSource before calling this 102 * for the first time. 103 */ 104 virtual Xapian::doccount get_termfreq_max() const = 0; 105 106 /** Specify an upper bound on what get_weight() will return from now on. 107 * 108 * This upper bound is used by the matcher to perform various 109 * optimisations, so if you can return a good bound, then matches 110 * will generally run faster. 111 * 112 * This method should be called after calling init(), and may be called 113 * during iteration if the upper bound drops. It is probably only useful 114 * to call from subclasses (it was actually a "protected" method prior to 115 * Xapian 1.3.4, but that makes it tricky to wrap for other languages). 116 * 117 * It is valid for the posting source to have returned a higher value from 118 * get_weight() earlier in the iteration, but the posting source must not 119 * return a higher value from get_weight() than the currently set upper 120 * bound, and the upper bound must not be increased (until init() has been 121 * called). 122 * 123 * If you don't call this method, the upper bound will default to 0, for 124 * convenience when implementing "weight-less" PostingSource subclasses. 125 * 126 * @param max_weight The upper bound to set. 127 */ 128 void set_maxweight(double max_weight); 129 130 /// Return the currently set upper bound on what get_weight() can return. XAPIAN_NOTHROW(get_maxweight ()const)131 double XAPIAN_NOTHROW(get_maxweight() const) { return max_weight_; } 132 133 /** Return the weight contribution for the current document. 134 * 135 * This default implementation always returns 0, for convenience when 136 * implementing "weight-less" PostingSource subclasses. 137 * 138 * This method may assume that it will only be called when there is a 139 * "current document". In detail: Xapian will always call init() on a 140 * PostingSource before calling this for the first time. It will also 141 * only call this if the PostingSource reports that it is pointing to a 142 * valid document (ie, it will not call it before calling at least one of 143 * next(), skip_to() or check(), and will ensure that the PostingSource is 144 * not at the end by calling at_end()). 145 */ 146 virtual double get_weight() const; 147 148 /** Return the current docid. 149 * 150 * This method may assume that it will only be called when there is a 151 * "current document". See @a get_weight() for details. 152 * 153 * Note: in the case of a multi-database search, the returned docid should 154 * be in the single subdatabase relevant to this posting source. See the 155 * @a init() method for details. 156 */ 157 virtual Xapian::docid get_docid() const = 0; 158 159 /** Advance the current position to the next matching document. 160 * 161 * The PostingSource starts before the first entry in the list, so next(), 162 * skip_to() or check() must be called before any methods which need the 163 * context of the current position. 164 * 165 * Xapian will always call init() on a PostingSource before calling this 166 * for the first time. 167 * 168 * @param min_wt The minimum weight contribution that is needed (this is 169 * just a hint which subclasses may ignore). 170 */ 171 virtual void next(double min_wt) = 0; 172 173 /** Advance to the specified docid. 174 * 175 * If the specified docid isn't in the list, position ourselves on the 176 * first document after it (or at_end() if no greater docids are present). 177 * 178 * If the current position is already the specified docid, this method will 179 * leave the position unmodified. 180 * 181 * If the specified docid is earlier than the current position, the 182 * behaviour is unspecified. A sensible behaviour would be to leave the 183 * current position unmodified, but it is also reasonable to move to the 184 * specified docid. 185 * 186 * The default implementation calls next() repeatedly, which works but 187 * skip_to() can often be implemented much more efficiently. 188 * 189 * Xapian will always call init() on a PostingSource before calling this 190 * for the first time. 191 * 192 * Note: in the case of a multi-database search, the docid specified is 193 * the docid in the single subdatabase relevant to this posting source. 194 * See the @a init() method for details. 195 * 196 * @param did The document id to advance to. 197 * @param min_wt The minimum weight contribution that is needed (this is 198 * just a hint which subclasses may ignore). 199 */ 200 virtual void skip_to(Xapian::docid did, double min_wt); 201 202 /** Check if the specified docid occurs. 203 * 204 * The caller is required to ensure that the specified document id @a did 205 * actually exists in the database. If it does, it must move to that 206 * document id, and return true. If it does not, it may either: 207 * 208 * - return true, having moved to a definite position (including 209 * "at_end"), which must be the same position as skip_to() would have 210 * moved to. 211 * 212 * or 213 * 214 * - return false, having moved to an "indeterminate" position, such that 215 * a subsequent call to next() or skip_to() will move to the next 216 * matching position after @a did. 217 * 218 * Generally, this method should act like skip_to() and return true if 219 * that can be done at little extra cost. 220 * 221 * Otherwise it should simply check if a particular docid is present, 222 * returning true if it is, and false if it isn't. 223 * 224 * The default implementation calls skip_to() and always returns true. 225 * 226 * Xapian will always call init() on a PostingSource before calling this 227 * for the first time. 228 * 229 * Note: in the case of a multi-database search, the docid specified is 230 * the docid in the single subdatabase relevant to this posting source. 231 * See the @a init() method for details. 232 * 233 * @param did The document id to check. 234 * @param min_wt The minimum weight contribution that is needed (this is 235 * just a hint which subclasses may ignore). 236 */ 237 virtual bool check(Xapian::docid did, double min_wt); 238 239 /** Return true if the current position is past the last entry in this list. 240 * 241 * At least one of @a next(), @a skip_to() or @a check() will be called 242 * before this method is first called. 243 */ 244 virtual bool at_end() const = 0; 245 246 /** Clone the posting source. 247 * 248 * The clone should inherit the configuration of the parent, but need not 249 * inherit the state. ie, the clone does not need to be in the same 250 * iteration position as the original: the matcher will always call 251 * init() on the clone before attempting to move the iterator, or read 252 * the information about the current position of the iterator. 253 * 254 * This may return NULL to indicate that cloning is not supported. In 255 * this case, the PostingSource may only be used with a single-database 256 * search. 257 * 258 * The default implementation returns NULL. 259 * 260 * Note that the returned object will be deallocated by Xapian after use 261 * with "delete". If you want to handle the deletion in a special way 262 * (for example when wrapping the Xapian API for use from another 263 * language) then you can define a static <code>operator delete</code> 264 * method in your subclass as shown here: 265 * https://trac.xapian.org/ticket/554#comment:1 266 */ 267 virtual PostingSource * clone() const; 268 269 /** Name of the posting source class. 270 * 271 * This is used when serialising and unserialising posting sources; for 272 * example, for performing remote searches. 273 * 274 * If the subclass is in a C++ namespace, the namespace should be included 275 * in the name, using "::" as a separator. For example, for a 276 * PostingSource subclass called "FooPostingSource" in the "Xapian" 277 * namespace the result of this call should be "Xapian::FooPostingSource". 278 * 279 * This should only be implemented if serialise() and unserialise() are 280 * also implemented. The default implementation returns an empty string. 281 * 282 * If this returns an empty string, Xapian will assume that serialise() 283 * and unserialise() are not implemented. 284 */ 285 virtual std::string name() const; 286 287 /** Serialise object parameters into a string. 288 * 289 * The serialised parameters should represent the configuration of the 290 * posting source, but need not (indeed, should not) represent the current 291 * iteration state. 292 * 293 * If you don't want to support the remote backend, you can use the 294 * default implementation which simply throws Xapian::UnimplementedError. 295 */ 296 virtual std::string serialise() const; 297 298 /** Create object given string serialisation returned by serialise(). 299 * 300 * Note that the returned object will be deallocated by Xapian after use 301 * with "delete". If you want to handle the deletion in a special way 302 * (for example when wrapping the Xapian API for use from another 303 * language) then you can define a static <code>operator delete</code> 304 * method in your subclass as shown here: 305 * https://trac.xapian.org/ticket/554#comment:1 306 * 307 * If you don't want to support the remote backend, you can use the 308 * default implementation which simply throws Xapian::UnimplementedError. 309 * 310 * @param serialised A serialised instance of this PostingSource subclass. 311 */ 312 virtual PostingSource * unserialise(const std::string &serialised) const; 313 314 /** Create object given string serialisation returned by serialise(). 315 * 316 * Note that the returned object will be deallocated by Xapian after use 317 * with "delete". If you want to handle the deletion in a special way 318 * (for example when wrapping the Xapian API for use from another 319 * language) then you can define a static <code>operator delete</code> 320 * method in your subclass as shown here: 321 * https://trac.xapian.org/ticket/554#comment:1 322 * 323 * This method is supplied with a Registry object, which can be used when 324 * unserialising objects contained within the posting source. The default 325 * implementation simply calls unserialise() which doesn't take the 326 * Registry object, so you do not need to implement this method unless you 327 * want to take advantage of the Registry object when unserialising. 328 * 329 * @param serialised A serialised instance of this PostingSource subclass. 330 * @param registry The Xapian::Registry object to use. 331 */ 332 virtual PostingSource * unserialise_with_registry(const std::string &serialised, 333 const Registry & registry) const; 334 335 /** Set this PostingSource to the start of the list of postings. 336 * 337 * This is called automatically by the matcher prior to each query being 338 * processed. 339 * 340 * If a PostingSource is used for multiple searches, @a init() will 341 * therefore be called multiple times, and must handle this by using the 342 * database passed in the most recent call. 343 * 344 * @param db The database which the PostingSource should iterate through. 345 * 346 * Note: in the case of a multi-database search, a separate PostingSource 347 * will be used for each database (the separate PostingSources will be 348 * obtained using @a clone()), and each PostingSource will be passed one of 349 * the sub-databases as the @a db parameter here. The @a db parameter 350 * will therefore always refer to a single database. All docids passed 351 * to, or returned from, the PostingSource refer to docids in that single 352 * database, rather than in the multi-database. 353 */ 354 virtual void init(const Database & db) = 0; 355 356 /** Return a string describing this object. 357 * 358 * This default implementation returns a generic answer. This default 359 * it provided to avoid forcing those deriving their own PostingSource 360 * subclass from having to implement this (they may not care what 361 * get_description() gives for their subclass). 362 */ 363 virtual std::string get_description() const; 364 365 /** Start reference counting this object. 366 * 367 * You can hand ownership of a dynamically allocated PostingSource 368 * object to Xapian by calling release() and then passing the object to a 369 * Xapian method. Xapian will arrange to delete the object once it is no 370 * longer required. 371 */ release()372 PostingSource * release() { 373 opt_intrusive_base::release(); 374 return this; 375 } 376 377 /** Start reference counting this object. 378 * 379 * You can hand ownership of a dynamically allocated PostingSource 380 * object to Xapian by calling release() and then passing the object to a 381 * Xapian method. Xapian will arrange to delete the object once it is no 382 * longer required. 383 */ release()384 const PostingSource * release() const { 385 opt_intrusive_base::release(); 386 return this; 387 } 388 }; 389 390 391 /** A posting source which generates weights from a value slot. 392 * 393 * This is a base class for classes which generate weights using values stored 394 * in the specified slot. For example, ValueWeightPostingSource uses 395 * sortable_unserialise to convert values directly to weights. 396 * 397 * The upper bound on the weight returned is set to DBL_MAX. Subclasses 398 * should call set_maxweight() in their init() methods after calling 399 * ValuePostingSource::init() if they know a tighter bound on the weight. 400 */ 401 class XAPIAN_VISIBILITY_DEFAULT ValuePostingSource : public PostingSource { 402 // We want to give a deprecation warning for uses of the members from user 403 // code, but we also want to be able to inline functions to access them, 404 // without those functions generating deprecated warnings. To achieve 405 // this, we make the old names references to members with a "real_" prefix 406 // and then use the latter in the inlined accessor functions. The 407 // constructor initialises all the references to point to their "real_" 408 // counterparts. 409 Xapian::Database real_db; 410 411 Xapian::valueno real_slot; 412 413 Xapian::ValueIterator real_value_it; 414 415 bool real_started; 416 417 Xapian::doccount real_termfreq_min; 418 419 Xapian::doccount real_termfreq_est; 420 421 Xapian::doccount real_termfreq_max; 422 423 protected: 424 /** The database we're reading values from. 425 * 426 * @deprecated Use @a get_database() in preference. 427 */ 428 XAPIAN_DEPRECATED(Xapian::Database& db); 429 430 /** The slot we're reading values from. 431 * 432 * @deprecated Use @a get_slot() in preference. 433 */ 434 XAPIAN_DEPRECATED(Xapian::valueno& slot); 435 436 /** Value stream iterator. 437 * 438 * @deprecated Use @a get_value() in preference to *value_it, and other 439 * methods of ValuePostingSource in preference to calling methods of 440 * value_it. 441 */ 442 XAPIAN_DEPRECATED(Xapian::ValueIterator& value_it); 443 444 /** Flag indicating if we've started (true if we have). 445 * 446 * @deprecated Use @a get_started() in preference. 447 */ 448 XAPIAN_DEPRECATED(bool& started); 449 450 /** A lower bound on the term frequency. 451 * 452 * Subclasses should set this if they are overriding the next(), skip_to() 453 * or check() methods to return fewer documents. 454 * 455 * @deprecated Use @a set_termfreq_min() in preference. 456 */ 457 XAPIAN_DEPRECATED(Xapian::doccount& termfreq_min); 458 459 /** An estimate of the term frequency. 460 * 461 * Subclasses should set this if they are overriding the next(), skip_to() 462 * or check() methods. 463 * 464 * @deprecated Use @a set_termfreq_est() in preference. 465 */ 466 XAPIAN_DEPRECATED(Xapian::doccount& termfreq_est); 467 468 /** An upper bound on the term frequency. 469 * 470 * Subclasses should set this if they are overriding the next(), skip_to() 471 * or check() methods. 472 * 473 * @deprecated Use @a set_termfreq_max() in preference. 474 */ 475 XAPIAN_DEPRECATED(Xapian::doccount& termfreq_max); 476 477 public: 478 /** Construct a ValuePostingSource. 479 * 480 * @param slot_ The value slot to read values from. 481 */ 482 explicit ValuePostingSource(Xapian::valueno slot_); 483 484 Xapian::doccount get_termfreq_min() const; 485 Xapian::doccount get_termfreq_est() const; 486 Xapian::doccount get_termfreq_max() const; 487 488 void next(double min_wt); 489 void skip_to(Xapian::docid min_docid, double min_wt); 490 bool check(Xapian::docid min_docid, double min_wt); 491 492 bool at_end() const; 493 494 Xapian::docid get_docid() const; 495 496 void init(const Database & db_); 497 498 /** The database we're reading values from. 499 * 500 * Added in 1.2.23 and 1.3.5. 501 */ get_database()502 Xapian::Database get_database() const { return real_db; } 503 504 /** The slot we're reading values from. 505 * 506 * Added in 1.2.23 and 1.3.5. 507 */ get_slot()508 Xapian::valueno get_slot() const { return real_slot; } 509 510 /** Read current value. 511 * 512 * Added in 1.2.23 and 1.3.5. 513 */ get_value()514 std::string get_value() const { return *real_value_it; } 515 516 /** End the iteration. 517 * 518 * Calls to at_end() will return true after calling this method. 519 * 520 * Added in 1.2.23 and 1.3.5. 521 */ done()522 void done() { 523 real_value_it = real_db.valuestream_end(real_slot); 524 real_started = true; 525 } 526 527 /** Flag indicating if we've started (true if we have). 528 * 529 * Added in 1.2.23 and 1.3.5. 530 */ get_started()531 bool get_started() const { return real_started; } 532 533 /** Set a lower bound on the term frequency. 534 * 535 * Subclasses should set this if they are overriding the next(), skip_to() 536 * or check() methods to return fewer documents. 537 * 538 * Added in 1.2.23 and 1.3.5. 539 */ set_termfreq_min(Xapian::doccount termfreq_min_)540 void set_termfreq_min(Xapian::doccount termfreq_min_) { 541 real_termfreq_min = termfreq_min_; 542 } 543 544 /** An estimate of the term frequency. 545 * 546 * Subclasses should set this if they are overriding the next(), skip_to() 547 * or check() methods. 548 * 549 * Added in 1.2.23 and 1.3.5. 550 */ set_termfreq_est(Xapian::doccount termfreq_est_)551 void set_termfreq_est(Xapian::doccount termfreq_est_) { 552 real_termfreq_est = termfreq_est_; 553 } 554 555 /** An upper bound on the term frequency. 556 * 557 * Subclasses should set this if they are overriding the next(), skip_to() 558 * or check() methods. 559 * 560 * Added in 1.2.23 and 1.3.5. 561 */ set_termfreq_max(Xapian::doccount termfreq_max_)562 void set_termfreq_max(Xapian::doccount termfreq_max_) { 563 real_termfreq_max = termfreq_max_; 564 } 565 }; 566 567 568 /** A posting source which reads weights from a value slot. 569 * 570 * This returns entries for all documents in the given database which have a 571 * non empty values in the specified slot. It returns a weight calculated by 572 * applying sortable_unserialise to the value stored in the slot (so the 573 * values stored should probably have been calculated by applying 574 * sortable_serialise to a floating point number at index time). 575 * 576 * The upper bound on the weight returned is set using the upper bound on the 577 * values in the specified slot, or DBL_MAX if value bounds aren't supported 578 * by the current backend. 579 * 580 * For efficiency, this posting source doesn't check that the stored values 581 * are valid in any way, so it will never raise an exception due to invalid 582 * stored values. In particular, it doesn't ensure that the unserialised 583 * values are positive, which is a requirement for weights. The behaviour if 584 * the slot contains values which unserialise to negative values is undefined. 585 */ 586 class XAPIAN_VISIBILITY_DEFAULT ValueWeightPostingSource 587 : public ValuePostingSource { 588 public: 589 /** Construct a ValueWeightPostingSource. 590 * 591 * @param slot_ The value slot to read values from. 592 */ 593 explicit ValueWeightPostingSource(Xapian::valueno slot_); 594 595 double get_weight() const; 596 ValueWeightPostingSource * clone() const; 597 std::string name() const; 598 std::string serialise() const; 599 ValueWeightPostingSource * unserialise(const std::string &serialised) const; 600 void init(const Database & db_); 601 602 std::string get_description() const; 603 }; 604 605 606 /** Read weights from a value which is known to decrease as docid increases. 607 * 608 * This posting source can be used, like ValueWeightPostingSource, to add a 609 * weight contribution to a query based on the values stored in a slot. The 610 * values in the slot must be serialised as by @a sortable_serialise(). 611 * 612 * However, this posting source is additionally given a range of document IDs, 613 * within which the weight is known to be decreasing. ie, for all documents 614 * with ids A and B within this range (including the endpoints), where A is 615 * less than B, the weight of A is less than or equal to the weight of B. 616 * This can allow the posting source to skip to the end of the range quickly 617 * if insufficient weight is left in the posting source for a particular 618 * source. 619 * 620 * By default, the range is assumed to cover all document IDs. 621 * 622 * The ordering property can be arranged at index time, or by sorting an 623 * indexed database to produce a new, sorted, database. 624 */ 625 class XAPIAN_VISIBILITY_DEFAULT DecreasingValueWeightPostingSource 626 : public Xapian::ValueWeightPostingSource { 627 protected: 628 /** Start of range of docids for which weights are known to be decreasing. 629 * 630 * 0 => first docid. 631 */ 632 Xapian::docid range_start; 633 634 /** End of range of docids for which weights are known to be decreasing. 635 * 636 * 0 => last docid. 637 */ 638 Xapian::docid range_end; 639 640 /// Weight at current position. 641 double curr_weight; 642 643 /// Flag, set to true if there are docs after the end of the range. 644 bool items_at_end; 645 646 /// Skip the iterator forward if in the decreasing range, and weight is low. 647 void skip_if_in_range(double min_wt); 648 649 public: 650 /** Construct a DecreasingValueWeightPostingSource. 651 * 652 * @param slot_ The value slot to read values from. 653 * @param range_start_ Start of range of docids for which weights are 654 * known to be decreasing (default: first docid) 655 * @param range_end_ End of range of docids for which weights are 656 * known to be decreasing (default: last docid) 657 */ 658 DecreasingValueWeightPostingSource(Xapian::valueno slot_, 659 Xapian::docid range_start_ = 0, 660 Xapian::docid range_end_ = 0); 661 662 double get_weight() const; 663 DecreasingValueWeightPostingSource * clone() const; 664 std::string name() const; 665 std::string serialise() const; 666 DecreasingValueWeightPostingSource * unserialise(const std::string &serialised) const; 667 void init(const Xapian::Database & db_); 668 669 void next(double min_wt); 670 void skip_to(Xapian::docid min_docid, double min_wt); 671 bool check(Xapian::docid min_docid, double min_wt); 672 673 std::string get_description() const; 674 }; 675 676 677 /** A posting source which looks up weights in a map using values as the key. 678 * 679 * This allows will return entries for all documents in the given database 680 * which have a value in the slot specified. The values will be mapped to the 681 * corresponding weight in the weight map. If there is no mapping for a 682 * particular value, the default weight will be returned (which itself 683 * defaults to 0.0). 684 */ 685 class XAPIAN_VISIBILITY_DEFAULT ValueMapPostingSource 686 : public ValuePostingSource { 687 /// The default weight 688 double default_weight; 689 690 /// The maximum weight in weight_map. 691 double max_weight_in_map; 692 693 /// The value -> weight map 694 std::map<std::string, double> weight_map; 695 696 public: 697 /** Construct a ValueMapPostingSource. 698 * 699 * @param slot_ The value slot to read values from. 700 */ 701 explicit ValueMapPostingSource(Xapian::valueno slot_); 702 703 /** Add a mapping. 704 * 705 * @param key The key looked up from the value slot. 706 * @param wt The weight to give this key. 707 */ 708 void add_mapping(const std::string &key, double wt); 709 710 /** Clear all mappings. */ 711 void clear_mappings(); 712 713 /** Set a default weight for document values not in the map. 714 * 715 * @param wt The weight to set as the default. 716 */ 717 void set_default_weight(double wt); 718 719 double get_weight() const; 720 ValueMapPostingSource * clone() const; 721 std::string name() const; 722 std::string serialise() const; 723 ValueMapPostingSource * unserialise(const std::string &serialised) const; 724 void init(const Database & db_); 725 726 std::string get_description() const; 727 }; 728 729 730 /** A posting source which returns a fixed weight for all documents. 731 * 732 * This returns entries for all documents in the given database, with a fixed 733 * weight (specified by a parameter to the constructor). 734 */ 735 class XAPIAN_VISIBILITY_DEFAULT FixedWeightPostingSource : public PostingSource { 736 /// The database we're reading documents from. 737 Xapian::Database db; 738 739 /// Number of documents in the posting source. 740 Xapian::doccount termfreq; 741 742 /// Iterator over all documents. 743 Xapian::PostingIterator it; 744 745 /// Flag indicating if we've started (true if we have). 746 bool started; 747 748 /// The docid last passed to check() (0 if check() wasn't the last move). 749 Xapian::docid check_docid; 750 751 public: 752 /** Construct a FixedWeightPostingSource. 753 * 754 * @param wt The fixed weight to return. 755 */ 756 explicit FixedWeightPostingSource(double wt); 757 758 Xapian::doccount get_termfreq_min() const; 759 Xapian::doccount get_termfreq_est() const; 760 Xapian::doccount get_termfreq_max() const; 761 762 double get_weight() const; 763 764 void next(double min_wt); 765 void skip_to(Xapian::docid min_docid, double min_wt); 766 bool check(Xapian::docid min_docid, double min_wt); 767 768 bool at_end() const; 769 770 Xapian::docid get_docid() const; 771 772 FixedWeightPostingSource * clone() const; 773 std::string name() const; 774 std::string serialise() const; 775 FixedWeightPostingSource * unserialise(const std::string &serialised) const; 776 void init(const Database & db_); 777 778 std::string get_description() const; 779 }; 780 781 } 782 783 #endif // XAPIAN_INCLUDED_POSTINGSOURCE_H 784