/** @file
 *  @brief External sources of posting information
 */
/* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
 * Copyright (C) 2008,2009 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */

22 #ifndef XAPIAN_INCLUDED_POSTINGSOURCE_H
23 #define XAPIAN_INCLUDED_POSTINGSOURCE_H
24 
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error Never use <xapian/postingsource.h> directly; include <xapian.h> instead.
27 #endif
28 
29 #include <xapian/attributes.h>
30 #include <xapian/database.h>
31 #include <xapian/deprecated.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/postingiterator.h>
34 #include <xapian/types.h>
35 #include <xapian/valueiterator.h>
36 #include <xapian/visibility.h>
37 
38 #include <string>
39 #include <map>
40 
41 namespace Xapian {
42 
43 class Registry;
44 
45 /** Base class which provides an "external" source of postings.
46  */
47 class XAPIAN_VISIBILITY_DEFAULT PostingSource
48     : public Xapian::Internal::opt_intrusive_base {
49     /// Don't allow assignment.
50     void operator=(const PostingSource &);
51 
52     /// Don't allow copying.
53     PostingSource(const PostingSource &);
54 
55     /// The current upper bound on what get_weight() can return.
56     double max_weight_;
57 
58     /** The object to inform of maxweight changes.
59      *
60      *  We store this as a (void*) to avoid needing to declare an internal
61      *  type in an external header.  It's actually (MultiMatch *).
62      */
63     void * matcher_;
64 
65   public:
66     /// Allow subclasses to be instantiated.
XAPIAN_NOTHROW(PostingSource ())67     XAPIAN_NOTHROW(PostingSource())
68 	: max_weight_(0), matcher_(NULL) { }
69 
70     /** @private @internal Set the object to inform of maxweight changes.
71      *
72      *  This method is for internal use only - it would be private except that
73      *  would force us to forward declare an internal class in an external API
74      *  header just to make it a friend.
75      */
register_matcher_(void * matcher)76     void register_matcher_(void * matcher) { matcher_ = matcher; }
77 
78     // Destructor.
79     virtual ~PostingSource();
80 
81     /** A lower bound on the number of documents this object can return.
82      *
83      *  Xapian will always call init() on a PostingSource before calling this
84      *  for the first time.
85      */
86     virtual Xapian::doccount get_termfreq_min() const = 0;
87 
88     /** An estimate of the number of documents this object can return.
89      *
90      *  It must always be true that:
91      *
92      *  get_termfreq_min() <= get_termfreq_est() <= get_termfreq_max()
93      *
94      *  Xapian will always call init() on a PostingSource before calling this
95      *  for the first time.
96      */
97     virtual Xapian::doccount get_termfreq_est() const = 0;
98 
99     /** An upper bound on the number of documents this object can return.
100      *
101      *  Xapian will always call init() on a PostingSource before calling this
102      *  for the first time.
103      */
104     virtual Xapian::doccount get_termfreq_max() const = 0;
105 
106     /** Specify an upper bound on what get_weight() will return from now on.
107      *
108      *  This upper bound is used by the matcher to perform various
109      *  optimisations, so if you can return a good bound, then matches
110      *  will generally run faster.
111      *
112      *  This method should be called after calling init(), and may be called
113      *  during iteration if the upper bound drops.  It is probably only useful
114      *  to call from subclasses (it was actually a "protected" method prior to
115      *  Xapian 1.3.4, but that makes it tricky to wrap for other languages).
116      *
117      *  It is valid for the posting source to have returned a higher value from
118      *  get_weight() earlier in the iteration, but the posting source must not
119      *  return a higher value from get_weight() than the currently set upper
120      *  bound, and the upper bound must not be increased (until init() has been
121      *  called).
122      *
123      *  If you don't call this method, the upper bound will default to 0, for
124      *  convenience when implementing "weight-less" PostingSource subclasses.
125      *
126      *  @param max_weight	The upper bound to set.
127      */
128     void set_maxweight(double max_weight);
129 
130     /// Return the currently set upper bound on what get_weight() can return.
XAPIAN_NOTHROW(get_maxweight ()const)131     double XAPIAN_NOTHROW(get_maxweight() const) { return max_weight_; }
132 
133     /** Return the weight contribution for the current document.
134      *
135      *  This default implementation always returns 0, for convenience when
136      *  implementing "weight-less" PostingSource subclasses.
137      *
138      *  This method may assume that it will only be called when there is a
139      *  "current document".  In detail: Xapian will always call init() on a
140      *  PostingSource before calling this for the first time.  It will also
141      *  only call this if the PostingSource reports that it is pointing to a
142      *  valid document (ie, it will not call it before calling at least one of
143      *  next(), skip_to() or check(), and will ensure that the PostingSource is
144      *  not at the end by calling at_end()).
145      */
146     virtual double get_weight() const;
147 
148     /** Return the current docid.
149      *
150      *  This method may assume that it will only be called when there is a
151      *  "current document".  See @a get_weight() for details.
152      *
153      *  Note: in the case of a multi-database search, the returned docid should
154      *  be in the single subdatabase relevant to this posting source.  See the
155      *  @a init() method for details.
156      */
157     virtual Xapian::docid get_docid() const = 0;
158 
159     /** Advance the current position to the next matching document.
160      *
161      *  The PostingSource starts before the first entry in the list, so next(),
162      *  skip_to() or check() must be called before any methods which need the
163      *  context of the current position.
164      *
165      *  Xapian will always call init() on a PostingSource before calling this
166      *  for the first time.
167      *
168      *  @param min_wt	The minimum weight contribution that is needed (this is
169      *			just a hint which subclasses may ignore).
170      */
171     virtual void next(double min_wt) = 0;
172 
173     /** Advance to the specified docid.
174      *
175      *  If the specified docid isn't in the list, position ourselves on the
176      *  first document after it (or at_end() if no greater docids are present).
177      *
178      *  If the current position is already the specified docid, this method will
179      *  leave the position unmodified.
180      *
181      *  If the specified docid is earlier than the current position, the
182      *  behaviour is unspecified.  A sensible behaviour would be to leave the
183      *  current position unmodified, but it is also reasonable to move to the
184      *  specified docid.
185      *
186      *  The default implementation calls next() repeatedly, which works but
187      *  skip_to() can often be implemented much more efficiently.
188      *
189      *  Xapian will always call init() on a PostingSource before calling this
190      *  for the first time.
191      *
192      *  Note: in the case of a multi-database search, the docid specified is
193      *  the docid in the single subdatabase relevant to this posting source.
194      *  See the @a init() method for details.
195      *
196      *  @param did	The document id to advance to.
197      *  @param min_wt	The minimum weight contribution that is needed (this is
198      *			just a hint which subclasses may ignore).
199      */
200     virtual void skip_to(Xapian::docid did, double min_wt);
201 
202     /** Check if the specified docid occurs.
203      *
204      *  The caller is required to ensure that the specified document id @a did
205      *  actually exists in the database.  If it does, it must move to that
206      *  document id, and return true.  If it does not, it may either:
207      *
208      *   - return true, having moved to a definite position (including
209      *   "at_end"), which must be the same position as skip_to() would have
210      *   moved to.
211      *
212      *  or
213      *
214      *   - return false, having moved to an "indeterminate" position, such that
215      *   a subsequent call to next() or skip_to() will move to the next
216      *   matching position after @a did.
217      *
218      *  Generally, this method should act like skip_to() and return true if
219      *  that can be done at little extra cost.
220      *
221      *  Otherwise it should simply check if a particular docid is present,
222      *  returning true if it is, and false if it isn't.
223      *
224      *  The default implementation calls skip_to() and always returns true.
225      *
226      *  Xapian will always call init() on a PostingSource before calling this
227      *  for the first time.
228      *
229      *  Note: in the case of a multi-database search, the docid specified is
230      *  the docid in the single subdatabase relevant to this posting source.
231      *  See the @a init() method for details.
232      *
233      *  @param did	The document id to check.
234      *  @param min_wt	The minimum weight contribution that is needed (this is
235      *			just a hint which subclasses may ignore).
236      */
237     virtual bool check(Xapian::docid did, double min_wt);
238 
239     /** Return true if the current position is past the last entry in this list.
240      *
241      *  At least one of @a next(), @a skip_to() or @a check() will be called
242      *  before this method is first called.
243      */
244     virtual bool at_end() const = 0;
245 
246     /** Clone the posting source.
247      *
248      *  The clone should inherit the configuration of the parent, but need not
249      *  inherit the state.  ie, the clone does not need to be in the same
250      *  iteration position as the original: the matcher will always call
251      *  init() on the clone before attempting to move the iterator, or read
252      *  the information about the current position of the iterator.
253      *
254      *  This may return NULL to indicate that cloning is not supported.  In
255      *  this case, the PostingSource may only be used with a single-database
256      *  search.
257      *
258      *  The default implementation returns NULL.
259      *
260      *  Note that the returned object will be deallocated by Xapian after use
261      *  with "delete".  If you want to handle the deletion in a special way
262      *  (for example when wrapping the Xapian API for use from another
263      *  language) then you can define a static <code>operator delete</code>
264      *  method in your subclass as shown here:
265      *  https://trac.xapian.org/ticket/554#comment:1
266      */
267     virtual PostingSource * clone() const;
268 
269     /** Name of the posting source class.
270      *
271      *  This is used when serialising and unserialising posting sources; for
272      *  example, for performing remote searches.
273      *
274      *  If the subclass is in a C++ namespace, the namespace should be included
275      *  in the name, using "::" as a separator.  For example, for a
276      *  PostingSource subclass called "FooPostingSource" in the "Xapian"
277      *  namespace the result of this call should be "Xapian::FooPostingSource".
278      *
279      *  This should only be implemented if serialise() and unserialise() are
280      *  also implemented.  The default implementation returns an empty string.
281      *
282      *  If this returns an empty string, Xapian will assume that serialise()
283      *  and unserialise() are not implemented.
284      */
285     virtual std::string name() const;
286 
287     /** Serialise object parameters into a string.
288      *
289      *  The serialised parameters should represent the configuration of the
290      *  posting source, but need not (indeed, should not) represent the current
291      *  iteration state.
292      *
293      *  If you don't want to support the remote backend, you can use the
294      *  default implementation which simply throws Xapian::UnimplementedError.
295      */
296     virtual std::string serialise() const;
297 
298     /** Create object given string serialisation returned by serialise().
299      *
300      *  Note that the returned object will be deallocated by Xapian after use
301      *  with "delete".  If you want to handle the deletion in a special way
302      *  (for example when wrapping the Xapian API for use from another
303      *  language) then you can define a static <code>operator delete</code>
304      *  method in your subclass as shown here:
305      *  https://trac.xapian.org/ticket/554#comment:1
306      *
307      *  If you don't want to support the remote backend, you can use the
308      *  default implementation which simply throws Xapian::UnimplementedError.
309      *
310      *  @param serialised A serialised instance of this PostingSource subclass.
311      */
312     virtual PostingSource * unserialise(const std::string &serialised) const;
313 
314     /** Create object given string serialisation returned by serialise().
315      *
316      *  Note that the returned object will be deallocated by Xapian after use
317      *  with "delete".  If you want to handle the deletion in a special way
318      *  (for example when wrapping the Xapian API for use from another
319      *  language) then you can define a static <code>operator delete</code>
320      *  method in your subclass as shown here:
321      *  https://trac.xapian.org/ticket/554#comment:1
322      *
323      *  This method is supplied with a Registry object, which can be used when
324      *  unserialising objects contained within the posting source.  The default
325      *  implementation simply calls unserialise() which doesn't take the
326      *  Registry object, so you do not need to implement this method unless you
327      *  want to take advantage of the Registry object when unserialising.
328      *
329      *  @param serialised A serialised instance of this PostingSource subclass.
330      *  @param registry   The Xapian::Registry object to use.
331      */
332     virtual PostingSource * unserialise_with_registry(const std::string &serialised,
333 				      const Registry & registry) const;
334 
335     /** Set this PostingSource to the start of the list of postings.
336      *
337      *  This is called automatically by the matcher prior to each query being
338      *  processed.
339      *
340      *  If a PostingSource is used for multiple searches, @a init() will
341      *  therefore be called multiple times, and must handle this by using the
342      *  database passed in the most recent call.
343      *
344      *  @param db The database which the PostingSource should iterate through.
345      *
346      *  Note: in the case of a multi-database search, a separate PostingSource
347      *  will be used for each database (the separate PostingSources will be
348      *  obtained using @a clone()), and each PostingSource will be passed one of
349      *  the sub-databases as the @a db parameter here.  The @a db parameter
350      *  will therefore always refer to a single database.  All docids passed
351      *  to, or returned from, the PostingSource refer to docids in that single
352      *  database, rather than in the multi-database.
353      */
354     virtual void init(const Database & db) = 0;
355 
356     /** Return a string describing this object.
357      *
358      *  This default implementation returns a generic answer.  This default
359      *  it provided to avoid forcing those deriving their own PostingSource
360      *  subclass from having to implement this (they may not care what
361      *  get_description() gives for their subclass).
362      */
363     virtual std::string get_description() const;
364 
365     /** Start reference counting this object.
366      *
367      *  You can hand ownership of a dynamically allocated PostingSource
368      *  object to Xapian by calling release() and then passing the object to a
369      *  Xapian method.  Xapian will arrange to delete the object once it is no
370      *  longer required.
371      */
release()372     PostingSource * release() {
373 	opt_intrusive_base::release();
374 	return this;
375     }
376 
377     /** Start reference counting this object.
378      *
379      *  You can hand ownership of a dynamically allocated PostingSource
380      *  object to Xapian by calling release() and then passing the object to a
381      *  Xapian method.  Xapian will arrange to delete the object once it is no
382      *  longer required.
383      */
release()384     const PostingSource * release() const {
385 	opt_intrusive_base::release();
386 	return this;
387     }
388 };
389 
390 
391 /** A posting source which generates weights from a value slot.
392  *
393  *  This is a base class for classes which generate weights using values stored
394  *  in the specified slot. For example, ValueWeightPostingSource uses
395  *  sortable_unserialise to convert values directly to weights.
396  *
397  *  The upper bound on the weight returned is set to DBL_MAX.  Subclasses
398  *  should call set_maxweight() in their init() methods after calling
399  *  ValuePostingSource::init() if they know a tighter bound on the weight.
400  */
401 class XAPIAN_VISIBILITY_DEFAULT ValuePostingSource : public PostingSource {
402     // We want to give a deprecation warning for uses of the members from user
403     // code, but we also want to be able to inline functions to access them,
404     // without those functions generating deprecated warnings.  To achieve
405     // this, we make the old names references to members with a "real_" prefix
406     // and then use the latter in the inlined accessor functions.  The
407     // constructor initialises all the references to point to their "real_"
408     // counterparts.
409     Xapian::Database real_db;
410 
411     Xapian::valueno real_slot;
412 
413     Xapian::ValueIterator real_value_it;
414 
415     bool real_started;
416 
417     Xapian::doccount real_termfreq_min;
418 
419     Xapian::doccount real_termfreq_est;
420 
421     Xapian::doccount real_termfreq_max;
422 
423   protected:
424     /** The database we're reading values from.
425      *
426      *  @deprecated Use @a get_database() in preference.
427      */
428     XAPIAN_DEPRECATED(Xapian::Database& db);
429 
430     /** The slot we're reading values from.
431      *
432      *  @deprecated Use @a get_slot() in preference.
433      */
434     XAPIAN_DEPRECATED(Xapian::valueno& slot);
435 
436     /** Value stream iterator.
437      *
438      *  @deprecated Use @a get_value() in preference to *value_it, and other
439      *  methods of ValuePostingSource in preference to calling methods of
440      *  value_it.
441      */
442     XAPIAN_DEPRECATED(Xapian::ValueIterator& value_it);
443 
444     /** Flag indicating if we've started (true if we have).
445      *
446      *  @deprecated Use @a get_started() in preference.
447      */
448     XAPIAN_DEPRECATED(bool& started);
449 
450     /** A lower bound on the term frequency.
451      *
452      *  Subclasses should set this if they are overriding the next(), skip_to()
453      *  or check() methods to return fewer documents.
454      *
455      *  @deprecated Use @a set_termfreq_min() in preference.
456      */
457     XAPIAN_DEPRECATED(Xapian::doccount& termfreq_min);
458 
459     /** An estimate of the term frequency.
460      *
461      *  Subclasses should set this if they are overriding the next(), skip_to()
462      *  or check() methods.
463      *
464      *  @deprecated Use @a set_termfreq_est() in preference.
465      */
466     XAPIAN_DEPRECATED(Xapian::doccount& termfreq_est);
467 
468     /** An upper bound on the term frequency.
469      *
470      *  Subclasses should set this if they are overriding the next(), skip_to()
471      *  or check() methods.
472      *
473      *  @deprecated Use @a set_termfreq_max() in preference.
474      */
475     XAPIAN_DEPRECATED(Xapian::doccount& termfreq_max);
476 
477   public:
478     /** Construct a ValuePostingSource.
479      *
480      *  @param slot_ The value slot to read values from.
481      */
482     explicit ValuePostingSource(Xapian::valueno slot_);
483 
484     Xapian::doccount get_termfreq_min() const;
485     Xapian::doccount get_termfreq_est() const;
486     Xapian::doccount get_termfreq_max() const;
487 
488     void next(double min_wt);
489     void skip_to(Xapian::docid min_docid, double min_wt);
490     bool check(Xapian::docid min_docid, double min_wt);
491 
492     bool at_end() const;
493 
494     Xapian::docid get_docid() const;
495 
496     void init(const Database & db_);
497 
498     /** The database we're reading values from.
499      *
500      *  Added in 1.2.23 and 1.3.5.
501      */
get_database()502     Xapian::Database get_database() const { return real_db; }
503 
504     /** The slot we're reading values from.
505      *
506      *  Added in 1.2.23 and 1.3.5.
507      */
get_slot()508     Xapian::valueno get_slot() const { return real_slot; }
509 
510     /** Read current value.
511      *
512      *  Added in 1.2.23 and 1.3.5.
513      */
get_value()514     std::string get_value() const { return *real_value_it; }
515 
516     /** End the iteration.
517      *
518      *  Calls to at_end() will return true after calling this method.
519      *
520      *  Added in 1.2.23 and 1.3.5.
521      */
done()522     void done() {
523 	real_value_it = real_db.valuestream_end(real_slot);
524 	real_started = true;
525     }
526 
527     /** Flag indicating if we've started (true if we have).
528      *
529      *  Added in 1.2.23 and 1.3.5.
530      */
get_started()531     bool get_started() const { return real_started; }
532 
533     /** Set a lower bound on the term frequency.
534      *
535      *  Subclasses should set this if they are overriding the next(), skip_to()
536      *  or check() methods to return fewer documents.
537      *
538      *  Added in 1.2.23 and 1.3.5.
539      */
set_termfreq_min(Xapian::doccount termfreq_min_)540     void set_termfreq_min(Xapian::doccount termfreq_min_) {
541 	real_termfreq_min = termfreq_min_;
542     }
543 
544     /** An estimate of the term frequency.
545      *
546      *  Subclasses should set this if they are overriding the next(), skip_to()
547      *  or check() methods.
548      *
549      *  Added in 1.2.23 and 1.3.5.
550      */
set_termfreq_est(Xapian::doccount termfreq_est_)551     void set_termfreq_est(Xapian::doccount termfreq_est_) {
552 	real_termfreq_est = termfreq_est_;
553     }
554 
555     /** An upper bound on the term frequency.
556      *
557      *  Subclasses should set this if they are overriding the next(), skip_to()
558      *  or check() methods.
559      *
560      *  Added in 1.2.23 and 1.3.5.
561      */
set_termfreq_max(Xapian::doccount termfreq_max_)562     void set_termfreq_max(Xapian::doccount termfreq_max_) {
563 	real_termfreq_max = termfreq_max_;
564     }
565 };
566 
567 
568 /** A posting source which reads weights from a value slot.
569  *
570  *  This returns entries for all documents in the given database which have a
571  *  non empty values in the specified slot.  It returns a weight calculated by
572  *  applying sortable_unserialise to the value stored in the slot (so the
573  *  values stored should probably have been calculated by applying
574  *  sortable_serialise to a floating point number at index time).
575  *
576  *  The upper bound on the weight returned is set using the upper bound on the
577  *  values in the specified slot, or DBL_MAX if value bounds aren't supported
578  *  by the current backend.
579  *
580  *  For efficiency, this posting source doesn't check that the stored values
581  *  are valid in any way, so it will never raise an exception due to invalid
582  *  stored values.  In particular, it doesn't ensure that the unserialised
583  *  values are positive, which is a requirement for weights.  The behaviour if
584  *  the slot contains values which unserialise to negative values is undefined.
585  */
586 class XAPIAN_VISIBILITY_DEFAULT ValueWeightPostingSource
587 	: public ValuePostingSource {
588   public:
589     /** Construct a ValueWeightPostingSource.
590      *
591      *  @param slot_ The value slot to read values from.
592      */
593     explicit ValueWeightPostingSource(Xapian::valueno slot_);
594 
595     double get_weight() const;
596     ValueWeightPostingSource * clone() const;
597     std::string name() const;
598     std::string serialise() const;
599     ValueWeightPostingSource * unserialise(const std::string &serialised) const;
600     void init(const Database & db_);
601 
602     std::string get_description() const;
603 };
604 
605 
606 /** Read weights from a value which is known to decrease as docid increases.
607  *
608  *  This posting source can be used, like ValueWeightPostingSource, to add a
609  *  weight contribution to a query based on the values stored in a slot.  The
610  *  values in the slot must be serialised as by @a sortable_serialise().
611  *
612  *  However, this posting source is additionally given a range of document IDs,
613  *  within which the weight is known to be decreasing.  ie, for all documents
614  *  with ids A and B within this range (including the endpoints), where A is
615  *  less than B, the weight of A is less than or equal to the weight of B.
616  *  This can allow the posting source to skip to the end of the range quickly
617  *  if insufficient weight is left in the posting source for a particular
618  *  source.
619  *
620  *  By default, the range is assumed to cover all document IDs.
621  *
622  *  The ordering property can be arranged at index time, or by sorting an
623  *  indexed database to produce a new, sorted, database.
624  */
625 class XAPIAN_VISIBILITY_DEFAULT DecreasingValueWeightPostingSource
626 	: public Xapian::ValueWeightPostingSource {
627   protected:
628     /** Start of range of docids for which weights are known to be decreasing.
629      *
630      *  0 => first docid.
631      */
632     Xapian::docid range_start;
633 
634     /** End of range of docids for which weights are known to be decreasing.
635      *
636      *  0 => last docid.
637      */
638     Xapian::docid range_end;
639 
640     /// Weight at current position.
641     double curr_weight;
642 
643     /// Flag, set to true if there are docs after the end of the range.
644     bool items_at_end;
645 
646     /// Skip the iterator forward if in the decreasing range, and weight is low.
647     void skip_if_in_range(double min_wt);
648 
649   public:
650     /** Construct a DecreasingValueWeightPostingSource.
651      *
652      *  @param slot_ The value slot to read values from.
653      *  @param range_start_ Start of range of docids for which weights are
654      *			known to be decreasing (default: first docid)
655      *  @param range_end_ End of range of docids for which weights are
656      *			known to be decreasing (default: last docid)
657      */
658     DecreasingValueWeightPostingSource(Xapian::valueno slot_,
659 				       Xapian::docid range_start_ = 0,
660 				       Xapian::docid range_end_ = 0);
661 
662     double get_weight() const;
663     DecreasingValueWeightPostingSource * clone() const;
664     std::string name() const;
665     std::string serialise() const;
666     DecreasingValueWeightPostingSource * unserialise(const std::string &serialised) const;
667     void init(const Xapian::Database & db_);
668 
669     void next(double min_wt);
670     void skip_to(Xapian::docid min_docid, double min_wt);
671     bool check(Xapian::docid min_docid, double min_wt);
672 
673     std::string get_description() const;
674 };
675 
676 
677 /** A posting source which looks up weights in a map using values as the key.
678  *
679  *  This allows will return entries for all documents in the given database
680  *  which have a value in the slot specified.  The values will be mapped to the
681  *  corresponding weight in the weight map. If there is no mapping for a
682  *  particular value, the default weight will be returned (which itself
683  *  defaults to 0.0).
684  */
685 class XAPIAN_VISIBILITY_DEFAULT ValueMapPostingSource
686 	: public ValuePostingSource {
687     /// The default weight
688     double default_weight;
689 
690     /// The maximum weight in weight_map.
691     double max_weight_in_map;
692 
693     /// The value -> weight map
694     std::map<std::string, double> weight_map;
695 
696   public:
697     /** Construct a ValueMapPostingSource.
698      *
699      *  @param slot_ The value slot to read values from.
700      */
701     explicit ValueMapPostingSource(Xapian::valueno slot_);
702 
703     /** Add a mapping.
704      *
705      *  @param key The key looked up from the value slot.
706      *  @param wt The weight to give this key.
707      */
708     void add_mapping(const std::string &key, double wt);
709 
710     /** Clear all mappings. */
711     void clear_mappings();
712 
713     /** Set a default weight for document values not in the map.
714      *
715      *  @param wt The weight to set as the default.
716      */
717     void set_default_weight(double wt);
718 
719     double get_weight() const;
720     ValueMapPostingSource * clone() const;
721     std::string name() const;
722     std::string serialise() const;
723     ValueMapPostingSource * unserialise(const std::string &serialised) const;
724     void init(const Database & db_);
725 
726     std::string get_description() const;
727 };
728 
729 
730 /** A posting source which returns a fixed weight for all documents.
731  *
732  *  This returns entries for all documents in the given database, with a fixed
733  *  weight (specified by a parameter to the constructor).
734  */
735 class XAPIAN_VISIBILITY_DEFAULT FixedWeightPostingSource : public PostingSource {
736     /// The database we're reading documents from.
737     Xapian::Database db;
738 
739     /// Number of documents in the posting source.
740     Xapian::doccount termfreq;
741 
742     /// Iterator over all documents.
743     Xapian::PostingIterator it;
744 
745     /// Flag indicating if we've started (true if we have).
746     bool started;
747 
748     /// The docid last passed to check() (0 if check() wasn't the last move).
749     Xapian::docid check_docid;
750 
751   public:
752     /** Construct a FixedWeightPostingSource.
753      *
754      *  @param wt The fixed weight to return.
755      */
756     explicit FixedWeightPostingSource(double wt);
757 
758     Xapian::doccount get_termfreq_min() const;
759     Xapian::doccount get_termfreq_est() const;
760     Xapian::doccount get_termfreq_max() const;
761 
762     double get_weight() const;
763 
764     void next(double min_wt);
765     void skip_to(Xapian::docid min_docid, double min_wt);
766     bool check(Xapian::docid min_docid, double min_wt);
767 
768     bool at_end() const;
769 
770     Xapian::docid get_docid() const;
771 
772     FixedWeightPostingSource * clone() const;
773     std::string name() const;
774     std::string serialise() const;
775     FixedWeightPostingSource * unserialise(const std::string &serialised) const;
776     void init(const Database & db_);
777 
778     std::string get_description() const;
779 };
780 
781 }
782 
783 #endif // XAPIAN_INCLUDED_POSTINGSOURCE_H
784