1 /* andmaybepostlist.h: Merged postlist: items from one list, weights from both
2  *
3  * AND MAYBE of two posting lists
4  * A AND MAYBE B is logically just A, but we keep B around for weight purposes
5  *
6  * Copyright 1999,2000,2001 BrightStation PLC
7  * Copyright 2002 Ananova Ltd
8  * Copyright 2003,2004,2009 Olly Betts
9  * Copyright 2009 Lemur Consulting Ltd
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public License as
13  * published by the Free Software Foundation; either version 2 of the
14  * License, or (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
24  * USA
25  */
26 
27 
28 #ifndef OM_HGUARD_ANDMAYBEPOSTLIST_H
29 #define OM_HGUARD_ANDMAYBEPOSTLIST_H
30 
31 #include "branchpostlist.h"
32 
33 /** A postlist with weights modified by another postlist.
34  *
35  *  This postlist returns a posting if and only if it is in the left
36  *  sub-postlist.
37  *
38  *  If the posting does not occur in the right postlist, the weight for the
39  *  posting is simply that in the left postlist.  If the posting occurs in
40  *  both postlists, the weight for the posting is the sum of the weights in
41  *  the sub-postlists.
42  *
43  *  This type of postlist is useful for specifying a set of terms which
44  *  must appear in the query result: these terms can be specified as the
45  *  left hand argument, with the rest of the query being on the right hand
46  *  side, and having the effect of modifying the weights.
47  *
48  *  The postlist is also used as a "decay product" of other postlist types
49  *  during the match process: when a postlist can no longer cause a
50  *  document to enter the mset on its own, but can influence relative
51  *  rankings, it may be combined using one of these.
52  */
53 class AndMaybePostList : public BranchPostList {
54     private:
55 	Xapian::doccount dbsize; // only need in case we decay to an AndPostList
56 	Xapian::docid lhead, rhead;
57 	Xapian::weight lmax, rmax;
58 
59         PostList * process_next_or_skip_to(Xapian::weight w_min, PostList *ret);
60     public:
61 	Xapian::doccount get_termfreq_max() const;
62 	Xapian::doccount get_termfreq_min() const;
63 	Xapian::doccount get_termfreq_est() const;
64 
65 	TermFreqs get_termfreq_est_using_stats(
66 	    const Xapian::Weight::Internal & stats) const;
67 
68 	Xapian::docid  get_docid() const;
69 	Xapian::weight get_weight() const;
70 	Xapian::weight get_maxweight() const;
71 
72         Xapian::weight recalc_maxweight();
73 
74 	PostList *next(Xapian::weight w_min);
75 	PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
76 	bool   at_end() const;
77 
78 	std::string get_description() const;
79 
80 	/** Return the document length of the document the current term
81 	 *  comes from.
82 	 */
83 	virtual Xapian::termcount get_doclength() const;
84 
AndMaybePostList(PostList * left_,PostList * right_,MultiMatch * matcher_,Xapian::doccount dbsize_)85         AndMaybePostList(PostList *left_,
86 			 PostList *right_,
87 			 MultiMatch *matcher_,
88 			 Xapian::doccount dbsize_)
89 		: BranchPostList(left_, right_, matcher_),
90 		  dbsize(dbsize_), lhead(0), rhead(0)
91 	{
92 	    // lmax and rmax will get initialised by a recalc_maxweight
93 	}
94 
95 	/// Constructor for use by decomposing OrPostList
AndMaybePostList(PostList * left_,PostList * right_,MultiMatch * matcher_,Xapian::doccount dbsize_,Xapian::docid lhead_,Xapian::docid rhead_)96         AndMaybePostList(PostList *left_,
97 			 PostList *right_,
98 			 MultiMatch *matcher_,
99 			 Xapian::doccount dbsize_,
100 			 Xapian::docid lhead_,
101 			 Xapian::docid rhead_)
102 		: BranchPostList(left_, right_, matcher_),
103 		  dbsize(dbsize_), lhead(lhead_), rhead(rhead_)
104 	{
105 	    // Initialise the maxweights from the kids so we can avoid forcing
106 	    // a full maxweight recalc
107 	    lmax = l->get_maxweight();
108 	    rmax = r->get_maxweight();
109 	}
110 
111 	/** Synchronise the RHS to the LHS after construction.
112 	 *  Used after constructing from a decomposing OrPostList
113 	 */
114 	PostList * sync_rhs(Xapian::weight w_min);
115 
116 	/** get_wdf() for ANDMAYBE postlists returns the sum of the wdfs of the
117 	 *  sub postlists which are at the current document - this is desirable
118 	 *  when the ANDMAYBE is part of a synonym.
119 	 */
120 	Xapian::termcount get_wdf() const;
121 
122 	Xapian::termcount count_matching_subqs() const;
123 };
124 
125 #endif /* OM_HGUARD_ANDMAYBEPOSTLIST_H */
126