1 /** @file
2  * @brief tests of MatchSpy usage
3  */
4 /* Copyright 2007,2009 Lemur Consulting Ltd
5  * Copyright 2009,2011,2012,2015,2019 Olly Betts
6  * Copyright 2010 Richard Boulton
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
21  * USA
22  */
23 
#include <config.h>

#include "api_matchspy.h"

#include <xapian.h>

#include <algorithm>
#include <cmath>
#include <map>
#include <string>
#include <vector>

#include "backendmanager.h"
#include "str.h"
#include "testsuite.h"
#include "testutils.h"
#include "apitest.h"
39 
40 using namespace std;
41 
42 // #######################################################################
43 // # Tests start here
44 
45 class SimpleMatchSpy : public Xapian::MatchSpy {
46   public:
47     // Vector which will be filled with all the document contents seen.
48     std::vector<std::string> seen;
49 
operator ()(const Xapian::Document & doc,double)50     void operator()(const Xapian::Document &doc, double) {
51 	// Note that this is not recommended usage of get_data() - you
52 	// generally shouldn't call get_data() from inside a MatchSpy, because
53 	// it is (likely to be) a slow operation resulting in considerable IO.
54 	seen.push_back(doc.get_data());
55     }
56 };
57 
58 // Basic test of a matchspy.
59 DEFINE_TESTCASE(matchspy1, backend && !remote) {
60     Xapian::Database db(get_database("apitest_simpledata"));
61     Xapian::Enquire enquire(db);
62     enquire.set_query(Xapian::Query("this"));
63 
64     SimpleMatchSpy myspy;
65 
66     Xapian::MSet nospymset = enquire.get_mset(0, 100);
67     enquire.add_matchspy(&myspy);
68     Xapian::MSet spymset = enquire.get_mset(0, 100);
69 
70     // Check that the match estimates aren't affected by the matchspy.
71     TEST_EQUAL(nospymset, spymset);
72 
73     vector<bool> docid_checked(db.get_lastdocid());
74 
75     // Check that we get the expected number of matches, and that the stored
76     // document contents are right.
77     Xapian::MSetIterator i = spymset.begin();
78     TEST(i != spymset.end());
79     TEST_EQUAL(spymset.size(), 6);
80     TEST_EQUAL(myspy.seen.size(), spymset.size());
81 
82     std::sort(myspy.seen.begin(), myspy.seen.end());
83 
84     std::vector<std::string> seen2;
85     for ( ; i != spymset.end(); ++i) {
86 	const Xapian::Document doc(i.get_document());
87 	seen2.push_back(doc.get_data());
88     }
89     std::sort(seen2.begin(), seen2.end());
90 
91     TEST_EQUAL(myspy.seen.size(), seen2.size());
92     std::vector<std::string>::const_iterator j = myspy.seen.begin();
93     std::vector<std::string>::const_iterator j2 = seen2.begin();
94     for (; j != myspy.seen.end(); ++j, ++j2) {
95 	TEST_EQUAL(*j, *j2);
96     }
97 }
98 
values_to_repr(const Xapian::ValueCountMatchSpy & spy)99 static string values_to_repr(const Xapian::ValueCountMatchSpy & spy) {
100     string resultrepr("|");
101     for (Xapian::TermIterator i = spy.values_begin();
102 	 i != spy.values_end();
103 	 ++i) {
104 	resultrepr += *i;
105 	resultrepr += ':';
106 	resultrepr += str(i.get_termfreq());
107 	resultrepr += '|';
108     }
109     return resultrepr;
110 }
111 
112 static void
make_matchspy2_db(Xapian::WritableDatabase & db,const string &)113 make_matchspy2_db(Xapian::WritableDatabase &db, const string &)
114 {
115     for (int c = 1; c <= 25; ++c) {
116 	Xapian::Document doc;
117 	doc.set_data("Document " + str(c));
118 	int factors = 0;
119 	for (int factor = 1; factor <= c; ++factor) {
120 	    doc.add_term("all");
121 	    if (c % factor == 0) {
122 		doc.add_term("XFACT" + str(factor));
123 		++factors;
124 	    }
125 	}
126 
127 	// Number of factors.
128 	doc.add_value(0, str(factors));
129 	// Units digits.
130 	doc.add_value(1, str(c % 10));
131 	// Constant.
132 	doc.add_value(2, "fish");
133 	// Number of digits.
134 	doc.add_value(3, str(str(c).size()));
135 
136 	db.add_document(doc);
137     }
138 }
139 
// Check the totals and per-value counts reported by ValueCountMatchSpy for
// value slots 0, 1 and 3 of the generated "matchspy2" database.
DEFINE_TESTCASE(matchspy2, generated)
{
    Xapian::Database db = get_database("matchspy2", make_matchspy2_db);

    Xapian::ValueCountMatchSpy spy0(0);
    Xapian::ValueCountMatchSpy spy1(1);
    Xapian::ValueCountMatchSpy spy3(3);

    // The spies and the representation expected from each, index for index.
    Xapian::ValueCountMatchSpy * const spies[] = { &spy0, &spy1, &spy3 };
    static const char * const expected[] = {
	"|1:1|2:9|3:3|4:7|5:1|6:3|8:1|",
	"|0:2|1:3|2:3|3:3|4:3|5:3|6:2|7:2|8:2|9:2|",
	"|1:9|2:16|",
    };
    const size_t n_spies = sizeof(spies) / sizeof(spies[0]);

    Xapian::Enquire enq(db);

    enq.set_query(Xapian::Query("all"));
    if (startswith(get_dbtype(), "multi")) {
	// Without this, we short-cut on the second shard because we don't get
	// the documents in ascending weight order.
	enq.set_weighting_scheme(Xapian::CoordWeight());
    }

    for (size_t n = 0; n != n_spies; ++n) {
	enq.add_matchspy(spies[n]);
    }
    // Only 10 results are requested, but the totals below cover all 25
    // documents.
    Xapian::MSet mset(enq.get_mset(0, 10));

    for (size_t n = 0; n != n_spies; ++n) {
	TEST_EQUAL(spies[n]->get_total(), 25);
    }

    for (size_t n = 0; n != n_spies; ++n) {
	TEST_STRINGS_EQUAL(values_to_repr(*spies[n]), expected[n]);
    }
}
175 
DEFINE_TESTCASE(matchspy4,generated)176 DEFINE_TESTCASE(matchspy4, generated)
177 {
178     XFAIL_FOR_BACKEND("multi_remote",
179 		      "Matchspy counts hits on remote and locally");
180     XFAIL_FOR_BACKEND("multi_glass_remote",
181 		      "Matchspy counts hits on remote and locally");
182 
183     Xapian::Database db = get_database("matchspy2", make_matchspy2_db);
184 
185     // We're going to run the match twice - once sorted by relevance, and once
186     // sorted by a value.  This is a regression test - the matcher used to fail
187     // to show some documents to the spy when sorting by non-pure-relevance.
188     Xapian::ValueCountMatchSpy spya0(0);
189     Xapian::ValueCountMatchSpy spya1(1);
190     Xapian::ValueCountMatchSpy spya3(3);
191     Xapian::ValueCountMatchSpy spyb0(0);
192     Xapian::ValueCountMatchSpy spyb1(1);
193     Xapian::ValueCountMatchSpy spyb3(3);
194 
195     Xapian::Enquire enqa(db);
196     Xapian::Enquire enqb(db);
197 
198     enqa.set_query(Xapian::Query("all"));
199     if (startswith(get_dbtype(), "multi")) {
200 	// Without this, we short-cut on the second shard because we don't get
201 	// the documents in ascending weight order.
202 	enqa.set_weighting_scheme(Xapian::CoordWeight());
203     }
204     enqb.set_query(Xapian::Query("all"));
205 
206     enqa.add_matchspy(&spya0);
207     enqa.add_matchspy(&spya1);
208     enqa.add_matchspy(&spya3);
209     enqb.add_matchspy(&spyb0);
210     enqb.add_matchspy(&spyb1);
211     enqb.add_matchspy(&spyb3);
212 
213     Xapian::MSet mseta = enqa.get_mset(0, 10);
214     enqb.set_sort_by_value(0, false);
215     Xapian::MSet msetb = enqb.get_mset(0, 10, 100);
216 
217     TEST_EQUAL(spya0.get_total(), 25);
218     TEST_EQUAL(spya1.get_total(), 25);
219     TEST_EQUAL(spya3.get_total(), 25);
220     TEST_EQUAL(spyb0.get_total(), 25);
221     TEST_EQUAL(spyb1.get_total(), 25);
222     TEST_EQUAL(spyb3.get_total(), 25);
223 
224     static const char * const results[] = {
225 	"|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
226 	"|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
227 	"|",
228 	"|2:16|1:9|",
229 	"|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
230 	"|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
231 	"|",
232 	"|2:16|1:9|",
233 	NULL
234     };
235     std::vector<Xapian::ValueCountMatchSpy *> spies;
236     spies.push_back(&spya0);
237     spies.push_back(&spya1);
238     spies.push_back(NULL);
239     spies.push_back(&spya3);
240     spies.push_back(&spyb0);
241     spies.push_back(&spyb1);
242     spies.push_back(NULL);
243     spies.push_back(&spyb3);
244     for (Xapian::valueno v = 0; results[v]; ++v) {
245 	tout << "value " << v << endl;
246 	Xapian::ValueCountMatchSpy * spy = spies[v];
247 	string allvals_str("|");
248 	if (spy != NULL) {
249 	    size_t allvals_size = 0;
250 	    for (Xapian::TermIterator i = spy->top_values_begin(100);
251 		 i != spy->top_values_end(100);
252 		 ++i, ++allvals_size) {
253 		allvals_str += *i;
254 		allvals_str += ':';
255 		allvals_str += str(i.get_termfreq());
256 		allvals_str += '|';
257 	    }
258 	    tout << allvals_str << endl;
259 	    TEST_STRINGS_EQUAL(allvals_str, results[v]);
260 
261 	    for (size_t count = 0; count < allvals_size; ++count) {
262 		tout << "count " << count << endl;
263 		for (Xapian::TermIterator i = spy->top_values_begin(100),
264 		     j = spy->top_values_begin(count);
265 		     i != spy->top_values_end(100) &&
266 		     j != spy->top_values_end(count);
267 		     ++i, ++j) {
268 		    tout << "j " << j << endl;
269 		    TEST_EQUAL(*i, *j);
270 		    TEST_EQUAL(i.get_termfreq(), j.get_termfreq());
271 		}
272 	    }
273 	}
274     }
275 }
276 
277 // Test builtin match spies
DEFINE_TESTCASE(matchspy5,backend)278 DEFINE_TESTCASE(matchspy5, backend)
279 {
280     Xapian::Database db(get_database("apitest_simpledata"));
281     Xapian::Enquire enquire(db);
282     enquire.set_query(Xapian::Query("this"));
283 
284     Xapian::ValueCountMatchSpy myspy1(1);
285     Xapian::ValueCountMatchSpy myspy2(1);
286 
287     enquire.add_matchspy(&myspy1);
288     enquire.add_matchspy(&myspy2);
289     Xapian::MSet mymset = enquire.get_mset(0, 100);
290     TEST_EQUAL(mymset.size(), 6);
291 
292     Xapian::TermIterator i = myspy1.values_begin();
293     TEST(i != myspy1.values_end());
294     TEST(*i == "h");
295     TEST_EQUAL(i.get_termfreq(), 5);
296     ++i;
297     TEST(i != myspy1.values_end());
298     TEST(*i == "n");
299     TEST_EQUAL(i.get_termfreq(), 1);
300     ++i;
301     TEST(i == myspy1.values_end());
302 
303     i = myspy2.values_begin();
304     TEST(i != myspy2.values_end());
305     TEST(*i == "h");
306     TEST_EQUAL(i.get_termfreq(), 5);
307     ++i;
308     TEST(i != myspy2.values_end());
309     TEST(*i == "n");
310     TEST_EQUAL(i.get_termfreq(), 1);
311     ++i;
312     TEST(i == myspy2.values_end());
313 }
314 
// Minimal MatchSpy subclass: operator() ignores both of its arguments and
// does nothing, and no other MatchSpy method is overridden here.
class MySpy : public Xapian::MatchSpy {
    void operator()(const Xapian::Document &, double) {
    }
};
319 
320 // Test exceptions from matchspy base class, and get_description method.
321 DEFINE_TESTCASE(matchspy6, !backend)
322 {
323     MySpy spy;
324 
325     TEST_EXCEPTION(Xapian::UnimplementedError, spy.clone());
326     TEST_EXCEPTION(Xapian::UnimplementedError, spy.name());
327     TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise());
328     TEST_EXCEPTION(Xapian::UnimplementedError,
329 		   spy.unserialise(std::string(), Xapian::Registry()));
330     TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise_results());
331     TEST_EXCEPTION(Xapian::UnimplementedError,
332 		   spy.merge_results(std::string()));
333     TEST_EQUAL(spy.get_description(), "Xapian::MatchSpy()");
334 }
335 
336 /// Regression test for bug fixed in 1.4.12.
337 DEFINE_TESTCASE(matchspy7, !backend)
338 {
339     Xapian::ValueCountMatchSpy myspy(1);
340     string s = myspy.serialise_results();
341     s += 'x';
342     // This merge_results() call used to enter an infinite loop.
343     TEST_EXCEPTION(Xapian::NetworkError, myspy.merge_results(s));
344 }
345