1 /*------------------------------------------------------------------------------
2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3 *
4 * Distributable under the terms of either the Apache License (Version 2.0) or
5 * the GNU Lesser General Public License, as specified in the COPYING file.
6 ------------------------------------------------------------------------------*/
#include "QueryUtils.h"

#include <string.h>

#include "CLucene/search/Scorer.h"
#include "CheckHits.h"
10
11 /////////////////////////////////////////////////////////////////////////////
12 int32_t QueryUtils::skip_op = 0;
13 int32_t QueryUtils::next_op = 1;
14 float_t QueryUtils::maxDiff = 1e-5f;
15
16
17 /////////////////////////////////////////////////////////////////////////////
18 class WhackyQuery : public CL_NS(search)::Query
19 {
20 public:
WhackyQuery()21 WhackyQuery() {};
~WhackyQuery()22 virtual ~WhackyQuery() {}
23
clone() const24 Query * clone() const
25 {
26 return _CLNEW WhackyQuery();
27 }
28
getClassName()29 static const char * getClassName()
30 {
31 return "WhackyQuery";
32 }
33
getObjectName() const34 const char * getObjectName() const
35 {
36 return getClassName();
37 }
38
toString(const TCHAR * field) const39 TCHAR* toString(const TCHAR* field) const
40 {
41 return STRDUP_TtoT( _T( "My Whacky Query" ));
42 }
43
equals(Query * other) const44 bool equals(Query* other) const
45 {
46 if( this == other ) return true;
47 if( other == NULL || !( other->instanceOf( WhackyQuery::getClassName() )))
48 return false;
49
50 return true;
51 }
52
hashCode() const53 size_t hashCode() const
54 {
55 size_t result = Similarity::floatToByte( getBoost() ) ^ 0x97AF937F;
56 return result;
57 }
58 };
59
60 /////////////////////////////////////////////////////////////////////////////
61 class QueryUtilsHitCollector1 : public CL_NS(search)::HitCollector
62 {
63 public:
64 int32_t * order;
65 int32_t * opidx;
66 int32_t orderLength;
67 int32_t * sdoc;
68 Scorer * scorer;
69 Query * q;
70 CuTest * tc;
71
72 public:
collect(const int32_t doc,const float_t score)73 void collect( const int32_t doc, const float_t score )
74 {
75 int32_t op = order[ (opidx[ 0 ]++ ) % orderLength ];
76 bool more = ( op == QueryUtils::skip_op ) ? scorer->skipTo( sdoc[ 0 ] + 1 ) : scorer->next();
77 sdoc[ 0 ] = scorer->doc();
78 float_t scorerScore = scorer->score();
79 float_t scorerScore2 = scorer->score();
80 float_t scoreDiff = score > scorerScore ? score - scorerScore : scorerScore - score;
81 float_t scorerDiff = scorerScore2 > scorerScore2 ? scorerScore2 - scorerScore : scorerScore - scorerScore2;
82 if( ! more || doc != sdoc[ 0 ] || scoreDiff > QueryUtils::maxDiff || scorerDiff > QueryUtils::maxDiff )
83 {
84 StringBuffer buffer;
85 buffer.append( _T( "ERROR matching docs:\n\t" ));
86
87 buffer.append( doc != sdoc[ 0 ] ? _T( "--> doc=" ) : _T( "doc=" ));
88 buffer.appendInt( sdoc[ 0 ] );
89
90 buffer.append( ! more ? _T( "\n\t--> tscorer.more=" ) : _T( "\n\ttscorer.more=" ));
91 buffer.appendBool( more );
92
93 buffer.append( scoreDiff > QueryUtils::maxDiff ? _T( "\n\t--> scorerScore=" ) : _T( "\n\tscorerScore=" ));
94 buffer.appendFloat( scorerScore, 2 );
95 buffer.append( _T( " scoreDiff=" ));
96 buffer.appendFloat( scoreDiff, 2 );
97 buffer.append( _T( " maxDiff=" ));
98 buffer.appendFloat( QueryUtils::maxDiff, 2 );
99
100 buffer.append( scorerDiff > QueryUtils::maxDiff ? _T( "\n\t--> scorerScore2=" ) : _T( "\n\tscorerScore2=" ));
101 buffer.appendFloat( scorerScore2, 2 );
102 buffer.append( _T( " scorerDiff=" ));
103 buffer.appendFloat( scorerDiff, 2 );
104
105 buffer.append( _T( "\n\thitCollector.doc=" ));
106 buffer.appendInt( doc );
107 buffer.append( _T( " score=" ));
108 buffer.appendFloat( score, 2 );
109
110 buffer.append( _T( "\n\t Scorer=" ));
111 TCHAR * tmp = scorer->toString();
112 buffer.append( tmp );
113 _CLDELETE_LARRAY( tmp );
114
115 buffer.append( _T( "\n\t Query=" ));
116 tmp = q->toString();
117 buffer.append( tmp );
118 _CLDELETE_ARRAY( tmp );
119
120 buffer.append( _T( "\n\t Order=" ));
121 for( int32_t i = 0; i < orderLength; i++)
122 buffer.append( order[ i ] == QueryUtils::skip_op ? _T( " skip()" ): _T( " next()" ));
123
124 buffer.append( _T( "\n\t Op=" ));
125 buffer.append( op == QueryUtils::skip_op ? _T( " skip()" ) : _T( " next()" ));
126
127 assertTrueMsg( buffer.getBuffer(), false );
128 }
129 }
130 };
131
132 /////////////////////////////////////////////////////////////////////////////
133 class QueryUtilsHitCollector2 : public CL_NS(search)::HitCollector
134 {
135 public:
136 int32_t * lastDoc;
137 Query * q;
138 IndexSearcher * s;
139 CuTest * tc;
140
141 public:
collect(const int32_t doc,const float_t score)142 void collect( const int32_t doc, const float_t score )
143 {
144 for( int32_t i = lastDoc[ 0 ] + 1; i <= doc; i++ )
145 {
146 Weight * w = q->weight( s );
147 Scorer * scorer = w->scorer( s->getReader() );
148
149 if( ! scorer->skipTo( i ) )
150 {
151 StringBuffer buffer;
152 buffer.append( _T( "query collected " ));
153 buffer.appendInt( doc );
154 buffer.append( _T( " but skipTo(" ));
155 buffer.appendInt( i );
156 buffer.append( _T( ") says no more docs!" ));
157 assertTrueMsg( buffer.getBuffer(), false );
158 }
159
160 if( doc != scorer->doc() )
161 {
162 StringBuffer buffer;
163 buffer.append( _T( "query collected " ));
164 buffer.appendInt( doc );
165 buffer.append( _T( " but skipTo(" ));
166 buffer.appendInt( i );
167 buffer.append( _T( ") got to " ));
168 buffer.appendInt( scorer->doc() );
169 assertTrueMsg( buffer.getBuffer(), false );
170 }
171
172 float_t skipToScore = scorer->score();
173 float_t sd = skipToScore - scorer->score();
174 if( ( sd < 0 ? sd * -1 : sd ) > QueryUtils::maxDiff )
175 {
176 StringBuffer buffer;
177 buffer.append( _T( "unstable skipTo(" ));
178 buffer.appendInt( i );
179 buffer.append( _T( ") score: " ));
180 buffer.appendFloat( skipToScore, 2 );
181 buffer.append( _T( "/") );
182 buffer.appendFloat( QueryUtils::maxDiff, 2 );
183 assertTrueMsg( buffer.getBuffer(), false );
184 }
185
186 if( ( skipToScore > score ? skipToScore - score : score - skipToScore ) > QueryUtils::maxDiff )
187 {
188 StringBuffer buffer;
189 buffer.append( _T( "query assigned doc " ));
190 buffer.appendInt( doc );
191 buffer.append( _T( " a score of <" ));
192 buffer.appendFloat( score, 2 );
193 buffer.append( _T( "> but skipTo(" ));
194 buffer.appendInt( i );
195 buffer.append( _T( ") has <" ));
196 buffer.appendFloat( skipToScore, 2 );
197 buffer.append( _T( ">!" ));
198 assertTrueMsg( buffer.getBuffer(), false );
199 }
200
201 _CLLDELETE( scorer );
202 _CLLDELETE( w );
203 }
204 lastDoc[ 0 ] = doc;
205 }
206 };
207
208 /////////////////////////////////////////////////////////////////////////////
check(CuTest * tc,Query * q)209 void QueryUtils::check( CuTest* tc, Query * q )
210 {
211 checkHashEquals( tc, q );
212 }
213
checkHashEquals(CuTest * tc,Query * q)214 void QueryUtils::checkHashEquals( CuTest* tc, Query * q )
215 {
216 Query * q2 = q->clone();
217 checkEqual( tc, q, q2 );
218
219 Query * q3 = q->clone();
220 q3->setBoost( 7.21792348f );
221 checkUnequal( tc, q, q3 );
222
223 // test that a class check is done so that no exception is thrown
224 // in the implementation of equals()
225 Query * whacky = _CLNEW WhackyQuery();
226 whacky->setBoost( q->getBoost() );
227 checkUnequal( tc, q, whacky );
228
229 _CLLDELETE( q2 );
230 _CLLDELETE( q3 );
231 _CLLDELETE( whacky );
232 }
233
checkEqual(CuTest * tc,Query * q1,Query * q2)234 void QueryUtils::checkEqual( CuTest* tc, Query * q1, Query * q2 )
235 {
236 assertTrue( q1->equals( q2 ));
237 assertTrue( q2->equals( q1 ));
238 assertTrue( q1->hashCode() == q2->hashCode() );
239 }
240
checkUnequal(CuTest * tc,Query * q1,Query * q2)241 void QueryUtils::checkUnequal( CuTest* tc, Query * q1, Query * q2 )
242 {
243 assertTrue( ! q1->equals( q2 ));
244 assertTrue( ! q2->equals( q1 ));
245
246 // possible this test can fail on a hash collision... if that
247 // happens, please change test to use a different example.
248 assertTrue( q1->hashCode() != q2->hashCode());
249 }
250
checkExplanations(CuTest * tc,Query * q,Searcher * s)251 void QueryUtils::checkExplanations( CuTest* tc, Query * q, Searcher * s )
252 {
253 CheckHits::checkExplanations( tc, q, NULL, s, true );
254 }
255
check(CuTest * tc,Query * q1,Searcher * s)256 void QueryUtils::check( CuTest* tc, Query * q1, Searcher * s )
257 {
258 check( tc, q1 );
259 if( s )
260 {
261 if( s->getObjectName() == IndexSearcher::getClassName())
262 {
263 IndexSearcher * is = (IndexSearcher*) s;
264 checkFirstSkipTo( tc, q1, is );
265 checkSkipTo( tc, q1, is );
266 }
267
268 checkExplanations( tc, q1, s );
269 checkSerialization( tc, q1, s );
270 }
271 }
272
checkSerialization(CuTest * tc,Query * q,Searcher * s)273 void QueryUtils::checkSerialization( CuTest* tc, Query * q, Searcher * s )
274 {
275 Weight * w = q->weight( s );
276 // TODO: Port this test
277 // try {
278 // ByteArrayOutputStream bos = new ByteArrayOutputStream();
279 // ObjectOutputStream oos = new ObjectOutputStream(bos);
280 // oos.writeObject(w.);
281 // oos.close();
282 // ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray()));
283 // ois.readObject();
284 // ois.close();
285 //
286 // //skip rquals() test for now - most weights don't overide equals() and we won't add this just for the tests.
287 // //TestCase.assertEquals("writeObject(w) != w. ("+w+")",w2,w);
288 //
289 // } catch (Exception e) {
290 // IOException e2 = new IOException("Serialization failed for "+w);
291 // e2.initCause(e);
292 // throw e2;
293 // }
294 _CLLDELETE( w );
295 }
296
checkSkipTo(CuTest * tc,Query * q,IndexSearcher * s)297 void QueryUtils::checkSkipTo( CuTest* tc, Query * q, IndexSearcher * s )
298 {
299 if( BooleanQuery::getAllowDocsOutOfOrder())
300 return; // in this case order of skipTo() might differ from that of next().
301
302 int32_t order0[] = {next_op};
303 int32_t order1[] = {skip_op};
304 int32_t order2[] = {skip_op, next_op};
305 int32_t order3[] = {next_op, skip_op};
306 int32_t order4[] = {skip_op, skip_op, next_op, next_op};
307 int32_t order5[] = {next_op, next_op, skip_op, skip_op};
308 int32_t order6[] = {skip_op, skip_op, skip_op, next_op, next_op};
309 int32_t ordersLength[] = { 1, 1, 2, 2, 4, 4, 5 };
310 int32_t * orders[] = { order0, order1, order2, order3, order4, order5, order6 };
311 size_t ordersCount = 7;
312
313 for( size_t k = 0; k < ordersCount; k++ )
314 {
315 int32_t * order = orders[ k ];
316 int32_t opidx[] = { 0 };
317
318 Weight * w = q->weight( s );
319 Scorer * scorer = w->scorer( s->getReader() );
320
321 // FUTURE: ensure scorer.doc()==-1
322
323 int32_t * sdoc = _CL_NEWARRAY( int32_t, 1 );
324 sdoc[ 0 ] = -1;
325
326 QueryUtilsHitCollector1 hitCollector;
327 hitCollector.order = order;
328 hitCollector.opidx = opidx;
329 hitCollector.orderLength = ordersLength[ k ];
330 hitCollector.sdoc = sdoc;
331 hitCollector.scorer = scorer;
332 hitCollector.q = q;
333 hitCollector.tc = tc;
334
335 s->_search( q, NULL, &hitCollector );
336
337 // make sure next call to scorer is false.
338 int32_t op = order[ (opidx[ 0 ]++ ) % ordersLength[ k ] ];
339 bool more = ( op == skip_op ) ? scorer->skipTo( sdoc[ 0 ] + 1 ) : scorer->next();
340 assertTrue( ! more );
341
342 _CLDELETE_LARRAY( sdoc );
343 _CLLDELETE( scorer );
344 _CLLDELETE( w );
345 }
346 }
347
checkFirstSkipTo(CuTest * tc,Query * q,IndexSearcher * s)348 void QueryUtils::checkFirstSkipTo( CuTest* tc, Query * q, IndexSearcher * s )
349 {
350 int32_t lastDoc[] = {-1};
351 QueryUtilsHitCollector2 hitCollector;
352 hitCollector.lastDoc = lastDoc;
353 hitCollector.q = q;
354 hitCollector.s = s;
355 hitCollector.tc = tc;
356
357 s->_search( q, NULL, &hitCollector );
358
359 Weight * w = q->weight( s );
360 Scorer * scorer = w->scorer( s->getReader() );
361 bool more = scorer->skipTo( lastDoc[ 0 ] + 1 );
362 if( more )
363 {
364 StringBuffer buffer;
365 buffer.append( _T( "query's last doc was " ));
366 buffer.appendInt( lastDoc[ 0 ] );
367 buffer.append( _T( " but skipTo(" ));
368 buffer.appendInt( lastDoc[ 0 ] + 1 );
369 buffer.append( _T( ") got to " ));
370 buffer.appendInt( scorer->doc() );
371 assertTrueMsg( buffer.getBuffer(), false );
372 }
373
374 _CLLDELETE( scorer );
375 _CLLDELETE( w );
376 }
377