1 /*
2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3 *
4 * Distributable under the terms of either the Apache License (Version 2.0) or
5 * the GNU Lesser General Public License, as specified in the COPYING file.
6 *
7 * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
8 */
9 #include "CLucene/StdHeader.h"
10 #include "IndexSearcher.h"
11
12 #include "SearchHeader.h"
13 #include "Scorer.h"
14 #include "FieldDocSortedHitQueue.h"
15 #include "CLucene/store/Directory.h"
16 #include "CLucene/document/Document.h"
17 #include "CLucene/index/IndexReader.h"
18 #include "CLucene/index/Term.h"
19 #include "CLucene/util/BitSet.h"
20 #include "FieldSortedHitQueue.h"
21
22 CL_NS_USE(index)
23 CL_NS_USE(util)
24 CL_NS_USE(document)
25
26 CL_NS_DEF(search)
27
28 class SimpleTopDocsCollector : public HitCollector
29 {
30 private:
31 qreal minScore;
32 const CL_NS(util)::BitSet* bits;
33 HitQueue* hq;
34 size_t nDocs;
35 int32_t* totalHits;
36
37 public:
SimpleTopDocsCollector(const CL_NS (util)::BitSet * bs,HitQueue * hitQueue,int32_t * totalhits,size_t ndocs,const qreal ms=-1.0f)38 SimpleTopDocsCollector(const CL_NS(util)::BitSet* bs, HitQueue* hitQueue,
39 int32_t* totalhits, size_t ndocs, const qreal ms=-1.0f)
40 : minScore(ms),
41 bits(bs),
42 hq(hitQueue),
43 nDocs(ndocs),
44 totalHits(totalhits) {}
~SimpleTopDocsCollector()45 ~SimpleTopDocsCollector() {}
46
collect(const int32_t doc,const qreal score)47 void collect(const int32_t doc, const qreal score)
48 {
49 if (score > 0.0f // ignore zeroed buckets
50 && (bits == NULL || bits->get(doc))) { // skip docs not in bits
51 ++totalHits[0];
52 if (hq->size() < nDocs || (minScore==-1.0f || score >= minScore)) {
53 ScoreDoc sd = {doc, score};
54 hq->insert(sd); // update hit queue
55 if ( minScore != -1.0f )
56 minScore = hq->top().score; // maintain minScore
57 }
58 }
59 }
60 };
61
62 class SortedTopDocsCollector : public HitCollector
63 {
64 private:
65 const CL_NS(util)::BitSet* bits;
66 FieldSortedHitQueue* hq;
67 size_t nDocs;
68 int32_t* totalHits;
69 public:
SortedTopDocsCollector(const CL_NS (util)::BitSet * bs,FieldSortedHitQueue * hitQueue,int32_t * totalhits,size_t _nDocs)70 SortedTopDocsCollector(const CL_NS(util)::BitSet* bs,
71 FieldSortedHitQueue* hitQueue, int32_t* totalhits, size_t _nDocs)
72 : bits(bs),
73 hq(hitQueue),
74 nDocs(_nDocs),
75 totalHits(totalhits)
76 {
77 }
~SortedTopDocsCollector()78 ~SortedTopDocsCollector() {}
79
collect(const int32_t doc,const qreal score)80 void collect(const int32_t doc, const qreal score)
81 {
82 if (score > 0.0f && // ignore zeroed buckets
83 (bits==NULL || bits->get(doc))) { // skip docs not in bits
84 ++totalHits[0];
85 // TODO: see jlucene way... with fields def???
86 FieldDoc* fd = _CLNEW FieldDoc(doc, score);
87 if ( !hq->insert(fd) ) // update hit queue
88 _CLDELETE(fd);
89 }
90 }
91 };
92
93 class SimpleFilteredCollector : public HitCollector
94 {
95 private:
96 CL_NS(util)::BitSet* bits;
97 HitCollector* results;
98 public:
SimpleFilteredCollector(CL_NS (util)::BitSet * bs,HitCollector * collector)99 SimpleFilteredCollector(CL_NS(util)::BitSet* bs, HitCollector* collector)
100 : bits(bs),
101 results(collector) {}
~SimpleFilteredCollector()102 ~SimpleFilteredCollector() {}
103
104 protected:
collect(const int32_t doc,const qreal score)105 void collect(const int32_t doc, const qreal score)
106 {
107 // skip docs not in bits
108 if (bits->get(doc))
109 results->collect(doc, score);
110 }
111 };
112
113
IndexSearcher(const QString & path)114 IndexSearcher::IndexSearcher(const QString& path)
115 {
116 //Func - Constructor
117 // Creates a searcher searching the index in the named directory.
118 //Pre - path != NULL
119 //Post - The instance has been created
120
121 CND_PRECONDITION(!path.isEmpty(), "path is NULL");
122
123 reader = IndexReader::open(path);
124 readerOwner = true;
125 }
126
IndexSearcher(CL_NS (store)::Directory * directory)127 IndexSearcher::IndexSearcher(CL_NS(store)::Directory* directory)
128 {
129 //Func - Constructor
130 // Creates a searcher searching the index in the specified directory.
131 //Pre - path != NULL
132 //Post - The instance has been created
133
134 CND_PRECONDITION(directory != NULL, "directory is NULL");
135
136 reader = IndexReader::open(directory);
137 readerOwner = true;
138 }
139
IndexSearcher(IndexReader * r)140 IndexSearcher::IndexSearcher(IndexReader* r)
141 {
142 //Func - Constructor
143 // Creates a searcher searching the index with the provide IndexReader
144 //Pre - path != NULL
145 //Post - The instance has been created
146
147 reader = r;
148 readerOwner = false;
149 }
150
~IndexSearcher()151 IndexSearcher::~IndexSearcher()
152 {
153 //Func - Destructor
154 //Pre - true
155 //Post - The instance has been destroyed
156
157 close();
158 }
159
close()160 void IndexSearcher::close()
161 {
162 //Func - Frees resources associated with this Searcher.
163 //Pre - true
164 //Post - The resources associated have been freed
165 if (readerOwner && reader){
166 reader->close();
167 _CLDELETE(reader);
168 }
169 }
170
171 // inherit javadoc
docFreq(const Term * term) const172 int32_t IndexSearcher::docFreq(const Term* term) const
173 {
174 //Func -
175 //Pre - reader != NULL
176 //Post -
177
178 CND_PRECONDITION(reader != NULL, "reader is NULL");
179 return reader->docFreq(term);
180 }
181
182 // inherit javadoc
doc(int32_t i,CL_NS (document)::Document * d)183 bool IndexSearcher::doc(int32_t i, CL_NS(document)::Document* d)
184 {
185 //Func - Retrieves i-th document found
186 // For use by HitCollector implementations.
187 //Pre - reader != NULL
188 //Post - The i-th document has been returned
189
190 CND_PRECONDITION(reader != NULL, "reader is NULL");
191 return reader->document(i,d);
192 }
193
194 // inherit javadoc
maxDoc() const195 int32_t IndexSearcher::maxDoc() const
196 {
197 //Func - Return total number of documents including the ones marked deleted
198 //Pre - reader != NULL
199 //Post - The total number of documents including the ones marked deleted
200 // has been returned
201
202 CND_PRECONDITION(reader != NULL, "reader is NULL");
203 return reader->maxDoc();
204 }
205
_search(Query * query,Filter * filter,const int32_t nDocs)206 TopDocs* IndexSearcher::_search(Query* query, Filter* filter, const int32_t nDocs)
207 {
208 //Func -
209 //Pre - reader != NULL
210 //Post -
211
212 CND_PRECONDITION(reader != NULL, "reader is NULL");
213 CND_PRECONDITION(query != NULL, "query is NULL");
214
215 Weight* weight = query->weight(this);
216 Scorer* scorer = weight->scorer(reader);
217 if (scorer == NULL){
218 return _CLNEW TopDocs(0, NULL, 0);
219 }
220
221 BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
222 HitQueue* hq = _CLNEW HitQueue(nDocs);
223
224 //Check hq has been allocated properly
225 CND_CONDITION(hq != NULL, "Could not allocate memory for HitQueue hq");
226
227 int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
228 totalHits[0] = 0;
229
230 SimpleTopDocsCollector hitCol(bits,hq,totalHits,nDocs,0.0f);
231 scorer->score( &hitCol );
232 _CLDELETE(scorer);
233
234 int32_t scoreDocsLength = hq->size();
235
236 ScoreDoc* scoreDocs = _CL_NEWARRAY(ScoreDoc,scoreDocsLength);
237
238 for (int32_t i = scoreDocsLength-1; i >= 0; --i) // put docs in array
239 scoreDocs[i] = hq->pop();
240
241 int32_t totalHitsInt = totalHits[0];
242
243 _CLDELETE(hq);
244 if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
245 _CLDELETE(bits);
246 _CLDELETE_ARRAY(totalHits);
247 Query* wq = weight->getQuery();
248 if ( query != wq ) //query was re-written
249 _CLLDELETE(wq);
250 _CLDELETE(weight);
251
252 return _CLNEW TopDocs(totalHitsInt, scoreDocs, scoreDocsLength);
253 }
254
255 // inherit javadoc
_search(Query * query,Filter * filter,const int32_t nDocs,const Sort * sort)256 TopFieldDocs* IndexSearcher::_search(Query* query, Filter* filter,
257 const int32_t nDocs, const Sort* sort)
258 {
259 CND_PRECONDITION(reader != NULL, "reader is NULL");
260 CND_PRECONDITION(query != NULL, "query is NULL");
261
262 Weight* weight = query->weight(this);
263 Scorer* scorer = weight->scorer(reader);
264 if (scorer == NULL) {
265 return _CLNEW TopFieldDocs(0, NULL, 0, NULL );
266 }
267
268 BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
269 FieldSortedHitQueue hq(reader, sort->getSort(), nDocs);
270 int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
271 totalHits[0]=0;
272
273 SortedTopDocsCollector hitCol(bits,&hq,totalHits,nDocs);
274 scorer->score(&hitCol);
275 _CLDELETE(scorer);
276
277 int32_t hqLen = hq.size();
278 FieldDoc** fieldDocs = _CL_NEWARRAY(FieldDoc*,hqLen);
279 for (int32_t i = hqLen-1; i >= 0; --i){ // put docs in array
280 fieldDocs[i] = hq.fillFields (hq.pop());
281 }
282
283 Query* wq = weight->getQuery();
284 if ( query != wq ) //query was re-written
285 _CLLDELETE(wq);
286 _CLDELETE(weight);
287
288 SortField** hqFields = hq.getFields();
289 hq.setFields(NULL); //move ownership of memory over to TopFieldDocs
290 int32_t totalHits0 = totalHits[0];
291 if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
292 _CLDELETE(bits);
293 _CLDELETE_ARRAY(totalHits);
294 return _CLNEW TopFieldDocs(totalHits0, fieldDocs, hqLen, hqFields );
295 }
296
_search(Query * query,Filter * filter,HitCollector * results)297 void IndexSearcher::_search(Query* query, Filter* filter, HitCollector* results)
298 {
299 //Func - _search an index and fetch the results
300 // Applications should only use this if they need all of the
301 // matching documents. The high-level search API (search(Query))
302 // is usually more efficient, as it skips non-high-scoring hits.
303 //Pre - query is a valid reference to a query filter may or may not be NULL
304 // results is a valid reference to a HitCollector and used to store the results
305 //Post - filter if non-NULL, a bitset used to eliminate some documents
306
307 CND_PRECONDITION(reader != NULL, "reader is NULL");
308 CND_PRECONDITION(query != NULL, "query is NULL");
309
310 BitSet* bits = NULL;
311 SimpleFilteredCollector* fc = NULL;
312
313 if (filter != NULL){
314 bits = filter->bits(reader);
315 fc = _CLNEW SimpleFilteredCollector(bits, results);
316 }
317
318 Weight* weight = query->weight(this);
319 Scorer* scorer = weight->scorer(reader);
320 if (scorer != NULL) {
321 if (fc == NULL){
322 scorer->score(results);
323 }else{
324 scorer->score((HitCollector*)fc);
325 }
326 _CLDELETE(scorer);
327 }
328
329 _CLDELETE(fc);
330 _CLDELETE(weight);
331 if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
332 _CLDELETE(bits);
333 }
334
rewrite(Query * original)335 Query* IndexSearcher::rewrite(Query* original)
336 {
337 Query* query = original;
338 Query* last = original;
339 for (Query* rewrittenQuery = query->rewrite(reader);
340 rewrittenQuery != query;
341 rewrittenQuery = query->rewrite(reader)) {
342 query = rewrittenQuery;
343 if ( query != last && last != original) {
344 _CLDELETE(last);
345 }
346 last = query;
347 }
348 return query;
349 }
350
explain(Query * query,int32_t doc,Explanation * ret)351 void IndexSearcher::explain(Query* query, int32_t doc, Explanation* ret)
352 {
353 Weight* weight = query->weight(this);
354 weight->explain(reader, doc, ret);
355
356 Query* wq = weight->getQuery();
357 if ( query != wq ) //query was re-written
358 _CLLDELETE(wq);
359 _CLDELETE(weight);
360 }
361
362 CL_NS_END
363