1 /*
2  * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3  *
4  * Distributable under the terms of either the Apache License (Version 2.0) or
5  * the GNU Lesser General Public License, as specified in the COPYING file.
6  *
7  * Changes are Copyright(C) 2007, 2008 by Nokia Corporation and/or its subsidiary(-ies), all rights reserved.
8 */
9 #include "CLucene/StdHeader.h"
10 #include "IndexSearcher.h"
11 
12 #include "SearchHeader.h"
13 #include "Scorer.h"
14 #include "FieldDocSortedHitQueue.h"
15 #include "CLucene/store/Directory.h"
16 #include "CLucene/document/Document.h"
17 #include "CLucene/index/IndexReader.h"
18 #include "CLucene/index/Term.h"
19 #include "CLucene/util/BitSet.h"
20 #include "FieldSortedHitQueue.h"
21 
22 CL_NS_USE(index)
23 CL_NS_USE(util)
24 CL_NS_USE(document)
25 
26 CL_NS_DEF(search)
27 
28 class SimpleTopDocsCollector : public HitCollector
29 {
30 private:
31     qreal minScore;
32     const CL_NS(util)::BitSet* bits;
33     HitQueue* hq;
34     size_t nDocs;
35     int32_t* totalHits;
36 
37 public:
SimpleTopDocsCollector(const CL_NS (util)::BitSet * bs,HitQueue * hitQueue,int32_t * totalhits,size_t ndocs,const qreal ms=-1.0f)38     SimpleTopDocsCollector(const CL_NS(util)::BitSet* bs, HitQueue* hitQueue,
39         int32_t* totalhits, size_t ndocs, const qreal ms=-1.0f)
40         : minScore(ms),
41         bits(bs),
42         hq(hitQueue),
43         nDocs(ndocs),
44         totalHits(totalhits) {}
~SimpleTopDocsCollector()45     ~SimpleTopDocsCollector() {}
46 
collect(const int32_t doc,const qreal score)47     void collect(const int32_t doc, const qreal score)
48     {
49         if (score > 0.0f    // ignore zeroed buckets
50             && (bits == NULL || bits->get(doc))) {	  // skip docs not in bits
51                 ++totalHits[0];
52                 if (hq->size() < nDocs || (minScore==-1.0f || score >= minScore)) {
53                     ScoreDoc sd = {doc, score};
54                     hq->insert(sd);	  // update hit queue
55                     if ( minScore != -1.0f )
56                         minScore = hq->top().score; // maintain minScore
57                 }
58         }
59     }
60 };
61 
62 class SortedTopDocsCollector : public HitCollector
63 {
64 private:
65     const CL_NS(util)::BitSet* bits;
66     FieldSortedHitQueue* hq;
67     size_t nDocs;
68     int32_t* totalHits;
69 public:
SortedTopDocsCollector(const CL_NS (util)::BitSet * bs,FieldSortedHitQueue * hitQueue,int32_t * totalhits,size_t _nDocs)70     SortedTopDocsCollector(const CL_NS(util)::BitSet* bs,
71         FieldSortedHitQueue* hitQueue, int32_t* totalhits, size_t _nDocs)
72         : bits(bs),
73           hq(hitQueue),
74           nDocs(_nDocs),
75           totalHits(totalhits)
76       {
77       }
~SortedTopDocsCollector()78       ~SortedTopDocsCollector() {}
79 
collect(const int32_t doc,const qreal score)80       void collect(const int32_t doc, const qreal score)
81       {
82           if (score > 0.0f &&			  // ignore zeroed buckets
83               (bits==NULL || bits->get(doc))) {	  // skip docs not in bits
84                   ++totalHits[0];
85                   // TODO: see jlucene way... with fields def???
86                   FieldDoc* fd = _CLNEW FieldDoc(doc, score);
87                   if ( !hq->insert(fd) )	  // update hit queue
88                       _CLDELETE(fd);
89           }
90       }
91 };
92 
93 class SimpleFilteredCollector : public HitCollector
94 {
95 private:
96     CL_NS(util)::BitSet* bits;
97     HitCollector* results;
98 public:
SimpleFilteredCollector(CL_NS (util)::BitSet * bs,HitCollector * collector)99     SimpleFilteredCollector(CL_NS(util)::BitSet* bs, HitCollector* collector)
100         : bits(bs),
101           results(collector) {}
~SimpleFilteredCollector()102       ~SimpleFilteredCollector() {}
103 
104 protected:
collect(const int32_t doc,const qreal score)105     void collect(const int32_t doc, const qreal score)
106     {
107         // skip docs not in bits
108         if (bits->get(doc))
109             results->collect(doc, score);
110     }
111 };
112 
113 
IndexSearcher(const QString & path)114 IndexSearcher::IndexSearcher(const QString& path)
115 {
116     //Func - Constructor
117     //       Creates a searcher searching the index in the named directory.
118     //Pre  - path != NULL
119     //Post - The instance has been created
120 
121     CND_PRECONDITION(!path.isEmpty(), "path is NULL");
122 
123     reader = IndexReader::open(path);
124     readerOwner = true;
125 }
126 
IndexSearcher(CL_NS (store)::Directory * directory)127 IndexSearcher::IndexSearcher(CL_NS(store)::Directory* directory)
128 {
129     //Func - Constructor
130     //       Creates a searcher searching the index in the specified directory.
131     //Pre  - path != NULL
132     //Post - The instance has been created
133 
134     CND_PRECONDITION(directory != NULL, "directory is NULL");
135 
136     reader = IndexReader::open(directory);
137     readerOwner = true;
138 }
139 
IndexSearcher(IndexReader * r)140 IndexSearcher::IndexSearcher(IndexReader* r)
141 {
142     //Func - Constructor
143     //       Creates a searcher searching the index with the provide IndexReader
144     //Pre  - path != NULL
145     //Post - The instance has been created
146 
147     reader      = r;
148     readerOwner = false;
149 }
150 
~IndexSearcher()151 IndexSearcher::~IndexSearcher()
152 {
153     //Func - Destructor
154     //Pre  - true
155     //Post - The instance has been destroyed
156 
157     close();
158 }
159 
close()160 void IndexSearcher::close()
161 {
162     //Func - Frees resources associated with this Searcher.
163     //Pre  - true
164     //Post - The resources associated have been freed
165     if (readerOwner && reader){
166         reader->close();
167         _CLDELETE(reader);
168     }
169 }
170 
171 // inherit javadoc
docFreq(const Term * term) const172 int32_t IndexSearcher::docFreq(const Term* term) const
173 {
174     //Func -
175     //Pre  - reader != NULL
176     //Post -
177 
178     CND_PRECONDITION(reader != NULL, "reader is NULL");
179     return reader->docFreq(term);
180 }
181 
182 // inherit javadoc
doc(int32_t i,CL_NS (document)::Document * d)183 bool IndexSearcher::doc(int32_t i, CL_NS(document)::Document* d)
184 {
185     //Func - Retrieves i-th document found
186     //       For use by HitCollector implementations.
187     //Pre  - reader != NULL
188     //Post - The i-th document has been returned
189 
190     CND_PRECONDITION(reader != NULL, "reader is NULL");
191     return reader->document(i,d);
192 }
193 
194 // inherit javadoc
maxDoc() const195 int32_t IndexSearcher::maxDoc() const
196 {
197     //Func - Return total number of documents including the ones marked deleted
198     //Pre  - reader != NULL
199     //Post - The total number of documents including the ones marked deleted
200     //       has been returned
201 
202     CND_PRECONDITION(reader != NULL, "reader is NULL");
203     return reader->maxDoc();
204 }
205 
_search(Query * query,Filter * filter,const int32_t nDocs)206 TopDocs* IndexSearcher::_search(Query* query, Filter* filter, const int32_t nDocs)
207 {
208     //Func -
209     //Pre  - reader != NULL
210     //Post -
211 
212     CND_PRECONDITION(reader != NULL, "reader is NULL");
213     CND_PRECONDITION(query != NULL, "query is NULL");
214 
215     Weight* weight = query->weight(this);
216     Scorer* scorer = weight->scorer(reader);
217     if (scorer == NULL){
218         return _CLNEW TopDocs(0, NULL, 0);
219     }
220 
221     BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
222     HitQueue* hq = _CLNEW HitQueue(nDocs);
223 
224     //Check hq has been allocated properly
225     CND_CONDITION(hq != NULL, "Could not allocate memory for HitQueue hq");
226 
227     int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
228     totalHits[0] = 0;
229 
230     SimpleTopDocsCollector hitCol(bits,hq,totalHits,nDocs,0.0f);
231     scorer->score( &hitCol );
232     _CLDELETE(scorer);
233 
234     int32_t scoreDocsLength = hq->size();
235 
236     ScoreDoc* scoreDocs = _CL_NEWARRAY(ScoreDoc,scoreDocsLength);
237 
238     for (int32_t i = scoreDocsLength-1; i >= 0; --i)	  // put docs in array
239         scoreDocs[i] = hq->pop();
240 
241     int32_t totalHitsInt = totalHits[0];
242 
243     _CLDELETE(hq);
244     if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
245         _CLDELETE(bits);
246     _CLDELETE_ARRAY(totalHits);
247     Query* wq = weight->getQuery();
248     if ( query != wq ) //query was re-written
249         _CLLDELETE(wq);
250     _CLDELETE(weight);
251 
252     return _CLNEW TopDocs(totalHitsInt, scoreDocs, scoreDocsLength);
253 }
254 
255 // inherit javadoc
_search(Query * query,Filter * filter,const int32_t nDocs,const Sort * sort)256 TopFieldDocs* IndexSearcher::_search(Query* query, Filter* filter,
257     const int32_t nDocs, const Sort* sort)
258 {
259     CND_PRECONDITION(reader != NULL, "reader is NULL");
260     CND_PRECONDITION(query != NULL, "query is NULL");
261 
262     Weight* weight = query->weight(this);
263     Scorer* scorer = weight->scorer(reader);
264     if (scorer == NULL) {
265         return _CLNEW TopFieldDocs(0, NULL, 0, NULL );
266     }
267 
268     BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
269     FieldSortedHitQueue hq(reader, sort->getSort(), nDocs);
270     int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
271     totalHits[0]=0;
272 
273     SortedTopDocsCollector hitCol(bits,&hq,totalHits,nDocs);
274     scorer->score(&hitCol);
275     _CLDELETE(scorer);
276 
277     int32_t hqLen = hq.size();
278     FieldDoc** fieldDocs = _CL_NEWARRAY(FieldDoc*,hqLen);
279     for (int32_t i = hqLen-1; i >= 0; --i){	  // put docs in array
280         fieldDocs[i] = hq.fillFields (hq.pop());
281     }
282 
283     Query* wq = weight->getQuery();
284     if ( query != wq ) //query was re-written
285         _CLLDELETE(wq);
286     _CLDELETE(weight);
287 
288     SortField** hqFields = hq.getFields();
289     hq.setFields(NULL); //move ownership of memory over to TopFieldDocs
290     int32_t totalHits0 = totalHits[0];
291     if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
292         _CLDELETE(bits);
293     _CLDELETE_ARRAY(totalHits);
294     return _CLNEW TopFieldDocs(totalHits0, fieldDocs, hqLen, hqFields );
295 }
296 
_search(Query * query,Filter * filter,HitCollector * results)297 void IndexSearcher::_search(Query* query, Filter* filter, HitCollector* results)
298 {
299     //Func - _search an index and fetch the results
300     //       Applications should only use this if they need all of the
301     //       matching documents.  The high-level search API (search(Query))
302     //       is usually more efficient, as it skips non-high-scoring hits.
303     //Pre  - query is a valid reference to a query filter may or may not be NULL
304     //       results is a valid reference to a HitCollector and used to store the results
305     //Post - filter if non-NULL, a bitset used to eliminate some documents
306 
307     CND_PRECONDITION(reader != NULL, "reader is NULL");
308     CND_PRECONDITION(query != NULL, "query is NULL");
309 
310     BitSet* bits = NULL;
311     SimpleFilteredCollector* fc = NULL;
312 
313     if (filter != NULL){
314         bits = filter->bits(reader);
315         fc = _CLNEW SimpleFilteredCollector(bits, results);
316     }
317 
318     Weight* weight = query->weight(this);
319     Scorer* scorer = weight->scorer(reader);
320     if (scorer != NULL) {
321         if (fc == NULL){
322             scorer->score(results);
323         }else{
324             scorer->score((HitCollector*)fc);
325         }
326         _CLDELETE(scorer);
327     }
328 
329     _CLDELETE(fc);
330     _CLDELETE(weight);
331     if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
332         _CLDELETE(bits);
333 }
334 
rewrite(Query * original)335 Query* IndexSearcher::rewrite(Query* original)
336 {
337     Query* query = original;
338     Query* last = original;
339     for (Query* rewrittenQuery = query->rewrite(reader);
340         rewrittenQuery != query;
341         rewrittenQuery = query->rewrite(reader)) {
342             query = rewrittenQuery;
343             if ( query != last && last != original) {
344                 _CLDELETE(last);
345             }
346             last = query;
347     }
348     return query;
349 }
350 
explain(Query * query,int32_t doc,Explanation * ret)351 void IndexSearcher::explain(Query* query, int32_t doc, Explanation* ret)
352 {
353     Weight* weight = query->weight(this);
354     weight->explain(reader, doc, ret);
355 
356     Query* wq = weight->getQuery();
357     if ( query != wq ) //query was re-written
358         _CLLDELETE(wq);
359     _CLDELETE(weight);
360 }
361 
362 CL_NS_END
363