1 /////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
5 /////////////////////////////////////////////////////////////////////////////
6
7 #include "LuceneInc.h"
8 #include "TermScorer.h"
9 #include "TermDocs.h"
10 #include "Similarity.h"
11 #include "Weight.h"
12 #include "Collector.h"
13
14 namespace Lucene {
15
16 const int32_t TermScorer::SCORE_CACHE_SIZE = 32;
17
TermScorer(const WeightPtr & weight,const TermDocsPtr & td,const SimilarityPtr & similarity,ByteArray norms)18 TermScorer::TermScorer(const WeightPtr& weight, const TermDocsPtr& td, const SimilarityPtr& similarity, ByteArray norms) : Scorer(similarity) {
19 this->weight = weight;
20 this->termDocs = td;
21 this->norms = norms;
22 this->weightValue = weight->getValue();
23 this->doc = -1;
24 this->docs = Collection<int32_t>::newInstance(32);
25 this->freqs = Collection<int32_t>::newInstance(32);
26 this->pointer = 0;
27 this->pointerMax = 0;
28 this->scoreCache = Collection<double>::newInstance(SCORE_CACHE_SIZE);
29
30 for (int32_t i = 0; i < SCORE_CACHE_SIZE; ++i) {
31 scoreCache[i] = getSimilarity()->tf(i) * weightValue;
32 }
33 }
34
~TermScorer()35 TermScorer::~TermScorer() {
36 }
37
SIM_NORM_DECODER()38 const Collection<double> TermScorer::SIM_NORM_DECODER() {
39 return Similarity::getNormDecoder();
40 }
41
score(const CollectorPtr & collector)42 void TermScorer::score(const CollectorPtr& collector) {
43 score(collector, INT_MAX, nextDoc());
44 }
45
score(const CollectorPtr & collector,int32_t max,int32_t firstDocID)46 bool TermScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) {
47 // firstDocID is ignored since nextDoc() sets 'doc'
48 collector->setScorer(shared_from_this());
49 while (doc < max) { // for docs in window
50 collector->collect(doc);
51
52 if (++pointer >= pointerMax) {
53 pointerMax = termDocs->read(docs, freqs); // refill buffers
54 if (pointerMax != 0) {
55 pointer = 0;
56 } else {
57 termDocs->close(); // close stream
58 doc = INT_MAX; // set to sentinel value
59 return false;
60 }
61 }
62 doc = docs[pointer];
63 freq = freqs[pointer];
64 }
65 return true;
66 }
67
docID()68 int32_t TermScorer::docID() {
69 return doc;
70 }
71
nextDoc()72 int32_t TermScorer::nextDoc() {
73 ++pointer;
74 if (pointer >= pointerMax) {
75 pointerMax = termDocs->read(docs, freqs); // refill buffer
76 if (pointerMax != 0) {
77 pointer = 0;
78 } else {
79 termDocs->close(); // close stream
80 doc = NO_MORE_DOCS;
81 return doc;
82 }
83 }
84 doc = docs[pointer];
85 freq = freqs[pointer];
86
87 return doc;
88 }
89
score()90 double TermScorer::score() {
91 BOOST_ASSERT(doc != -1);
92 double raw = freq < SCORE_CACHE_SIZE ? scoreCache[freq] : getSimilarity()->tf(freq) * weightValue; // compute tf(f) * weight
93 return norms ? raw * SIM_NORM_DECODER()[norms[doc] & 0xff] : raw; // normalize for field
94 }
95
advance(int32_t target)96 int32_t TermScorer::advance(int32_t target) {
97 // first scan in cache
98 for (++pointer; pointer < pointerMax; ++pointer) {
99 if (docs[pointer] >= target) {
100 doc = docs[pointer];
101 freq = freqs[pointer];
102 return doc;
103 }
104 }
105
106 // not found in cache, seek underlying stream
107 bool result = termDocs->skipTo(target);
108 if (result) {
109 pointerMax = 1;
110 pointer = 0;
111 doc = termDocs->doc();
112 docs[pointer] = doc;
113 freqs[pointer] = freq = termDocs->freq();
114 } else {
115 doc = NO_MORE_DOCS;
116 }
117 return doc;
118 }
119
toString()120 String TermScorer::toString() {
121 return L"term scorer(" + weight->toString() + L")";
122 }
123
124 }
125