1 /*------------------------------------------------------------------------------
2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3 *
4 * Distributable under the terms of either the Apache License (Version 2.0) or
5 * the GNU Lesser General Public License, as specified in the COPYING file.
6 ------------------------------------------------------------------------------*/
7 #include "CLucene/_ApiHeader.h"
8 #include "TermQuery.h"
9 
10 #include "SearchHeader.h"
11 #include "Scorer.h"
12 #include "CLucene/index/Term.h"
13 #include "Explanation.h"
14 #include "Similarity.h"
15 #include "Searchable.h"
16 #include "_TermScorer.h"
17 #include "CLucene/index/IndexReader.h"
18 #include "CLucene/util/StringBuffer.h"
19 #include "CLucene/index/Terms.h"
20 
21 #include <assert.h>
22 
23 CL_NS_USE(index)
24 CL_NS_DEF(search)
25 
26 
27 
28 	class TermWeight: public Weight {
29 	private:
30 		Similarity* similarity; // ISH: was Searcher*, for no apparent reason
31 		float_t value;
32 		float_t idf;
33 		float_t queryNorm;
34 		float_t queryWeight;
35 
36 		TermQuery* parentQuery;	// CLucene specific
37 		CL_NS(index)::Term* _term;
38 
39 	public:
40 		TermWeight(Searcher* searcher, TermQuery* parentQuery, CL_NS(index)::Term* _term);
41 		virtual ~TermWeight();
42 
43 		// return a *new* string describing this object
44 		TCHAR* toString();
getQuery()45 		Query* getQuery() { return (Query*)parentQuery; }
getValue()46 		float_t getValue() { return value; }
47 
48 		float_t sumOfSquaredWeights();
49 		void normalize(float_t queryNorm);
50 		Scorer* scorer(CL_NS(index)::IndexReader* reader);
51 		Explanation* explain(CL_NS(index)::IndexReader* reader, int32_t doc);
52 	};
53 
54 
55 	/** Constructs a query for the term <code>t</code>. */
TermQuery(Term * t)56 	TermQuery::TermQuery(Term* t):
57 		term( _CL_POINTER(t) )
58 	{
59 	}
TermQuery(const TermQuery & clone)60 	TermQuery::TermQuery(const TermQuery& clone):
61   		Query(clone){
62 		this->term=_CL_POINTER(clone.term);
63 	}
~TermQuery()64 	TermQuery::~TermQuery(){
65 	    _CLLDECDELETE(term);
66 	}
67 
clone() const68 	Query* TermQuery::clone() const{
69 		return _CLNEW TermQuery(*this);
70 	}
71 
getClassName()72 	const char* TermQuery::getClassName(){
73 		return "TermQuery";
74 	}
getObjectName() const75 	const char* TermQuery::getObjectName() const{
76 		return getClassName();
77 	}
hashCode() const78 	size_t TermQuery::hashCode() const {
79 		return Similarity::floatToByte(getBoost()) ^ term->hashCode();
80 	}
81 
82 	//added by search highlighter
getTerm(bool pointer) const83 	Term* TermQuery::getTerm(bool pointer) const
84 	{
85 		if ( pointer )
86 			return _CL_POINTER(term);
87 		else
88 			return term;
89 	}
90 
toString(const TCHAR * field) const91 	TCHAR* TermQuery::toString(const TCHAR* field) const{
92 		CL_NS(util)::StringBuffer buffer;
93 		if ( field==NULL || _tcscmp(term->field(),field)!= 0 ) {
94 			buffer.append(term->field());
95 			buffer.append(_T(":"));
96 		}
97 		buffer.append(term->text());
98 		if (getBoost() != 1.0f) {
99 			buffer.append(_T("^"));
100 			buffer.appendFloat( getBoost(),1 );
101 		}
102 		return buffer.toString();
103 	}
104 
equals(Query * other) const105 	bool TermQuery::equals(Query* other) const {
106 		if (!(other->instanceOf(TermQuery::getClassName())))
107 			return false;
108 
109 		TermQuery* tq = (TermQuery*)other;
110 		return (this->getBoost() == tq->getBoost())
111 			&& this->term->equals(tq->term);
112 	}
113 
TermWeight(Searcher * _searcher,TermQuery * _parentQuery,Term * term)114    TermWeight::TermWeight(Searcher* _searcher, TermQuery* _parentQuery, Term* term):similarity(_searcher->getSimilarity()),
115 	   value(0), queryNorm(0),queryWeight(0), parentQuery(_parentQuery),_term(term)
116    {
117 		   idf = similarity->idf(term, _searcher); // compute idf
118    }
119 
~TermWeight()120    TermWeight::~TermWeight(){
121    }
122 
123    //
toString()124    TCHAR* TermWeight::toString() {
125 	   int32_t size=strlen(parentQuery->getObjectName()) + 10;
126 	   TCHAR* tmp = _CL_NEWARRAY(TCHAR, size);
127 	   _sntprintf(tmp,size,_T("weight(%S)"),parentQuery->getObjectName());
128 	   return tmp;
129    }
130 
sumOfSquaredWeights()131 	float_t TermWeight::sumOfSquaredWeights() {
132 		// legacy // idf = parentQuery->getSimilarity(searcher)->idf(_term, searcher); // compute idf
133 		queryWeight = idf * parentQuery->getBoost();             // compute query weight
134 		return queryWeight * queryWeight;           // square it
135 	}
136 
normalize(float_t _queryNorm)137 	void TermWeight::normalize(float_t _queryNorm) {
138 		this->queryNorm = _queryNorm;
139 		queryWeight *= queryNorm;                   // normalize query weight
140 		value = queryWeight * idf;                  // idf for document
141 	}
142 
scorer(IndexReader * reader)143 	Scorer* TermWeight::scorer(IndexReader* reader) {
144 		TermDocs* termDocs = reader->termDocs(_term);
145 
146 		if (termDocs == NULL)
147 			return NULL;
148 
149 		return _CLNEW TermScorer(this, termDocs, similarity,
150 								reader->norms(_term->field()));
151 	}
152 
explain(IndexReader * reader,int32_t doc)153 	Explanation* TermWeight::explain(IndexReader* reader, int32_t doc){
154 		ComplexExplanation* result = _CLNEW ComplexExplanation();
155 
156 		TCHAR buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN];
157         TCHAR* tmp;
158 
159         tmp = getQuery()->toString();
160 		_sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,
161 			_T("weight(%s in %d), product of:"),tmp,doc);
162         _CLDELETE_LCARRAY(tmp);
163 		result->setDescription(buf);
164 
165 		_sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,
166 			_T("idf(docFreq=%d, numDocs=%d)"), reader->docFreq(_term), reader->numDocs() );
167 		Explanation* idfExpl = _CLNEW Explanation(idf, buf);
168 
169 		// explain query weight
170 		Explanation* queryExpl = _CLNEW Explanation();
171         tmp = getQuery()->toString();
172 		_sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,
173 			_T("queryWeight(%s), product of:"), tmp);
174         _CLDELETE_LCARRAY(tmp);
175 		queryExpl->setDescription(buf);
176 
177 		Explanation* boostExpl = _CLNEW Explanation(parentQuery->getBoost(), _T("boost"));
178 		if (parentQuery->getBoost() != 1.0f)
179 			queryExpl->addDetail(boostExpl);
180         else
181             _CLDELETE(boostExpl);
182 
183 		queryExpl->addDetail(idfExpl->clone());
184 
185 		Explanation* queryNormExpl = _CLNEW Explanation(queryNorm,_T("queryNorm"));
186 		queryExpl->addDetail(queryNormExpl);
187 
188 		queryExpl->setValue(parentQuery->getBoost()* // always 1.0 | TODO: original Java code is boostExpl.getValue()
189 							idfExpl->getValue() *
190 							queryNormExpl->getValue());
191 		result->addDetail(queryExpl);
192 
193 		// explain field weight
194 		const TCHAR* field = _term->field();
195 		ComplexExplanation* fieldExpl = _CLNEW ComplexExplanation();
196 
197         tmp = _term->toString();
198 		_sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,
199 			_T("fieldWeight(%s in %d), product of:"),tmp,doc);
200         _CLDELETE_LCARRAY(tmp);
201 		fieldExpl->setDescription(buf);
202 
203         Scorer* sc = scorer(reader);
204 		Explanation* tfExpl = sc->explain(doc);
205         _CLLDELETE(sc);
206 		fieldExpl->addDetail(tfExpl);
207 		fieldExpl->addDetail(idfExpl);
208 
209 		Explanation* fieldNormExpl = _CLNEW Explanation();
210 		uint8_t* fieldNorms = reader->norms(field);
211 		float_t fieldNorm =
212 			fieldNorms!=NULL ? Similarity::decodeNorm(fieldNorms[doc]) : 0.0f;
213 		fieldNormExpl->setValue(fieldNorm);
214 
215 		_sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,
216 			_T("fieldNorm(field=%s, doc=%d)"),field,doc);
217 		fieldNormExpl->setDescription(buf);
218 		fieldExpl->addDetail(fieldNormExpl);
219 
220 		fieldExpl->setMatch(tfExpl->isMatch());
221 		fieldExpl->setValue(tfExpl->getValue() *
222 							idfExpl->getValue() *
223 							fieldNormExpl->getValue());
224 
225         if (queryExpl->getValue() == 1.0f){
226 			_CLLDELETE(result);
227             return fieldExpl;
228         }
229 
230 		// combine them
231 		result->setValue(queryExpl->getValue() * fieldExpl->getValue());
232 
233 		result->addDetail(fieldExpl);
234 		result->setMatch(fieldExpl->getMatch());
235 
236 		return result;
237 	}
238 
_createWeight(Searcher * _searcher)239 	Weight* TermQuery::_createWeight(Searcher* _searcher) {
240         return _CLNEW TermWeight(_searcher,this,term);
241     }
242 
extractTerms(TermSet * termset) const243     void TermQuery::extractTerms( TermSet * termset ) const
244     {
245         if( term && termset->end() == termset->find( term ))
246             termset->insert( _CL_POINTER( term ));
247     }
248 
249 
250 CL_NS_END
251 
252