1 /*------------------------------------------------------------------------------ 2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 3 * 4 * Distributable under the terms of either the Apache License (Version 2.0) or 5 * the GNU Lesser General Public License, as specified in the COPYING file. 6 ------------------------------------------------------------------------------*/ 7 #include "CLucene/_ApiHeader.h" 8 #include "TermQuery.h" 9 10 #include "SearchHeader.h" 11 #include "Scorer.h" 12 #include "CLucene/index/Term.h" 13 #include "Explanation.h" 14 #include "Similarity.h" 15 #include "Searchable.h" 16 #include "_TermScorer.h" 17 #include "CLucene/index/IndexReader.h" 18 #include "CLucene/util/StringBuffer.h" 19 #include "CLucene/index/Terms.h" 20 21 #include <assert.h> 22 23 CL_NS_USE(index) 24 CL_NS_DEF(search) 25 26 27 28 class TermWeight: public Weight { 29 private: 30 Similarity* similarity; // ISH: was Searcher*, for no apparent reason 31 float_t value; 32 float_t idf; 33 float_t queryNorm; 34 float_t queryWeight; 35 36 TermQuery* parentQuery; // CLucene specific 37 CL_NS(index)::Term* _term; 38 39 public: 40 TermWeight(Searcher* searcher, TermQuery* parentQuery, CL_NS(index)::Term* _term); 41 virtual ~TermWeight(); 42 43 // return a *new* string describing this object 44 TCHAR* toString(); getQuery()45 Query* getQuery() { return (Query*)parentQuery; } getValue()46 float_t getValue() { return value; } 47 48 float_t sumOfSquaredWeights(); 49 void normalize(float_t queryNorm); 50 Scorer* scorer(CL_NS(index)::IndexReader* reader); 51 Explanation* explain(CL_NS(index)::IndexReader* reader, int32_t doc); 52 }; 53 54 55 /** Constructs a query for the term <code>t</code>. */ TermQuery(Term * t)56 TermQuery::TermQuery(Term* t): 57 term( _CL_POINTER(t) ) 58 { 59 } TermQuery(const TermQuery & clone)60 TermQuery::TermQuery(const TermQuery& clone): 61 Query(clone){ 62 this->term=_CL_POINTER(clone.term); 63 } ~TermQuery()64 TermQuery::~TermQuery(){ 65 _CLLDECDELETE(term); 66 } 67 clone() const68 Query* TermQuery::clone() const{ 69 return _CLNEW TermQuery(*this); 70 } 71 getClassName()72 const char* TermQuery::getClassName(){ 73 return "TermQuery"; 74 } getObjectName() const75 const char* TermQuery::getObjectName() const{ 76 return getClassName(); 77 } hashCode() const78 size_t TermQuery::hashCode() const { 79 return Similarity::floatToByte(getBoost()) ^ term->hashCode(); 80 } 81 82 //added by search highlighter getTerm(bool pointer) const83 Term* TermQuery::getTerm(bool pointer) const 84 { 85 if ( pointer ) 86 return _CL_POINTER(term); 87 else 88 return term; 89 } 90 toString(const TCHAR * field) const91 TCHAR* TermQuery::toString(const TCHAR* field) const{ 92 CL_NS(util)::StringBuffer buffer; 93 if ( field==NULL || _tcscmp(term->field(),field)!= 0 ) { 94 buffer.append(term->field()); 95 buffer.append(_T(":")); 96 } 97 buffer.append(term->text()); 98 if (getBoost() != 1.0f) { 99 buffer.append(_T("^")); 100 buffer.appendFloat( getBoost(),1 ); 101 } 102 return buffer.toString(); 103 } 104 equals(Query * other) const105 bool TermQuery::equals(Query* other) const { 106 if (!(other->instanceOf(TermQuery::getClassName()))) 107 return false; 108 109 TermQuery* tq = (TermQuery*)other; 110 return (this->getBoost() == tq->getBoost()) 111 && this->term->equals(tq->term); 112 } 113 TermWeight(Searcher * _searcher,TermQuery * _parentQuery,Term * term)114 TermWeight::TermWeight(Searcher* _searcher, TermQuery* _parentQuery, Term* term):similarity(_searcher->getSimilarity()), 115 value(0), queryNorm(0),queryWeight(0), parentQuery(_parentQuery),_term(term) 116 { 117 idf = similarity->idf(term, _searcher); // compute idf 118 } 119 ~TermWeight()120 TermWeight::~TermWeight(){ 121 } 122 123 // toString()124 TCHAR* TermWeight::toString() { 125 int32_t size=strlen(parentQuery->getObjectName()) + 10; 126 TCHAR* tmp = _CL_NEWARRAY(TCHAR, size); 127 _sntprintf(tmp,size,_T("weight(%S)"),parentQuery->getObjectName()); 128 return tmp; 129 } 130 sumOfSquaredWeights()131 float_t TermWeight::sumOfSquaredWeights() { 132 // legacy // idf = parentQuery->getSimilarity(searcher)->idf(_term, searcher); // compute idf 133 queryWeight = idf * parentQuery->getBoost(); // compute query weight 134 return queryWeight * queryWeight; // square it 135 } 136 normalize(float_t _queryNorm)137 void TermWeight::normalize(float_t _queryNorm) { 138 this->queryNorm = _queryNorm; 139 queryWeight *= queryNorm; // normalize query weight 140 value = queryWeight * idf; // idf for document 141 } 142 scorer(IndexReader * reader)143 Scorer* TermWeight::scorer(IndexReader* reader) { 144 TermDocs* termDocs = reader->termDocs(_term); 145 146 if (termDocs == NULL) 147 return NULL; 148 149 return _CLNEW TermScorer(this, termDocs, similarity, 150 reader->norms(_term->field())); 151 } 152 explain(IndexReader * reader,int32_t doc)153 Explanation* TermWeight::explain(IndexReader* reader, int32_t doc){ 154 ComplexExplanation* result = _CLNEW ComplexExplanation(); 155 156 TCHAR buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN]; 157 TCHAR* tmp; 158 159 tmp = getQuery()->toString(); 160 _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN, 161 _T("weight(%s in %d), product of:"),tmp,doc); 162 _CLDELETE_LCARRAY(tmp); 163 result->setDescription(buf); 164 165 _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN, 166 _T("idf(docFreq=%d, numDocs=%d)"), reader->docFreq(_term), reader->numDocs() ); 167 Explanation* idfExpl = _CLNEW Explanation(idf, buf); 168 169 // explain query weight 170 Explanation* queryExpl = _CLNEW Explanation(); 171 tmp = getQuery()->toString(); 172 _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN, 173 _T("queryWeight(%s), product of:"), tmp); 174 _CLDELETE_LCARRAY(tmp); 175 queryExpl->setDescription(buf); 176 177 Explanation* boostExpl = _CLNEW Explanation(parentQuery->getBoost(), _T("boost")); 178 if (parentQuery->getBoost() != 1.0f) 179 queryExpl->addDetail(boostExpl); 180 else 181 _CLDELETE(boostExpl); 182 183 queryExpl->addDetail(idfExpl->clone()); 184 185 Explanation* queryNormExpl = _CLNEW Explanation(queryNorm,_T("queryNorm")); 186 queryExpl->addDetail(queryNormExpl); 187 188 queryExpl->setValue(parentQuery->getBoost()* // always 1.0 | TODO: original Java code is boostExpl.getValue() 189 idfExpl->getValue() * 190 queryNormExpl->getValue()); 191 result->addDetail(queryExpl); 192 193 // explain field weight 194 const TCHAR* field = _term->field(); 195 ComplexExplanation* fieldExpl = _CLNEW ComplexExplanation(); 196 197 tmp = _term->toString(); 198 _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN, 199 _T("fieldWeight(%s in %d), product of:"),tmp,doc); 200 _CLDELETE_LCARRAY(tmp); 201 fieldExpl->setDescription(buf); 202 203 Scorer* sc = scorer(reader); 204 Explanation* tfExpl = sc->explain(doc); 205 _CLLDELETE(sc); 206 fieldExpl->addDetail(tfExpl); 207 fieldExpl->addDetail(idfExpl); 208 209 Explanation* fieldNormExpl = _CLNEW Explanation(); 210 uint8_t* fieldNorms = reader->norms(field); 211 float_t fieldNorm = 212 fieldNorms!=NULL ? Similarity::decodeNorm(fieldNorms[doc]) : 0.0f; 213 fieldNormExpl->setValue(fieldNorm); 214 215 _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN, 216 _T("fieldNorm(field=%s, doc=%d)"),field,doc); 217 fieldNormExpl->setDescription(buf); 218 fieldExpl->addDetail(fieldNormExpl); 219 220 fieldExpl->setMatch(tfExpl->isMatch()); 221 fieldExpl->setValue(tfExpl->getValue() * 222 idfExpl->getValue() * 223 fieldNormExpl->getValue()); 224 225 if (queryExpl->getValue() == 1.0f){ 226 _CLLDELETE(result); 227 return fieldExpl; 228 } 229 230 // combine them 231 result->setValue(queryExpl->getValue() * fieldExpl->getValue()); 232 233 result->addDetail(fieldExpl); 234 result->setMatch(fieldExpl->getMatch()); 235 236 return result; 237 } 238 _createWeight(Searcher * _searcher)239 Weight* TermQuery::_createWeight(Searcher* _searcher) { 240 return _CLNEW TermWeight(_searcher,this,term); 241 } 242 extractTerms(TermSet * termset) const243 void TermQuery::extractTerms( TermSet * termset ) const 244 { 245 if( term && termset->end() == termset->find( term )) 246 termset->insert( _CL_POINTER( term )); 247 } 248 249 250 CL_NS_END 251 252