1 // 2 // $Id$ 3 // 4 5 // 6 // Copyright (c) 2001-2016, Andrew Aksyonoff 7 // Copyright (c) 2008-2016, Sphinx Technologies Inc 8 // All rights reserved 9 // 10 // This program is free software; you can redistribute it and/or modify 11 // it under the terms of the GNU General Public License. You should have 12 // received a copy of the GPL license along with this program; if you 13 // did not, you can find it at http://www.gnu.org/ 14 // 15 16 #ifndef _sphinxquery_ 17 #define _sphinxquery_ 18 19 #include "sphinx.h" 20 21 ////////////////////////////////////////////////////////////////////////////// 22 23 /// extended query word with attached position within atom 24 struct XQKeyword_t 25 { 26 CSphString m_sWord; 27 int m_iAtomPos; 28 int m_iSkippedBefore; ///< positions skipped before this token (because of blended chars) 29 bool m_bFieldStart; ///< must occur at very start 30 bool m_bFieldEnd; ///< must occur at very end 31 float m_fBoost; ///< keyword IDF will be multiplied by this 32 bool m_bExpanded; ///< added by prefix expansion 33 bool m_bExcluded; ///< excluded by query (rval to operator NOT) 34 bool m_bMorphed; ///< morphology processing (wordforms, stemming etc) already done 35 void * m_pPayload; 36 XQKeyword_tXQKeyword_t37 XQKeyword_t () 38 : m_iAtomPos ( -1 ) 39 , m_iSkippedBefore ( 0 ) 40 , m_bFieldStart ( false ) 41 , m_bFieldEnd ( false ) 42 , m_fBoost ( 1.0f ) 43 , m_bExpanded ( false ) 44 , m_bExcluded ( false ) 45 , m_bMorphed ( false ) 46 , m_pPayload ( NULL ) 47 {} 48 XQKeyword_tXQKeyword_t49 XQKeyword_t ( const char * sWord, int iPos ) 50 : m_sWord ( sWord ) 51 , m_iAtomPos ( iPos ) 52 , m_iSkippedBefore ( 0 ) 53 , m_bFieldStart ( false ) 54 , m_bFieldEnd ( false ) 55 , m_fBoost ( 1.0f ) 56 , m_bExpanded ( false ) 57 , m_bExcluded ( false ) 58 , m_bMorphed ( false ) 59 , m_pPayload ( NULL ) 60 {} 61 }; 62 63 64 /// extended query operator 65 enum XQOperator_e 66 { 67 SPH_QUERY_AND, 68 SPH_QUERY_OR, 69 SPH_QUERY_MAYBE, 70 SPH_QUERY_NOT, 71 SPH_QUERY_ANDNOT, 72 SPH_QUERY_BEFORE, 73 SPH_QUERY_PHRASE, 74 SPH_QUERY_PROXIMITY, 75 SPH_QUERY_QUORUM, 76 SPH_QUERY_NEAR, 77 SPH_QUERY_SENTENCE, 78 SPH_QUERY_PARAGRAPH, 79 SPH_QUERY_NULL 80 }; 81 82 // the limit of field or zone or zonespan 83 struct XQLimitSpec_t 84 { 85 bool m_bFieldSpec; ///< whether field spec was already explicitly set 86 FieldMask_t m_dFieldMask; ///< fields mask (spec part) 87 int m_iFieldMaxPos; ///< max position within field (spec part) 88 CSphVector<int> m_dZones; ///< zone indexes in per-query zones list 89 bool m_bZoneSpan; ///< if we need to hits within only one span 90 91 public: XQLimitSpec_tXQLimitSpec_t92 XQLimitSpec_t () 93 { 94 Reset(); 95 } 96 ResetXQLimitSpec_t97 inline void Reset () 98 { 99 m_bFieldSpec = false; 100 m_iFieldMaxPos = 0; 101 m_bZoneSpan = false; 102 m_dFieldMask.SetAll(); 103 m_dZones.Reset(); 104 } 105 IsEmptyXQLimitSpec_t106 bool IsEmpty() const 107 { 108 return m_bFieldSpec==false && m_iFieldMaxPos==0 && m_bZoneSpan==false && m_dZones.GetLength()==0; 109 } 110 XQLimitSpec_tXQLimitSpec_t111 XQLimitSpec_t ( const XQLimitSpec_t& dLimit ) 112 { 113 if ( this==&dLimit ) 114 return; 115 Reset(); 116 *this = dLimit; 117 } 118 119 XQLimitSpec_t & operator = ( const XQLimitSpec_t& dLimit ) 120 { 121 if ( this==&dLimit ) 122 return *this; 123 124 if ( dLimit.m_bFieldSpec ) 125 SetFieldSpec ( dLimit.m_dFieldMask, dLimit.m_iFieldMaxPos ); 126 127 if ( dLimit.m_dZones.GetLength() ) 128 SetZoneSpec ( dLimit.m_dZones, dLimit.m_bZoneSpan ); 129 130 return *this; 131 } 132 public: 133 void SetZoneSpec ( const CSphVector<int> & dZones, bool bZoneSpan ); 134 void SetFieldSpec ( const FieldMask_t& uMask, int iMaxPos ); 135 }; 136 137 /// extended query node 138 /// plain nodes are just an atom 139 /// non-plain nodes are a logical function over children nodes 140 struct XQNode_t : public ISphNoncopyable 141 { 142 XQNode_t * m_pParent; ///< my parent node (NULL for root ones) 143 144 private: 145 XQOperator_e m_eOp; ///< operation over childen 146 int m_iOrder; 147 int m_iCounter; 148 149 private: 150 mutable uint64_t m_iMagicHash; 151 mutable uint64_t m_iFuzzyHash; 152 153 public: 154 CSphVector<XQNode_t*> m_dChildren; ///< non-plain node children 155 XQLimitSpec_t m_dSpec; ///< specification by field, zone(s), etc. 156 157 CSphVector<XQKeyword_t> m_dWords; ///< query words (plain node) 158 int m_iOpArg; ///< operator argument (proximity distance, quorum count) 159 int m_iAtomPos; ///< atom position override (currently only used within expanded nodes) 160 int m_iUser; 161 bool m_bVirtuallyPlain; ///< "virtually plain" flag (currently only used by expanded nodes) 162 bool m_bNotWeighted; ///< this our expanded but empty word's node 163 bool m_bPercentOp; 164 165 public: 166 /// ctor 167 explicit XQNode_t ( const XQLimitSpec_t & dSpec ); 168 169 /// dtor 170 ~XQNode_t (); 171 172 /// check if i'm empty IsEmptyXQNode_t173 bool IsEmpty () const 174 { 175 assert ( m_dWords.GetLength()==0 || m_dChildren.GetLength()==0 ); 176 return m_dWords.GetLength()==0 && m_dChildren.GetLength()==0; 177 } 178 179 /// setup field limits 180 void SetFieldSpec ( const FieldMask_t& uMask, int iMaxPos ); 181 182 /// setup zone limits 183 void SetZoneSpec ( const CSphVector<int> & dZones, bool bZoneSpan=false ); 184 185 /// copy field/zone limits from another node 186 void CopySpecs ( const XQNode_t * pSpecs ); 187 188 /// unconditionally clear field mask 189 void ClearFieldMask (); 190 191 public: 192 /// get my operator GetOpXQNode_t193 XQOperator_e GetOp () const 194 { 195 return m_eOp; 196 } 197 198 /// get my cache order GetOrderXQNode_t199 DWORD GetOrder () const 200 { 201 return m_iOrder; 202 } 203 204 /// get my cache counter GetCountXQNode_t205 int GetCount () const 206 { 207 return m_iCounter; 208 } 209 210 /// setup common nodes for caching TagAsCommonXQNode_t211 void TagAsCommon ( int iOrder, int iCounter ) 212 { 213 m_iCounter = iCounter; 214 m_iOrder = iOrder; 215 } 216 217 /// hash me 218 uint64_t GetHash () const; 219 220 /// fuzzy hash ( a hash value is equal for proximity and phrase nodes 221 /// with similar keywords ) 222 uint64_t GetFuzzyHash () const; 223 224 /// setup new operator and args 225 void SetOp ( XQOperator_e eOp, XQNode_t * pArg1, XQNode_t * pArg2=NULL ); 226 227 /// setup new operator and args SetOpXQNode_t228 void SetOp ( XQOperator_e eOp, CSphVector<XQNode_t*> & dArgs ) 229 { 230 m_eOp = eOp; 231 m_dChildren.SwapData(dArgs); 232 ARRAY_FOREACH ( i, m_dChildren ) 233 m_dChildren[i]->m_pParent = this; 234 } 235 236 /// setup new operator (careful parser/transform use only) SetOpXQNode_t237 void SetOp ( XQOperator_e eOp ) 238 { 239 m_eOp = eOp; 240 } 241 242 /// fixup atom positions in case of proximity queries and blended chars 243 /// we need to handle tokens with blended characters as simple tokens in this case 244 /// and we need to remove possible gaps in atom positions 245 int FixupAtomPos(); 246 247 /// return node like current 248 inline XQNode_t * Clone (); 249 250 /// force resetting magic hash value ( that changed after transformation ) 251 inline bool ResetHash (); 252 253 #ifndef NDEBUG 254 /// consistency check CheckXQNode_t255 void Check ( bool bRoot ) 256 { 257 assert ( bRoot || !IsEmpty() ); // empty leaves must be removed from the final tree; empty root is allowed 258 assert (!( m_dWords.GetLength() && m_eOp!=SPH_QUERY_AND && m_eOp!=SPH_QUERY_OR && m_eOp!=SPH_QUERY_PHRASE 259 && m_eOp!=SPH_QUERY_PROXIMITY && m_eOp!=SPH_QUERY_QUORUM )); // words are only allowed in these node types 260 assert ( ( m_dWords.GetLength()==1 && ( m_eOp==SPH_QUERY_AND || m_eOp==SPH_QUERY_OR ) ) || 261 m_dWords.GetLength()!=1 ); // 1-word leaves must be of AND | OR types 262 263 ARRAY_FOREACH ( i, m_dChildren ) 264 { 265 assert ( m_dChildren[i]->m_pParent==this ); 266 m_dChildren[i]->Check ( false ); 267 } 268 } 269 #else CheckXQNode_t270 void Check ( bool ) {} 271 #endif 272 }; 273 274 275 /// extended query 276 struct XQQuery_t : public ISphNoncopyable 277 { 278 CSphString m_sParseError; 279 CSphString m_sParseWarning; 280 281 CSphVector<CSphString> m_dZones; 282 XQNode_t * m_pRoot; 283 bool m_bNeedSZlist; 284 bool m_bSingleWord; 285 286 /// ctor XQQuery_tXQQuery_t287 XQQuery_t () 288 { 289 m_pRoot = NULL; 290 m_bNeedSZlist = false; 291 m_bSingleWord = false; 292 } 293 294 /// dtor ~XQQuery_tXQQuery_t295 ~XQQuery_t () 296 { 297 SafeDelete ( m_pRoot ); 298 } 299 }; 300 301 ////////////////////////////////////////////////////////////////////////////// 302 303 /// setup tokenizer for query parsing (ie. add all specials and whatnot) 304 void sphSetupQueryTokenizer ( ISphTokenizer * pTokenizer ); 305 306 /// parses the query and returns the resulting tree 307 /// return false and fills tQuery.m_sParseError on error 308 /// WARNING, parsed tree might be NULL (eg. if query was empty) 309 /// lots of arguments here instead of simply the index pointer, because 310 /// a) we do not always have an actual real index class, and 311 /// b) might need to tweak stuff even we do 312 /// FIXME! remove either pQuery or sQuery 313 bool sphParseExtendedQuery ( XQQuery_t & tQuery, const char * sQuery, const CSphQuery * pQuery, const ISphTokenizer * pTokenizer, const CSphSchema * pSchema, CSphDict * pDict, const CSphIndexSettings & tSettings ); 314 315 // perform boolean optimization on tree 316 void sphOptimizeBoolean ( XQNode_t ** pXQ, const ISphKeywordsStat * pKeywords ); 317 318 /// analyze vector of trees and tag common parts of them (to cache them later) 319 int sphMarkCommonSubtrees ( int iXQ, const XQQuery_t * pXQ ); 320 321 #endif // _sphinxquery_ 322 323 // 324 // $Id$ 325 // 326