1 //
2 // $Id$
3 //
4 
5 //
6 // Copyright (c) 2001-2016, Andrew Aksyonoff
7 // Copyright (c) 2008-2016, Sphinx Technologies Inc
8 // All rights reserved
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15 
16 #ifndef _sphinxquery_
17 #define _sphinxquery_
18 
19 #include "sphinx.h"
20 
21 //////////////////////////////////////////////////////////////////////////////
22 
23 /// extended query word with attached position within atom
24 struct XQKeyword_t
25 {
26 	CSphString			m_sWord;
27 	int					m_iAtomPos;
28 	int					m_iSkippedBefore; ///< positions skipped before this token (because of blended chars)
29 	bool				m_bFieldStart;	///< must occur at very start
30 	bool				m_bFieldEnd;	///< must occur at very end
31 	float				m_fBoost;		///< keyword IDF will be multiplied by this
32 	bool				m_bExpanded;	///< added by prefix expansion
33 	bool				m_bExcluded;	///< excluded by query (rval to operator NOT)
34 	bool				m_bMorphed;		///< morphology processing (wordforms, stemming etc) already done
35 	void *				m_pPayload;
36 
XQKeyword_tXQKeyword_t37 	XQKeyword_t ()
38 		: m_iAtomPos ( -1 )
39 		, m_iSkippedBefore ( 0 )
40 		, m_bFieldStart ( false )
41 		, m_bFieldEnd ( false )
42 		, m_fBoost ( 1.0f )
43 		, m_bExpanded ( false )
44 		, m_bExcluded ( false )
45 		, m_bMorphed ( false )
46 		, m_pPayload ( NULL )
47 	{}
48 
XQKeyword_tXQKeyword_t49 	XQKeyword_t ( const char * sWord, int iPos )
50 		: m_sWord ( sWord )
51 		, m_iAtomPos ( iPos )
52 		, m_iSkippedBefore ( 0 )
53 		, m_bFieldStart ( false )
54 		, m_bFieldEnd ( false )
55 		, m_fBoost ( 1.0f )
56 		, m_bExpanded ( false )
57 		, m_bExcluded ( false )
58 		, m_bMorphed ( false )
59 		, m_pPayload ( NULL )
60 	{}
61 };
62 
63 
64 /// extended query operator
65 enum XQOperator_e
66 {
67 	SPH_QUERY_AND,
68 	SPH_QUERY_OR,
69 	SPH_QUERY_MAYBE,
70 	SPH_QUERY_NOT,
71 	SPH_QUERY_ANDNOT,
72 	SPH_QUERY_BEFORE,
73 	SPH_QUERY_PHRASE,
74 	SPH_QUERY_PROXIMITY,
75 	SPH_QUERY_QUORUM,
76 	SPH_QUERY_NEAR,
77 	SPH_QUERY_SENTENCE,
78 	SPH_QUERY_PARAGRAPH,
79 	SPH_QUERY_NULL
80 };
81 
82 // the limit of field or zone or zonespan
83 struct XQLimitSpec_t
84 {
85 	bool					m_bFieldSpec;	///< whether field spec was already explicitly set
86 	FieldMask_t			m_dFieldMask;	///< fields mask (spec part)
87 	int						m_iFieldMaxPos;	///< max position within field (spec part)
88 	CSphVector<int>			m_dZones;		///< zone indexes in per-query zones list
89 	bool					m_bZoneSpan;	///< if we need to hits within only one span
90 
91 public:
XQLimitSpec_tXQLimitSpec_t92 	XQLimitSpec_t ()
93 	{
94 		Reset();
95 	}
96 
ResetXQLimitSpec_t97 	inline void Reset ()
98 	{
99 		m_bFieldSpec = false;
100 		m_iFieldMaxPos = 0;
101 		m_bZoneSpan = false;
102 		m_dFieldMask.SetAll();
103 		m_dZones.Reset();
104 	}
105 
IsEmptyXQLimitSpec_t106 	bool IsEmpty() const
107 	{
108 		return m_bFieldSpec==false && m_iFieldMaxPos==0 && m_bZoneSpan==false && m_dZones.GetLength()==0;
109 	}
110 
XQLimitSpec_tXQLimitSpec_t111 	XQLimitSpec_t ( const XQLimitSpec_t& dLimit )
112 	{
113 		if ( this==&dLimit )
114 			return;
115 		Reset();
116 		*this = dLimit;
117 	}
118 
119 	XQLimitSpec_t & operator = ( const XQLimitSpec_t& dLimit )
120 	{
121 		if ( this==&dLimit )
122 			return *this;
123 
124 		if ( dLimit.m_bFieldSpec )
125 			SetFieldSpec ( dLimit.m_dFieldMask, dLimit.m_iFieldMaxPos );
126 
127 		if ( dLimit.m_dZones.GetLength() )
128 			SetZoneSpec ( dLimit.m_dZones, dLimit.m_bZoneSpan );
129 
130 		return *this;
131 	}
132 public:
133 	void SetZoneSpec ( const CSphVector<int> & dZones, bool bZoneSpan );
134 	void SetFieldSpec ( const FieldMask_t& uMask, int iMaxPos );
135 };
136 
137 /// extended query node
138 /// plain nodes are just an atom
139 /// non-plain nodes are a logical function over children nodes
140 struct XQNode_t : public ISphNoncopyable
141 {
142 	XQNode_t *				m_pParent;		///< my parent node (NULL for root ones)
143 
144 private:
145 	XQOperator_e			m_eOp;			///< operation over childen
146 	int						m_iOrder;
147 	int						m_iCounter;
148 
149 private:
150 	mutable uint64_t		m_iMagicHash;
151 	mutable uint64_t		m_iFuzzyHash;
152 
153 public:
154 	CSphVector<XQNode_t*>	m_dChildren;	///< non-plain node children
155 	XQLimitSpec_t			m_dSpec;		///< specification by field, zone(s), etc.
156 
157 	CSphVector<XQKeyword_t>	m_dWords;		///< query words (plain node)
158 	int						m_iOpArg;		///< operator argument (proximity distance, quorum count)
159 	int						m_iAtomPos;		///< atom position override (currently only used within expanded nodes)
160 	int						m_iUser;
161 	bool					m_bVirtuallyPlain;	///< "virtually plain" flag (currently only used by expanded nodes)
162 	bool					m_bNotWeighted;	///< this our expanded but empty word's node
163 	bool					m_bPercentOp;
164 
165 public:
166 	/// ctor
167 	explicit XQNode_t ( const XQLimitSpec_t & dSpec );
168 
169 	/// dtor
170 	~XQNode_t ();
171 
172 	/// check if i'm empty
IsEmptyXQNode_t173 	bool IsEmpty () const
174 	{
175 		assert ( m_dWords.GetLength()==0 || m_dChildren.GetLength()==0 );
176 		return m_dWords.GetLength()==0 && m_dChildren.GetLength()==0;
177 	}
178 
179 	/// setup field limits
180 	void SetFieldSpec ( const FieldMask_t& uMask, int iMaxPos );
181 
182 	/// setup zone limits
183 	void SetZoneSpec ( const CSphVector<int> & dZones, bool bZoneSpan=false );
184 
185 	/// copy field/zone limits from another node
186 	void CopySpecs ( const XQNode_t * pSpecs );
187 
188 	/// unconditionally clear field mask
189 	void ClearFieldMask ();
190 
191 public:
192 	/// get my operator
GetOpXQNode_t193 	XQOperator_e GetOp () const
194 	{
195 		return m_eOp;
196 	}
197 
198 	/// get my cache order
GetOrderXQNode_t199 	DWORD GetOrder () const
200 	{
201 		return m_iOrder;
202 	}
203 
204 	/// get my cache counter
GetCountXQNode_t205 	int GetCount () const
206 	{
207 		return m_iCounter;
208 	}
209 
210 	/// setup common nodes for caching
TagAsCommonXQNode_t211 	void TagAsCommon ( int iOrder, int iCounter )
212 	{
213 		m_iCounter = iCounter;
214 		m_iOrder = iOrder;
215 	}
216 
217 	/// hash me
218 	uint64_t GetHash () const;
219 
220 	/// fuzzy hash ( a hash value is equal for proximity and phrase nodes
221 	/// with similar keywords )
222 	uint64_t GetFuzzyHash () const;
223 
224 	/// setup new operator and args
225 	void SetOp ( XQOperator_e eOp, XQNode_t * pArg1, XQNode_t * pArg2=NULL );
226 
227 	/// setup new operator and args
SetOpXQNode_t228 	void SetOp ( XQOperator_e eOp, CSphVector<XQNode_t*> & dArgs )
229 	{
230 		m_eOp = eOp;
231 		m_dChildren.SwapData(dArgs);
232 		ARRAY_FOREACH ( i, m_dChildren )
233 			m_dChildren[i]->m_pParent = this;
234 	}
235 
236 	/// setup new operator (careful parser/transform use only)
SetOpXQNode_t237 	void SetOp ( XQOperator_e eOp )
238 	{
239 		m_eOp = eOp;
240 	}
241 
242 	/// fixup atom positions in case of proximity queries and blended chars
243 	/// we need to handle tokens with blended characters as simple tokens in this case
244 	/// and we need to remove possible gaps in atom positions
245 	int FixupAtomPos();
246 
247 	/// return node like current
248 	inline XQNode_t * Clone ();
249 
250 	/// force resetting magic hash value ( that changed after transformation )
251 	inline bool ResetHash ();
252 
253 #ifndef NDEBUG
254 	/// consistency check
CheckXQNode_t255 	void Check ( bool bRoot )
256 	{
257 		assert ( bRoot || !IsEmpty() ); // empty leaves must be removed from the final tree; empty root is allowed
258 		assert (!( m_dWords.GetLength() && m_eOp!=SPH_QUERY_AND && m_eOp!=SPH_QUERY_OR && m_eOp!=SPH_QUERY_PHRASE
259 			&& m_eOp!=SPH_QUERY_PROXIMITY && m_eOp!=SPH_QUERY_QUORUM )); // words are only allowed in these node types
260 		assert ( ( m_dWords.GetLength()==1 && ( m_eOp==SPH_QUERY_AND || m_eOp==SPH_QUERY_OR ) ) ||
261 			m_dWords.GetLength()!=1 ); // 1-word leaves must be of AND | OR types
262 
263 		ARRAY_FOREACH ( i, m_dChildren )
264 		{
265 			assert ( m_dChildren[i]->m_pParent==this );
266 			m_dChildren[i]->Check ( false );
267 		}
268 	}
269 #else
CheckXQNode_t270 	void Check ( bool ) {}
271 #endif
272 };
273 
274 
275 /// extended query
276 struct XQQuery_t : public ISphNoncopyable
277 {
278 	CSphString				m_sParseError;
279 	CSphString				m_sParseWarning;
280 
281 	CSphVector<CSphString>	m_dZones;
282 	XQNode_t *				m_pRoot;
283 	bool					m_bNeedSZlist;
284 	bool					m_bSingleWord;
285 
286 	/// ctor
XQQuery_tXQQuery_t287 	XQQuery_t ()
288 	{
289 		m_pRoot = NULL;
290 		m_bNeedSZlist = false;
291 		m_bSingleWord = false;
292 	}
293 
294 	/// dtor
~XQQuery_tXQQuery_t295 	~XQQuery_t ()
296 	{
297 		SafeDelete ( m_pRoot );
298 	}
299 };
300 
301 //////////////////////////////////////////////////////////////////////////////
302 
303 /// setup tokenizer for query parsing (ie. add all specials and whatnot)
304 void	sphSetupQueryTokenizer ( ISphTokenizer * pTokenizer );
305 
306 /// parses the query and returns the resulting tree
307 /// return false and fills tQuery.m_sParseError on error
308 /// WARNING, parsed tree might be NULL (eg. if query was empty)
309 /// lots of arguments here instead of simply the index pointer, because
310 /// a) we do not always have an actual real index class, and
311 /// b) might need to tweak stuff even we do
312 /// FIXME! remove either pQuery or sQuery
313 bool	sphParseExtendedQuery ( XQQuery_t & tQuery, const char * sQuery, const CSphQuery * pQuery, const ISphTokenizer * pTokenizer, const CSphSchema * pSchema, CSphDict * pDict, const CSphIndexSettings & tSettings );
314 
315 // perform boolean optimization on tree
316 void	sphOptimizeBoolean ( XQNode_t ** pXQ, const ISphKeywordsStat * pKeywords );
317 
318 /// analyze vector of trees and tag common parts of them (to cache them later)
319 int		sphMarkCommonSubtrees ( int iXQ, const XQQuery_t * pXQ );
320 
321 #endif // _sphinxquery_
322 
323 //
324 // $Id$
325 //
326