1 //
2 // $Id: sphinxquery.h 4113 2013-08-26 07:43:28Z deogar $
3 //
4 
5 //
6 // Copyright (c) 2001-2013, Andrew Aksyonoff
7 // Copyright (c) 2008-2013, Sphinx Technologies Inc
8 // All rights reserved
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15 
16 #ifndef _sphinxquery_
17 #define _sphinxquery_
18 
19 #include "sphinx.h"
20 
21 //////////////////////////////////////////////////////////////////////////////
22 
/// star position flags for a keyword (stored in XQKeyword_t::m_uStarPosition)
/// STAR_BOTH is the bitwise OR of STAR_FRONT and STAR_BACK
enum XQStarPosition
{
	STAR_NONE	= 0,
	STAR_FRONT	= 1 << 0,
	STAR_BACK	= 1 << 1,
	STAR_BOTH	= STAR_FRONT | STAR_BACK
};
30 
31 /// extended query word with attached position within atom
32 struct XQKeyword_t
33 {
34 	CSphString			m_sWord;
35 	int					m_iAtomPos;
36 	bool				m_bFieldStart;	///< must occur at very start
37 	bool				m_bFieldEnd;	///< must occur at very end
38 	DWORD				m_uStarPosition;
39 	bool				m_bExpanded;	///< added by prefix expansion
40 	bool				m_bExcluded;	///< excluded by query (rval to operator NOT)
41 
XQKeyword_tXQKeyword_t42 	XQKeyword_t ()
43 		: m_iAtomPos ( -1 )
44 		, m_bFieldStart ( false )
45 		, m_bFieldEnd ( false )
46 		, m_uStarPosition ( STAR_NONE )
47 		, m_bExpanded ( false )
48 		, m_bExcluded ( false )
49 	{}
50 
XQKeyword_tXQKeyword_t51 	XQKeyword_t ( const char * sWord, int iPos )
52 		: m_sWord ( sWord )
53 		, m_iAtomPos ( iPos )
54 		, m_bFieldStart ( false )
55 		, m_bFieldEnd ( false )
56 		, m_uStarPosition ( STAR_NONE )
57 		, m_bExpanded ( false )
58 		, m_bExcluded ( false )
59 	{}
60 };
61 
62 
/// extended query operator
/// (numeric values are spelled out; they match the implicit numbering)
enum XQOperator_e
{
	SPH_QUERY_AND		= 0,
	SPH_QUERY_OR		= 1,
	SPH_QUERY_NOT		= 2,
	SPH_QUERY_ANDNOT	= 3,
	SPH_QUERY_BEFORE	= 4,
	SPH_QUERY_PHRASE	= 5,
	SPH_QUERY_PROXIMITY	= 6,
	SPH_QUERY_QUORUM	= 7,
	SPH_QUERY_NEAR		= 8,
	SPH_QUERY_SENTENCE	= 9,
	SPH_QUERY_PARAGRAPH	= 10
};
78 
79 // the limit of field or zone
80 struct XQLimitSpec_t
81 {
82 	bool					m_bFieldSpec;	///< whether field spec was already explicitly set
83 	CSphSmallBitvec			m_dFieldMask;	///< fields mask (spec part)
84 	int						m_iFieldMaxPos;	///< max position within field (spec part)
85 	CSphVector<int>			m_dZones;		///< zone indexes in per-query zones list
86 
87 public:
XQLimitSpec_tXQLimitSpec_t88 	XQLimitSpec_t ()
89 	{
90 		Reset();
91 	}
92 
ResetXQLimitSpec_t93 	inline void Reset ()
94 	{
95 		m_bFieldSpec = false;
96 		m_iFieldMaxPos = 0;
97 		m_dFieldMask.Set();
98 		m_dZones.Reset();
99 	}
100 
XQLimitSpec_tXQLimitSpec_t101 	XQLimitSpec_t ( const XQLimitSpec_t& dLimit )
102 	{
103 		if ( this==&dLimit )
104 			return;
105 		Reset();
106 		*this = dLimit;
107 	}
108 
109 	XQLimitSpec_t & operator = ( const XQLimitSpec_t& dLimit )
110 	{
111 		if ( this==&dLimit )
112 			return *this;
113 
114 		if ( dLimit.m_bFieldSpec )
115 			SetFieldSpec ( dLimit.m_dFieldMask, dLimit.m_iFieldMaxPos );
116 
117 		if ( dLimit.m_dZones.GetLength() )
118 			SetZoneSpec ( dLimit.m_dZones );
119 
120 		return *this;
121 	}
122 public:
123 	void SetZoneSpec ( const CSphVector<int> & dZones );
124 	void SetFieldSpec ( const CSphSmallBitvec& uMask, int iMaxPos );
125 };
126 
127 /// extended query node
128 /// plain nodes are just an atom
129 /// non-plain nodes are a logical function over children nodes
130 struct XQNode_t : public ISphNoncopyable
131 {
132 	XQNode_t *				m_pParent;		///< my parent node (NULL for root ones)
133 
134 private:
135 	XQOperator_e			m_eOp;			///< operation over childen
136 	int						m_iOrder;
137 	int						m_iCounter;
138 
139 private:
140 	mutable uint64_t		m_iMagicHash;
141 
142 public:
143 	CSphVector<XQNode_t*>	m_dChildren;	///< non-plain node children
144 	XQLimitSpec_t			m_dSpec;		///< specification by field, zone(s), etc.
145 
146 	CSphVector<XQKeyword_t>	m_dWords;		///< query words (plain node)
147 	int						m_iOpArg;		///< operator argument (proximity distance, quorum count)
148 	int						m_iAtomPos;		///< atom position override (currently only used within expanded nodes)
149 	bool					m_bVirtuallyPlain;	///< "virtually plain" flag (currently only used by expanded nodes)
150 	bool					m_bNotWeighted;	///< this our expanded but empty word's node
151 
152 public:
153 	/// ctor
XQNode_tXQNode_t154 	explicit XQNode_t ( const XQLimitSpec_t & dSpec )
155 		: m_pParent ( NULL )
156 		, m_eOp ( SPH_QUERY_AND )
157 		, m_iOrder ( 0 )
158 		, m_iCounter ( 0 )
159 		, m_iMagicHash ( 0 )
160 		, m_dSpec ( dSpec )
161 		, m_iOpArg ( 0 )
162 		, m_iAtomPos ( -1 )
163 		, m_bVirtuallyPlain ( false )
164 		, m_bNotWeighted ( false )
165 	{}
166 
167 	/// dtor
~XQNode_tXQNode_t168 	~XQNode_t ()
169 	{
170 		ARRAY_FOREACH ( i, m_dChildren )
171 			SafeDelete ( m_dChildren[i] );
172 	}
173 
174 	/// check if i'm empty
IsEmptyXQNode_t175 	bool IsEmpty () const
176 	{
177 		assert ( m_dWords.GetLength()==0 || m_dChildren.GetLength()==0 );
178 		return m_dWords.GetLength()==0 && m_dChildren.GetLength()==0;
179 	}
180 
181 	/// setup field limits
182 	void SetFieldSpec ( const CSphSmallBitvec& uMask, int iMaxPos );
183 
184 	/// setup zone limits
185 	void SetZoneSpec ( const CSphVector<int> & dZones );
186 
187 	/// copy field/zone limits from another node
188 	void CopySpecs ( const XQNode_t * pSpecs );
189 
190 	/// unconditionally clear field mask
191 	void ClearFieldMask ();
192 
193 public:
194 	/// get my operator
GetOpXQNode_t195 	XQOperator_e GetOp () const
196 	{
197 		return m_eOp;
198 	}
199 
200 	/// get my cache order
GetOrderXQNode_t201 	DWORD GetOrder () const
202 	{
203 		return m_iOrder;
204 	}
205 
206 	/// get my cache counter
GetCountXQNode_t207 	int GetCount () const
208 	{
209 		return m_iCounter;
210 	}
211 
212 	/// setup common nodes for caching
TagAsCommonXQNode_t213 	void TagAsCommon ( int iOrder, int iCounter )
214 	{
215 		m_iCounter = iCounter;
216 		m_iOrder = iOrder;
217 	}
218 
219 	/// precise comparison
220 	bool IsEqualTo ( const XQNode_t * pNode );
221 
222 	/// hash me
223 	uint64_t GetHash () const;
224 
225 	/// setup new operator and args
226 	void SetOp ( XQOperator_e eOp, XQNode_t * pArg1, XQNode_t * pArg2=NULL );
227 
228 	/// setup new operator and args
SetOpXQNode_t229 	void SetOp ( XQOperator_e eOp, CSphVector<XQNode_t*> & dArgs )
230 	{
231 		m_eOp = eOp;
232 		m_dChildren.SwapData(dArgs);
233 	}
234 
235 	/// setup new operator (careful parser/transform use only)
SetOpXQNode_t236 	void SetOp ( XQOperator_e eOp )
237 	{
238 		m_eOp = eOp;
239 	}
240 
241 #ifndef NDEBUG
242 	/// consistency check
CheckXQNode_t243 	void Check ( bool bRoot )
244 	{
245 		assert ( bRoot || !IsEmpty() ); // empty leaves must be removed from the final tree; empty root is allowed
246 		assert (!( m_dWords.GetLength() && m_eOp!=SPH_QUERY_AND && m_eOp!=SPH_QUERY_PHRASE && m_eOp!=SPH_QUERY_PROXIMITY && m_eOp!=SPH_QUERY_QUORUM )); // words are only allowed in these node types
247 		assert (!( m_dWords.GetLength()==1 && m_eOp!=SPH_QUERY_AND )); // 1-word leaves must be of AND type
248 
249 		ARRAY_FOREACH ( i, m_dChildren )
250 			m_dChildren[i]->Check ( false );
251 	}
252 #endif
253 };
254 
255 
256 /// extended query
257 struct XQQuery_t : public ISphNoncopyable
258 {
259 	CSphString				m_sParseError;
260 	CSphString				m_sParseWarning;
261 
262 	CSphVector<CSphString>	m_dZones;
263 	XQNode_t *				m_pRoot;
264 	bool					m_bSingleWord;
265 
266 	/// ctor
XQQuery_tXQQuery_t267 	XQQuery_t ()
268 	{
269 		m_pRoot = NULL;
270 		m_bSingleWord = false;
271 	}
272 
273 	/// dtor
~XQQuery_tXQQuery_t274 	~XQQuery_t ()
275 	{
276 		SafeDelete ( m_pRoot );
277 	}
278 };
279 
280 //////////////////////////////////////////////////////////////////////////////
281 
/// parses the query and returns the resulting tree
/// (NOTE(review): presumably stored in tQuery.m_pRoot; confirm against the definition)
/// returns false and fills tQuery.m_sParseError on error
/// WARNING, parsed tree might be NULL (eg. if query was empty)
bool	sphParseExtendedQuery ( XQQuery_t & tQuery, const char * sQuery, const ISphTokenizer * pTokenizer, const CSphSchema * pSchema, CSphDict * pDict, int iStopwordStep );
286 
/// analyse vector of trees and tag common parts of them (to cache them later)
/// NOTE(review): pXQ presumably points to an array of iXQ queries, and the meaning
/// of the returned int is not visible in this header; confirm against the definition
int		sphMarkCommonSubtrees ( int iXQ, const XQQuery_t * pXQ );
289 
290 #endif // _sphinxquery_
291 
292 //
293 // $Id: sphinxquery.h 4113 2013-08-26 07:43:28Z deogar $
294 //
295