1 //
2 // $Id$
3 //
4 
5 //
6 // Copyright (c) 2001-2016, Andrew Aksyonoff
7 // Copyright (c) 2008-2016, Sphinx Technologies Inc
8 // All rights reserved
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15 
16 #include "sphinx.h"
17 #include "sphinxexpr.h"
18 #include "sphinxplugin.h"
19 
20 #include "sphinxutils.h"
21 #include "sphinxint.h"
22 #include "sphinxjson.h"
23 #include <time.h>
24 #include <math.h>
25 
26 #ifndef M_LOG2E
27 #define M_LOG2E		1.44269504088896340736
28 #endif
29 
30 #ifndef M_LOG10E
31 #define M_LOG10E	0.434294481903251827651
32 #endif
33 
34 // hack hack hack
35 UservarIntSet_c * ( *g_pUservarsHook )( const CSphString & sUservar );
36 
37 //////////////////////////////////////////////////////////////////////////
38 // EVALUATION ENGINE
39 //////////////////////////////////////////////////////////////////////////
40 
41 struct ExprLocatorTraits_t : public ISphExpr
42 {
43 	CSphAttrLocator m_tLocator;
44 	int m_iLocator; // used by SPH_EXPR_GET_DEPENDENT_COLS
45 
ExprLocatorTraits_tExprLocatorTraits_t46 	ExprLocatorTraits_t ( const CSphAttrLocator & tLocator, int iLocator ) : m_tLocator ( tLocator ), m_iLocator ( iLocator ) {}
47 
CommandExprLocatorTraits_t48 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
49 	{
50 		if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
51 			static_cast < CSphVector<int>* >(pArg)->Add ( m_iLocator );
52 	}
53 };
54 
55 
56 struct Expr_GetInt_c : public ExprLocatorTraits_t
57 {
Expr_GetInt_cExpr_GetInt_c58 	Expr_GetInt_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetInt_c59 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float) tMatch.GetAttr ( m_tLocator ); } // FIXME! OPTIMIZE!!! we can go the short route here
IntEvalExpr_GetInt_c60 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
Int64EvalExpr_GetInt_c61 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.GetAttr ( m_tLocator ); }
62 };
63 
64 
65 struct Expr_GetBits_c : public ExprLocatorTraits_t
66 {
Expr_GetBits_cExpr_GetBits_c67 	Expr_GetBits_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetBits_c68 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float) tMatch.GetAttr ( m_tLocator ); }
IntEvalExpr_GetBits_c69 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
Int64EvalExpr_GetBits_c70 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.GetAttr ( m_tLocator ); }
71 };
72 
73 
74 struct Expr_GetSint_c : public ExprLocatorTraits_t
75 {
Expr_GetSint_cExpr_GetSint_c76 	Expr_GetSint_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetSint_c77 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)(int)tMatch.GetAttr ( m_tLocator ); }
IntEvalExpr_GetSint_c78 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
Int64EvalExpr_GetSint_c79 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
80 };
81 
82 
83 struct Expr_GetFloat_c : public ExprLocatorTraits_t
84 {
Expr_GetFloat_cExpr_GetFloat_c85 	Expr_GetFloat_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetFloat_c86 	virtual float Eval ( const CSphMatch & tMatch ) const { return tMatch.GetAttrFloat ( m_tLocator ); }
87 };
88 
89 
90 struct Expr_GetString_c : public ExprLocatorTraits_t
91 {
92 	const BYTE * m_pStrings;
93 
Expr_GetString_cExpr_GetString_c94 	Expr_GetString_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetString_c95 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
CommandExpr_GetString_c96 	virtual void Command ( ESphExprCommand eCmd, void * pArg ) { if ( eCmd==SPH_EXPR_SET_STRING_POOL ) m_pStrings = (const BYTE*)pArg; }
97 
StringEvalExpr_GetString_c98 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
99 	{
100 		SphAttr_t iOff = tMatch.GetAttr ( m_tLocator );
101 		if ( iOff>0 )
102 			return sphUnpackStr ( m_pStrings + iOff, ppStr );
103 
104 		*ppStr = NULL;
105 		return 0;
106 	}
107 };
108 
109 
110 struct Expr_GetMva_c : public ExprLocatorTraits_t
111 {
112 	const DWORD * m_pMva;
113 	bool m_bArenaProhibit;
114 
Expr_GetMva_cExpr_GetMva_c115 	Expr_GetMva_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ), m_pMva ( NULL ), m_bArenaProhibit ( false ) {}
EvalExpr_GetMva_c116 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
CommandExpr_GetMva_c117 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
118 	{
119 		if ( eCmd==SPH_EXPR_SET_MVA_POOL )
120 		{
121 			const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
122 			assert ( pPool );
123 			m_pMva = pPool->m_pMva;
124 			m_bArenaProhibit = pPool->m_bArenaProhibit;
125 		}
126 	}
MvaEvalExpr_GetMva_c127 	virtual const DWORD * MvaEval ( const CSphMatch & tMatch ) const { return tMatch.GetAttrMVA ( m_tLocator, m_pMva, m_bArenaProhibit ); }
128 };
129 
130 
131 struct Expr_GetFactorsAttr_c : public ExprLocatorTraits_t
132 {
Expr_GetFactorsAttr_cExpr_GetFactorsAttr_c133 	Expr_GetFactorsAttr_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetFactorsAttr_c134 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
FactorEvalExpr_GetFactorsAttr_c135 	virtual const DWORD * FactorEval ( const CSphMatch & tMatch ) const { return (DWORD *)tMatch.GetAttr ( m_tLocator ); }
136 };
137 
138 
139 struct Expr_GetConst_c : public ISphExpr
140 {
141 	float m_fValue;
Expr_GetConst_cExpr_GetConst_c142 	explicit Expr_GetConst_c ( float fValue ) : m_fValue ( fValue ) {}
EvalExpr_GetConst_c143 	virtual float Eval ( const CSphMatch & ) const { return m_fValue; }
IntEvalExpr_GetConst_c144 	virtual int IntEval ( const CSphMatch & ) const { return (int)m_fValue; }
Int64EvalExpr_GetConst_c145 	virtual int64_t Int64Eval ( const CSphMatch & ) const { return (int64_t)m_fValue; }
IsConstExpr_GetConst_c146 	virtual bool IsConst () const { return true; }
147 };
148 
149 
150 struct Expr_GetIntConst_c : public ISphExpr
151 {
152 	int m_iValue;
Expr_GetIntConst_cExpr_GetIntConst_c153 	explicit Expr_GetIntConst_c ( int iValue ) : m_iValue ( iValue ) {}
EvalExpr_GetIntConst_c154 	virtual float Eval ( const CSphMatch & ) const { return (float) m_iValue; } // no assert() here cause generic float Eval() needs to work even on int-evaluator tree
IntEvalExpr_GetIntConst_c155 	virtual int IntEval ( const CSphMatch & ) const { return m_iValue; }
Int64EvalExpr_GetIntConst_c156 	virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_iValue; }
IsConstExpr_GetIntConst_c157 	virtual bool IsConst () const { return true; }
158 };
159 
160 
161 struct Expr_GetInt64Const_c : public ISphExpr
162 {
163 	int64_t m_iValue;
Expr_GetInt64Const_cExpr_GetInt64Const_c164 	explicit Expr_GetInt64Const_c ( int64_t iValue ) : m_iValue ( iValue ) {}
EvalExpr_GetInt64Const_c165 	virtual float Eval ( const CSphMatch & ) const { return (float) m_iValue; } // no assert() here cause generic float Eval() needs to work even on int-evaluator tree
IntEvalExpr_GetInt64Const_c166 	virtual int IntEval ( const CSphMatch & ) const { assert ( 0 ); return (int)m_iValue; }
Int64EvalExpr_GetInt64Const_c167 	virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_iValue; }
IsConstExpr_GetInt64Const_c168 	virtual bool IsConst () const { return true; }
169 };
170 
171 
172 struct Expr_GetStrConst_c : public ISphStringExpr
173 {
174 	CSphString m_sVal;
175 	int m_iLen;
176 
Expr_GetStrConst_cExpr_GetStrConst_c177 	explicit Expr_GetStrConst_c ( const char * sVal, int iLen, bool bUnescape )
178 	{
179 		if ( iLen>0 )
180 		{
181 			if ( bUnescape )
182 				SqlUnescape ( m_sVal, sVal, iLen );
183 			else
184 				m_sVal.SetBinary ( sVal, iLen );
185 		}
186 		m_iLen = m_sVal.Length();
187 	}
188 
StringEvalExpr_GetStrConst_c189 	virtual int StringEval ( const CSphMatch &, const BYTE ** ppStr ) const
190 	{
191 		*ppStr = (const BYTE*) m_sVal.cstr();
192 		return m_iLen;
193 	}
194 
EvalExpr_GetStrConst_c195 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
IntEvalExpr_GetStrConst_c196 	virtual int IntEval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
Int64EvalExpr_GetStrConst_c197 	virtual int64_t Int64Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
IsConstExpr_GetStrConst_c198 	virtual bool IsConst () const { return true; }
199 };
200 
201 
202 struct Expr_GetZonespanlist_c : public ISphStringExpr
203 {
204 	const CSphVector<int> * m_pData;
205 	mutable CSphStringBuilder m_sBuilder;
206 
Expr_GetZonespanlist_cExpr_GetZonespanlist_c207 	explicit Expr_GetZonespanlist_c ()
208 		: m_pData ( NULL )
209 	{}
210 
StringEvalExpr_GetZonespanlist_c211 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
212 	{
213 		assert ( ppStr );
214 		if ( !m_pData || !m_pData->GetLength() )
215 		{
216 			*ppStr = NULL;
217 			return 0;
218 		}
219 		m_sBuilder.Clear();
220 		const CSphVector<int> & dSpans = *m_pData;
221 		int iStart = tMatch.m_iTag + 1; // spans[tag] contains the length, so the 1st data index is tag+1
222 		int iEnd = iStart + dSpans [ tMatch.m_iTag ]; // [start,end) now covers all data indexes
223 		for ( int i=iStart; i<iEnd; i+=2 )
224 			m_sBuilder.Appendf ( " %d:%d", 1+dSpans[i], 1+dSpans[i+1] ); // convert our 0-based span numbers to human 1-based ones
225 		*ppStr = (const BYTE *) CSphString ( m_sBuilder.cstr() ).Leak();
226 		return m_sBuilder.Length();
227 	}
228 
CommandExpr_GetZonespanlist_c229 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
230 	{
231 		if ( eCmd==SPH_EXPR_SET_EXTRA_DATA )
232 			static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_ZONESPANS, (void**)&m_pData );
233 	}
234 
IsStringPtrExpr_GetZonespanlist_c235 	virtual bool IsStringPtr() const
236 	{
237 		return true;
238 	}
239 };
240 
241 
242 struct Expr_GetRankFactors_c : public ISphStringExpr
243 {
244 	/// hash type MUST BE IN SYNC with RankerState_Export_fn in sphinxsearch.cpp
245 	CSphOrderedHash < CSphString, SphDocID_t, IdentityHash_fn, 256 > * m_pFactors;
246 
Expr_GetRankFactors_cExpr_GetRankFactors_c247 	explicit Expr_GetRankFactors_c ()
248 		: m_pFactors ( NULL )
249 	{}
250 
StringEvalExpr_GetRankFactors_c251 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
252 	{
253 		assert ( ppStr );
254 		if ( !m_pFactors )
255 		{
256 			*ppStr = NULL;
257 			return 0;
258 		}
259 
260 		CSphString * sVal = (*m_pFactors) ( tMatch.m_uDocID );
261 		if ( !sVal )
262 		{
263 			*ppStr = NULL;
264 			return 0;
265 		}
266 		int iLen = sVal->Length();
267 		*ppStr = (const BYTE*)sVal->Leak();
268 		m_pFactors->Delete ( tMatch.m_uDocID );
269 		return iLen;
270 	}
271 
CommandExpr_GetRankFactors_c272 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
273 	{
274 		if ( eCmd==SPH_EXPR_SET_EXTRA_DATA )
275 			static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_RANKFACTORS, (void**)&m_pFactors );
276 	}
277 
IsStringPtrExpr_GetRankFactors_c278 	virtual bool IsStringPtr() const
279 	{
280 		return true;
281 	}
282 };
283 
284 
285 struct Expr_GetPackedFactors_c : public ISphStringExpr
286 {
287 	SphFactorHash_t * m_pHash;
288 
Expr_GetPackedFactors_cExpr_GetPackedFactors_c289 	explicit Expr_GetPackedFactors_c ()
290 		: m_pHash ( NULL )
291 	{}
292 
FactorEvalExpr_GetPackedFactors_c293 	virtual const DWORD * FactorEval ( const CSphMatch & tMatch ) const
294 	{
295 		if ( !m_pHash || !m_pHash->GetLength() )
296 			return NULL;
297 
298 		SphFactorHashEntry_t * pEntry = (*m_pHash)[ (int)( tMatch.m_uDocID % m_pHash->GetLength() ) ];
299 		assert ( pEntry );
300 
301 		while ( pEntry && pEntry->m_iId!=tMatch.m_uDocID )
302 			pEntry = pEntry->m_pNext;
303 
304 		if ( !pEntry )
305 			return NULL;
306 
307 		DWORD uDataLen = (BYTE *)pEntry - (BYTE *)pEntry->m_pData;
308 
309 		BYTE * pData = new BYTE[uDataLen];
310 		memcpy ( pData, pEntry->m_pData, uDataLen );
311 
312 		return (DWORD *)pData;
313 	}
314 
CommandExpr_GetPackedFactors_c315 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
316 	{
317 		if ( eCmd==SPH_EXPR_SET_EXTRA_DATA )
318 			static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_PACKEDFACTORS, (void**)&m_pHash );
319 	}
320 
IsStringPtrExpr_GetPackedFactors_c321 	virtual bool IsStringPtr() const
322 	{
323 		return true;
324 	}
325 };
326 
327 
328 struct Expr_BM25F_c : public ISphExpr
329 {
330 	SphExtraDataRankerState_t	m_tRankerState;
331 	float						m_fK1;
332 	float						m_fB;
333 	float						m_fWeightedAvgDocLen;
334 	CSphVector<int>				m_dWeights;		///< per field weights
335 	SphFactorHash_t *			m_pHash;
336 	CSphVector<CSphNamedVariant>	m_dFieldWeights;
337 
Expr_BM25F_cExpr_BM25F_c338 	Expr_BM25F_c ( float k1, float b, CSphVector<CSphNamedVariant> * pFieldWeights )
339 		: m_pHash ( NULL )
340 	{
341 		// bind k1, b
342 		m_fK1 = k1;
343 		m_fB = b;
344 		if ( pFieldWeights )
345 			m_dFieldWeights.SwapData ( *pFieldWeights );
346 	}
347 
EvalExpr_BM25F_c348 	float Eval ( const CSphMatch & tMatch ) const
349 	{
350 		if ( !m_pHash || !m_pHash->GetLength() )
351 			return 0.0f;
352 
353 		SphFactorHashEntry_t * pEntry = (*m_pHash)[ (int)( tMatch.m_uDocID % m_pHash->GetLength() ) ];
354 		assert ( pEntry );
355 
356 		while ( pEntry && pEntry->m_iId!=tMatch.m_uDocID )
357 			pEntry = pEntry->m_pNext;
358 
359 		if ( !pEntry )
360 			return 0.0f;
361 
362 		SPH_UDF_FACTORS tUnpacked;
363 		sphinx_factors_init ( &tUnpacked );
364 #ifndef NDEBUG
365 		Verify ( sphinx_factors_unpack ( (const unsigned int*)pEntry->m_pData, &tUnpacked )==0 );
366 #else
367 		sphinx_factors_unpack ( (const unsigned int*)pEntry->m_pData, &tUnpacked ); // fix MSVC Release warning
368 #endif
369 
370 		// compute document length
371 		// OPTIMIZE? could precompute and store total dl in attrs, but at a storage cost
372 		// OPTIMIZE? could at least share between multiple BM25F instances, if there are many
373 		float dl = 0;
374 		CSphAttrLocator tLoc = m_tRankerState.m_tFieldLensLoc;
375 		if ( tLoc.m_iBitOffset>=0 )
376 		{
377 			for ( int i=0; i<m_tRankerState.m_iFields; i++ )
378 			{
379 				dl += tMatch.GetAttr ( tLoc ) * m_dWeights[i];
380 				tLoc.m_iBitOffset += 32;
381 			}
382 		}
383 
384 		// compute (the current instance of) BM25F
385 		float fRes = 0.0f;
386 		for ( int iWord=0; iWord<m_tRankerState.m_iMaxQpos; iWord++ )
387 		{
388 			if ( !tUnpacked.term[iWord].keyword_mask )
389 				continue;
390 
391 			// compute weighted TF
392 			float tf = 0.0f;
393 			for ( int i=0; i<m_tRankerState.m_iFields; i++ )
394 			{
395 				tf += tUnpacked.field_tf[ iWord + 1 + i * ( 1 + m_tRankerState.m_iMaxQpos ) ] * m_dWeights[i];
396 			}
397 			float idf = tUnpacked.term[iWord].idf; // FIXME? zeroed out for dupes!
398 			fRes += tf / ( tf + m_fK1 * ( 1.0f - m_fB + m_fB * dl / m_fWeightedAvgDocLen ) ) * idf;
399 		}
400 
401 		sphinx_factors_deinit ( &tUnpacked );
402 
403 		return fRes + 0.5f; // map to [0..1] range
404 	}
405 
CommandExpr_BM25F_c406 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
407 	{
408 		if ( eCmd!=SPH_EXPR_SET_EXTRA_DATA )
409 			return;
410 
411 		bool bGotHash = static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_PACKEDFACTORS, (void**)&m_pHash );
412 		if ( !bGotHash )
413 			return;
414 
415 		bool bGotState = static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_RANKER_STATE, (void**)&m_tRankerState );
416 		if ( !bGotState )
417 			return;
418 
419 		// bind weights
420 		m_dWeights.Resize ( m_tRankerState.m_iFields );
421 		m_dWeights.Fill ( 1 );
422 		if ( m_dFieldWeights.GetLength() )
423 		{
424 			ARRAY_FOREACH ( i, m_dFieldWeights )
425 			{
426 				// FIXME? report errors if field was not found?
427 				CSphString & sField = m_dFieldWeights[i].m_sKey;
428 				int iField = m_tRankerState.m_pSchema->GetFieldIndex ( sField.cstr() );
429 				if ( iField>=0 )
430 					m_dWeights[iField] = m_dFieldWeights[i].m_iValue;
431 			}
432 		}
433 
434 		// compute weighted avgdl
435 		m_fWeightedAvgDocLen = 1.0f;
436 		if ( m_tRankerState.m_pFieldLens )
437 		{
438 			m_fWeightedAvgDocLen = 0.0f;
439 			ARRAY_FOREACH ( i, m_dWeights )
440 				m_fWeightedAvgDocLen += m_tRankerState.m_pFieldLens[i] * m_dWeights[i];
441 		}
442 		m_fWeightedAvgDocLen /= m_tRankerState.m_iTotalDocuments;
443 	}
444 };
445 
446 
447 struct Expr_GetId_c : public ISphExpr
448 {
EvalExpr_GetId_c449 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)tMatch.m_uDocID; }
IntEvalExpr_GetId_c450 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.m_uDocID; }
Int64EvalExpr_GetId_c451 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.m_uDocID; }
452 };
453 
454 
455 struct Expr_GetWeight_c : public ISphExpr
456 {
EvalExpr_GetWeight_c457 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)tMatch.m_iWeight; }
IntEvalExpr_GetWeight_c458 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.m_iWeight; }
Int64EvalExpr_GetWeight_c459 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.m_iWeight; }
460 };
461 
462 //////////////////////////////////////////////////////////////////////////
463 
464 struct Expr_Arglist_c : public ISphExpr
465 {
466 	CSphVector<ISphExpr *> m_dArgs;
467 
Expr_Arglist_cExpr_Arglist_c468 	Expr_Arglist_c ( ISphExpr * pLeft, ISphExpr * pRight )
469 	{
470 		AddArgs ( pLeft );
471 		AddArgs ( pRight );
472 	}
473 
~Expr_Arglist_cExpr_Arglist_c474 	~Expr_Arglist_c ()
475 	{
476 		ARRAY_FOREACH ( i, m_dArgs )
477 			SafeRelease ( m_dArgs[i] );
478 	}
479 
AddArgsExpr_Arglist_c480 	void AddArgs ( ISphExpr * pExpr )
481 	{
482 		// not an arglist? just add it
483 		if ( !pExpr->IsArglist() )
484 		{
485 			m_dArgs.Add ( pExpr );
486 			return;
487 		}
488 
489 		// arglist? take ownership of its args, and dismiss it
490 		Expr_Arglist_c * pArgs = (Expr_Arglist_c *) pExpr;
491 		ARRAY_FOREACH ( i, pArgs->m_dArgs )
492 		{
493 			m_dArgs.Add ( pArgs->m_dArgs[i] );
494 			pArgs->m_dArgs[i] = NULL;
495 		}
496 		SafeRelease ( pExpr );
497 	}
498 
IsArglistExpr_Arglist_c499 	virtual bool IsArglist () const
500 	{
501 		return true;
502 	}
503 
GetArgExpr_Arglist_c504 	virtual ISphExpr * GetArg ( int i ) const
505 	{
506 		if ( i>=m_dArgs.GetLength() )
507 			return NULL;
508 		return m_dArgs[i];
509 	}
510 
GetNumArgsExpr_Arglist_c511 	virtual int GetNumArgs() const
512 	{
513 		return m_dArgs.GetLength();
514 	}
515 
EvalExpr_Arglist_c516 	virtual float Eval ( const CSphMatch & ) const
517 	{
518 		assert ( 0 && "internal error: Eval() must not be explicitly called on arglist" );
519 		return 0.0f;
520 	}
521 
CommandExpr_Arglist_c522 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
523 	{
524 		ARRAY_FOREACH ( i, m_dArgs )
525 			m_dArgs[i]->Command ( eCmd, pArg );
526 	}
527 };
528 
529 //////////////////////////////////////////////////////////////////////////
530 
531 struct Expr_Unary_c : public ISphExpr
532 {
533 	ISphExpr * m_pFirst;
534 
Expr_Unary_cExpr_Unary_c535 	explicit Expr_Unary_c ( ISphExpr * p ) : m_pFirst(p) {}
~Expr_Unary_cExpr_Unary_c536 	~Expr_Unary_c() { SafeRelease ( m_pFirst ); }
537 
CommandExpr_Unary_c538 	virtual void Command ( ESphExprCommand eCmd, void * pArg ) { m_pFirst->Command ( eCmd, pArg ); }
539 };
540 
541 
542 struct Expr_Crc32_c : public Expr_Unary_c
543 {
Expr_Crc32_cExpr_Crc32_c544 	explicit Expr_Crc32_c ( ISphExpr * pFirst ) : Expr_Unary_c ( pFirst ) {}
EvalExpr_Crc32_c545 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
IntEvalExpr_Crc32_c546 	virtual int IntEval ( const CSphMatch & tMatch ) const
547 	{
548 		const BYTE * pStr;
549 		int iLen = m_pFirst->StringEval ( tMatch, &pStr );
550 		DWORD uCrc = sphCRC32 ( pStr, iLen );
551 		if ( m_pFirst->IsStringPtr() )
552 			SafeDeleteArray ( pStr );
553 		return uCrc;
554 	}
Int64EvalExpr_Crc32_c555 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)(DWORD)IntEval ( tMatch ); }
556 };
557 
558 
/// returns the i-th Fibonacci number (F(0)=0, F(1)=1); negative i yields 0
/// values that do not fit into 32 bits wrap around modulo 2^32
static inline int Fibonacci ( int i )
{
	if ( i<0 )
		return 0;

	// accumulate in unsigned; signed overflow would be undefined behavior,
	// and it would occur from i=46 on because f1 runs one step ahead of f0
	unsigned int f0 = 0;
	unsigned int f1 = 1;
	for ( int j=0; j+1<i; j+=2 )
	{
		f0 += f1; // f_j
		f1 += f0; // f_{j+1}
	}

	// the loop advances in pairs, so parity of i picks the right accumulator
	return (int)( ( i & 1 ) ? f1 : f0 );
}
573 
574 
575 struct Expr_Fibonacci_c : public Expr_Unary_c
576 {
Expr_Fibonacci_cExpr_Fibonacci_c577 	explicit Expr_Fibonacci_c ( ISphExpr * pFirst ) : Expr_Unary_c ( pFirst ) {}
578 
EvalExpr_Fibonacci_c579 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
IntEvalExpr_Fibonacci_c580 	virtual int IntEval ( const CSphMatch & tMatch ) const { return Fibonacci ( m_pFirst->IntEval ( tMatch ) ); }
Int64EvalExpr_Fibonacci_c581 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval ( tMatch ); }
582 };
583 
584 
585 struct Expr_ToString_c : public Expr_Unary_c
586 {
587 protected:
588 	ESphAttr	m_eArg;
589 	mutable CSphStringBuilder m_sBuilder;
590 
591 public:
Expr_ToString_cExpr_ToString_c592 	Expr_ToString_c ( ISphExpr * pArg, ESphAttr eArg )
593 		: Expr_Unary_c ( pArg )
594 		, m_eArg ( eArg )
595 	{}
596 
EvalExpr_ToString_c597 	virtual float Eval ( const CSphMatch & ) const
598 	{
599 		assert ( 0 );
600 		return 0.0f;
601 	}
602 
StringEvalExpr_ToString_c603 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
604 	{
605 		m_sBuilder.Clear();
606 		switch ( m_eArg )
607 		{
608 			case SPH_ATTR_INTEGER:	m_sBuilder.Appendf ( "%u", m_pFirst->IntEval ( tMatch ) ); break;
609 			case SPH_ATTR_BIGINT:	m_sBuilder.Appendf ( INT64_FMT, m_pFirst->Int64Eval ( tMatch ) ); break;
610 			case SPH_ATTR_FLOAT:	m_sBuilder.Appendf ( "%f", m_pFirst->Eval ( tMatch ) ); break;
611 			case SPH_ATTR_UINT32SET:
612 			case SPH_ATTR_INT64SET:
613 				{
614 					const DWORD * pValues = m_pFirst->MvaEval ( tMatch );
615 					if ( !pValues || !*pValues )
616 						break;
617 
618 					DWORD nValues = *pValues++;
619 					assert (!( m_eArg==SPH_ATTR_INT64SET && ( nValues & 1 ) ));
620 
621 					// OPTIMIZE? minibuffer on stack, less allocs, manual formatting vs printf, etc
622 					if ( m_eArg==SPH_ATTR_UINT32SET )
623 					{
624 						while ( nValues-- )
625 						{
626 							if ( m_sBuilder.Length() )
627 								m_sBuilder += ",";
628 							m_sBuilder.Appendf ( "%u", *pValues++ );
629 						}
630 					} else
631 					{
632 						for ( ; nValues; nValues-=2, pValues+=2 )
633 						{
634 							if ( m_sBuilder.Length() )
635 								m_sBuilder += ",";
636 							m_sBuilder.Appendf ( INT64_FMT, MVA_UPSIZE ( pValues ) );
637 						}
638 					}
639 				}
640 				break;
641 			case SPH_ATTR_STRINGPTR:
642 				return m_pFirst->StringEval ( tMatch, ppStr );
643 
644 			default:
645 				assert ( 0 && "unhandled arg type in TO_STRING()" );
646 				break;
647 		}
648 		if ( !m_sBuilder.Length() )
649 		{
650 			*ppStr = NULL;
651 			return 0;
652 		}
653 		*ppStr = (const BYTE *) CSphString ( m_sBuilder.cstr() ).Leak();
654 		return m_sBuilder.Length();
655 	}
656 
IsStringPtrExpr_ToString_c657 	virtual bool IsStringPtr() const
658 	{
659 		return true;
660 	}
661 };
662 
663 //////////////////////////////////////////////////////////////////////////
664 
665 /// generic JSON value evaluation
666 /// can handle arbitrary stacks of jsoncol.key1.arr2[indexexpr3].key4[keynameexpr5]
667 /// m_dArgs holds the expressions that return actual accessors (either keynames or indexes)
668 /// m_dRetTypes holds their respective types
669 struct Expr_JsonField_c : public ExprLocatorTraits_t
670 {
671 protected:
672 	const BYTE *			m_pStrings;
673 	CSphVector<ISphExpr *>	m_dArgs;
674 	CSphVector<ESphAttr>	m_dRetTypes;
675 
676 public:
677 	/// takes over the expressions
Expr_JsonField_cExpr_JsonField_c678 	Expr_JsonField_c ( const CSphAttrLocator & tLocator, int iLocator, CSphVector<ISphExpr*> & dArgs, CSphVector<ESphAttr> & dRetTypes )
679 		: ExprLocatorTraits_t ( tLocator, iLocator )
680 		, m_pStrings ( NULL )
681 	{
682 		assert ( dArgs.GetLength()==dRetTypes.GetLength() );
683 		m_dArgs.SwapData ( dArgs );
684 		m_dRetTypes.SwapData ( dRetTypes );
685 	}
686 
~Expr_JsonField_cExpr_JsonField_c687 	~Expr_JsonField_c ()
688 	{
689 		ARRAY_FOREACH ( i, m_dArgs )
690 			SafeRelease ( m_dArgs[i] );
691 	}
692 
CommandExpr_JsonField_c693 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
694 	{
695 		if ( eCmd==SPH_EXPR_SET_STRING_POOL )
696 			m_pStrings = (const BYTE*)pArg;
697 		else if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS && m_iLocator!=-1 )
698 			static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
699 		ARRAY_FOREACH ( i, m_dArgs )
700 			if ( m_dArgs[i] )
701 				m_dArgs[i]->Command ( eCmd, pArg );
702 	}
703 
EvalExpr_JsonField_c704 	virtual float Eval ( const CSphMatch & ) const
705 	{
706 		assert ( 0 && "one just does not simply evaluate a JSON as float" );
707 		return 0;
708 	}
709 
DoEvalExpr_JsonField_c710 	virtual int64_t DoEval ( ESphJsonType eJson, const BYTE * pVal, const CSphMatch & tMatch ) const
711 	{
712 		int iLen;
713 		const BYTE * pStr;
714 
715 		ARRAY_FOREACH ( i, m_dRetTypes )
716 		{
717 			switch ( m_dRetTypes[i] )
718 			{
719 			case SPH_ATTR_INTEGER:	eJson = sphJsonFindByIndex ( eJson, &pVal, m_dArgs[i]->IntEval ( tMatch ) ); break;
720 			case SPH_ATTR_BIGINT:	eJson = sphJsonFindByIndex ( eJson, &pVal, (int)m_dArgs[i]->Int64Eval ( tMatch ) ); break;
721 			case SPH_ATTR_FLOAT:	eJson = sphJsonFindByIndex ( eJson, &pVal, (int)m_dArgs[i]->Eval ( tMatch ) ); break;
722 			case SPH_ATTR_STRING:
723 				// is this assert will fail someday it's ok
724 				// just remove it and add this code instead to handle possible memory leak
725 				// if ( m_dArgv[i]->IsStringPtr() ) SafeDeleteArray ( pStr );
726 				assert ( !m_dArgs[i]->IsStringPtr() );
727 				iLen = m_dArgs[i]->StringEval ( tMatch, &pStr );
728 				eJson = sphJsonFindByKey ( eJson, &pVal, (const void *)pStr, iLen, sphJsonKeyMask ( (const char *)pStr, iLen ) );
729 				break;
730 			case SPH_ATTR_JSON_FIELD: // handle cases like "json.a [ json.b ]"
731 				{
732 					uint64_t uValue = m_dArgs[i]->Int64Eval ( tMatch );
733 					const BYTE * p = m_pStrings + ( uValue & 0xffffffff );
734 					ESphJsonType eType = (ESphJsonType)( uValue >> 32 );
735 
736 					switch ( eType )
737 					{
738 					case JSON_INT32:	eJson = sphJsonFindByIndex ( eJson, &pVal, sphJsonLoadInt ( &p ) ); break;
739 					case JSON_INT64:	eJson = sphJsonFindByIndex ( eJson, &pVal, (int)sphJsonLoadBigint ( &p ) ); break;
740 					case JSON_DOUBLE:	eJson = sphJsonFindByIndex ( eJson, &pVal, (int)sphQW2D ( sphJsonLoadBigint ( &p ) ) ); break;
741 					case JSON_STRING:
742 						iLen = sphJsonUnpackInt ( &p );
743 						eJson = sphJsonFindByKey ( eJson, &pVal, (const void *)p, iLen, sphJsonKeyMask ( (const char *)p, iLen ) );
744 						break;
745 					default:
746 						return 0;
747 					}
748 					break;
749 				}
750 			default:
751 				return 0;
752 			}
753 
754 			if ( eJson==JSON_EOF )
755 				return 0;
756 		}
757 
758 		// keep actual attribute type and offset to data packed
759 		int64_t iPacked = ( ( (int64_t)( pVal-m_pStrings ) ) | ( ( (int64_t)eJson )<<32 ) );
760 		return iPacked;
761 	}
762 
Int64EvalExpr_JsonField_c763 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
764 	{
765 		if ( !m_pStrings )
766 			return 0;
767 
768 		uint64_t uOffset = tMatch.GetAttr ( m_tLocator );
769 		if ( !uOffset )
770 			return 0;
771 
772 		if ( m_tLocator.m_bDynamic )
773 		{
774 			// extends precalculated (aliased) field
775 			const BYTE * pVal = m_pStrings + ( uOffset & 0xffffffff );
776 			ESphJsonType eJson = (ESphJsonType)( uOffset >> 32 );
777 			return DoEval ( eJson, pVal, tMatch );
778 		}
779 
780 		const BYTE * pVal = NULL;
781 		sphUnpackStr ( m_pStrings + uOffset, &pVal );
782 		if ( !pVal )
783 			return 0;
784 
785 		ESphJsonType eJson = sphJsonFindFirst ( &pVal );
786 		return DoEval ( eJson, pVal, tMatch );
787 	}
788 };
789 
790 
791 /// fastpath (instead of generic JsonField_c) for jsoncol.key access by a static key name
792 struct Expr_JsonFastKey_c : public ExprLocatorTraits_t
793 {
794 protected:
795 	const BYTE *	m_pStrings;
796 	CSphString		m_sKey;
797 	int				m_iKeyLen;
798 	DWORD			m_uKeyBloom;
799 
800 public:
801 	/// takes over the expressions
Expr_JsonFastKey_cExpr_JsonFastKey_c802 	Expr_JsonFastKey_c ( const CSphAttrLocator & tLocator, int iLocator, ISphExpr * pArg )
803 		: ExprLocatorTraits_t ( tLocator, iLocator )
804 		, m_pStrings ( NULL )
805 	{
806 		assert ( ( tLocator.m_iBitOffset % ROWITEM_BITS )==0 );
807 		assert ( tLocator.m_iBitCount==ROWITEM_BITS );
808 
809 		Expr_GetStrConst_c * pKey = (Expr_GetStrConst_c*)pArg;
810 		m_sKey = pKey->m_sVal;
811 		m_iKeyLen = pKey->m_iLen;
812 		m_uKeyBloom = sphJsonKeyMask ( m_sKey.cstr(), m_iKeyLen );
813 		SafeRelease ( pArg );
814 	}
815 
CommandExpr_JsonFastKey_c816 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
817 	{
818 		if ( eCmd==SPH_EXPR_SET_STRING_POOL )
819 			m_pStrings = (const BYTE*)pArg;
820 	}
821 
EvalExpr_JsonFastKey_c822 	virtual float Eval ( const CSphMatch & ) const
823 	{
824 		assert ( 0 && "one just does not simply evaluate a JSON as float" );
825 		return 0;
826 	}
827 
Int64EvalExpr_JsonFastKey_c828 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
829 	{
830 		// get pointer to JSON blob data
831 		assert ( m_pStrings );
832 		DWORD uOffset = m_tLocator.m_bDynamic
833 			? tMatch.m_pDynamic [ m_tLocator.m_iBitOffset >> ROWITEM_SHIFT ]
834 			: tMatch.m_pStatic [ m_tLocator.m_iBitOffset >> ROWITEM_SHIFT ];
835 		if ( !uOffset )
836 			return 0;
837 		const BYTE * pJson;
838 		sphUnpackStr ( m_pStrings + uOffset, &pJson );
839 
840 		// all root objects start with a Bloom mask; quickly check it
841 		if ( ( sphGetDword(pJson) & m_uKeyBloom )!=m_uKeyBloom )
842 			return 0;
843 
844 		// OPTIMIZE? FindByKey does an extra (redundant) bloom check inside
845 		ESphJsonType eJson = sphJsonFindByKey ( JSON_ROOT, &pJson, m_sKey.cstr(), m_iKeyLen, m_uKeyBloom );
846 		if ( eJson==JSON_EOF )
847 			return 0;
848 
849 		// keep actual attribute type and offset to data packed
850 		int64_t iPacked = ( ( (int64_t)( pJson-m_pStrings ) ) | ( ( (int64_t)eJson )<<32 ) );
851 		return iPacked;
852 	}
853 };
854 
855 
856 struct Expr_JsonFieldConv_c : public ISphExpr
857 {
858 protected:
859 	const BYTE *	m_pStrings;
860 	ISphExpr *		m_pArg;
861 
862 public:
Expr_JsonFieldConv_cExpr_JsonFieldConv_c863 	explicit Expr_JsonFieldConv_c ( ISphExpr * pArg )
864 		: m_pStrings ( NULL )
865 		, m_pArg ( pArg )
866 	{}
867 
~Expr_JsonFieldConv_cExpr_JsonFieldConv_c868 	~Expr_JsonFieldConv_c()
869 	{
870 		SafeRelease ( m_pArg );
871 	}
872 
CommandExpr_JsonFieldConv_c873 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
874 	{
875 		if ( eCmd==SPH_EXPR_SET_STRING_POOL )
876 			m_pStrings = (const BYTE*)pArg;
877 		if ( m_pArg )
878 			m_pArg->Command ( eCmd, pArg );
879 	}
880 
881 protected:
GetKeyExpr_JsonFieldConv_c882 	virtual ESphJsonType GetKey ( const BYTE ** ppKey, const CSphMatch & tMatch ) const
883 	{
884 		assert ( ppKey );
885 		if ( !m_pStrings )
886 			return JSON_EOF;
887 		uint64_t uValue = m_pArg->Int64Eval ( tMatch );
888 		*ppKey = m_pStrings + ( uValue & 0xffffffff );
889 		return (ESphJsonType)( uValue >> 32 );
890 	}
891 
892 	// generic evaluate
893 	template < typename T >
DoEvalExpr_JsonFieldConv_c894 	T DoEval ( const CSphMatch & tMatch ) const
895 	{
896 		const BYTE * pVal = NULL;
897 		ESphJsonType eJson = GetKey ( &pVal, tMatch );
898 		switch ( eJson )
899 		{
900 		case JSON_INT32:	return (T)sphJsonLoadInt ( &pVal );
901 		case JSON_INT64:	return (T)sphJsonLoadBigint ( &pVal );
902 		case JSON_DOUBLE:	return (T)sphQW2D ( sphJsonLoadBigint ( &pVal ) );
903 		case JSON_TRUE:		return 1;
904 		case JSON_STRING:
905 			{
906 				if ( !g_bJsonAutoconvNumbers )
907 					return 0;
908 				int iLen = sphJsonUnpackInt ( &pVal );
909 				int64_t iVal;
910 				double fVal;
911 				ESphJsonType eType;
912 				if ( sphJsonStringToNumber ( (const char*)pVal, iLen, eType, iVal, fVal ) )
913 					return eType==JSON_DOUBLE ? (T)fVal : (T)iVal;
914 			}
915 		default:			return 0;
916 		}
917 	}
918 
919 public:
StringEvalExpr_JsonFieldConv_c920 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
921 	{
922 		const BYTE * pVal = NULL;
923 		ESphJsonType eJson = GetKey ( &pVal, tMatch );
924 		return ( eJson==JSON_STRING ) ? sphUnpackStr ( pVal, ppStr ) : 0;
925 	}
EvalExpr_JsonFieldConv_c926 	virtual float Eval ( const CSphMatch & tMatch ) const { return DoEval<float> ( tMatch ); }
IntEvalExpr_JsonFieldConv_c927 	virtual int IntEval ( const CSphMatch & tMatch ) const { return DoEval<int> ( tMatch ); }
Int64EvalExpr_JsonFieldConv_c928 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return DoEval<int64_t> ( tMatch ); }
929 };
930 
931 
932 struct Expr_JsonFieldAggr_c : public Expr_JsonFieldConv_c
933 {
934 protected:
935 	ESphAggrFunc m_eFunc;
936 
937 public:
Expr_JsonFieldAggr_cExpr_JsonFieldAggr_c938 	Expr_JsonFieldAggr_c ( ISphExpr * pArg, ESphAggrFunc eFunc )
939 		: Expr_JsonFieldConv_c ( pArg )
940 		, m_eFunc ( eFunc )
941 	{}
942 
IntEvalExpr_JsonFieldAggr_c943 	virtual int IntEval ( const CSphMatch & tMatch ) const
944 	{
945 		const BYTE * pVal = NULL;
946 		ESphJsonType eJson = GetKey ( &pVal, tMatch );
947 		switch ( eJson )
948 		{
949 		case JSON_INT32_VECTOR:
950 			{
951 				int iVals = sphJsonUnpackInt ( &pVal );
952 				if ( iVals==0 )
953 					return 0;
954 
955 				const int * p = (const int*) pVal;
956 				int iRes = *p; // first value
957 
958 				switch ( m_eFunc )
959 				{
960 				case SPH_AGGR_MIN: while ( --iVals ) if ( *++p<iRes ) iRes = *p; break;
961 				case SPH_AGGR_MAX: while ( --iVals ) if ( *++p>iRes ) iRes = *p; break;
962 				default:
963 					return 0;
964 				}
965 				return iRes;
966 			}
967 		default:
968 			return 0;
969 		}
970 	}
971 
StringEvalExpr_JsonFieldAggr_c972 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
973 	{
974 		CSphString sBuf;
975 		*ppStr = NULL;
976 		const BYTE * pVal = NULL;
977 		ESphJsonType eJson = GetKey ( &pVal, tMatch );
978 		switch ( eJson )
979 		{
980 		case JSON_INT32_VECTOR:
981 			sBuf.SetSprintf ( "%u", IntEval ( tMatch ) );
982 			*ppStr = (const BYTE *) sBuf.Leak();
983 			return strlen ( (const char*) *ppStr );
984 
985 		case JSON_STRING_VECTOR:
986 			{
987 				sphJsonUnpackInt ( &pVal ); // skip node length
988 
989 				int iVals = sphJsonUnpackInt ( &pVal );
990 				if ( iVals==0 )
991 					return 0;
992 
993 				switch ( m_eFunc )
994 				{
995 				case SPH_AGGR_MIN:
996 				case SPH_AGGR_MAX:
997 					{
998 						// first value
999 						int iLen = sphJsonUnpackInt ( &pVal );
1000 						const char *pRes = (const char*) pVal;
1001 						int iResLen = iLen;
1002 
1003 						while ( --iVals )
1004 						{
1005 							pVal += iLen;
1006 							iLen = sphJsonUnpackInt ( &pVal );
1007 
1008 							// binary string comparison
1009 							int iCmp = memcmp ( pRes, (const char*)pVal, iLen<iResLen ? iLen : iResLen );
1010 							if ( iCmp==0 && iLen!=iResLen )
1011 								iCmp = iResLen-iLen;
1012 
1013 							if ( ( m_eFunc==SPH_AGGR_MIN && iCmp>0 ) || ( m_eFunc==SPH_AGGR_MAX && iCmp<0 ) )
1014 							{
1015 								pRes = (const char*)pVal;
1016 								iResLen = iLen;
1017 							}
1018 						}
1019 
1020 						sBuf.SetBinary ( pRes, iResLen );
1021 						*ppStr = (const BYTE *) sBuf.Leak();
1022 						return iResLen;
1023 					}
1024 				default:
1025 					return 0;
1026 				}
1027 			}
1028 		default:
1029 			return 0;
1030 		}
1031 	}
1032 
EvalExpr_JsonFieldAggr_c1033 	virtual float	Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_JsonFieldAggr_c1034 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
IsStringPtrExpr_JsonFieldAggr_c1035 	virtual bool IsStringPtr() const { return true; }
1036 };
1037 
1038 
1039 struct Expr_JsonFieldLength_c : public Expr_JsonFieldConv_c
1040 {
1041 public:
Expr_JsonFieldLength_cExpr_JsonFieldLength_c1042 	explicit Expr_JsonFieldLength_c ( ISphExpr * pArg )
1043 		: Expr_JsonFieldConv_c ( pArg )
1044 	{}
1045 
IntEvalExpr_JsonFieldLength_c1046 	virtual int IntEval ( const CSphMatch & tMatch ) const
1047 	{
1048 		const BYTE * pVal = NULL;
1049 		ESphJsonType eJson = GetKey ( &pVal, tMatch );
1050 		return sphJsonFieldLength ( eJson, pVal );
1051 	}
1052 
EvalExpr_JsonFieldLength_c1053 	virtual float	Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_JsonFieldLength_c1054 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
1055 };
1056 
1057 
1058 struct Expr_Time_c : public ISphExpr
1059 {
1060 	bool m_bUTC;
1061 	bool m_bDate;
1062 
Expr_Time_cExpr_Time_c1063 	explicit Expr_Time_c ( bool bUTC, bool bDate )
1064 		: m_bUTC ( bUTC )
1065 		, m_bDate ( bDate )
1066 	{}
1067 
IntEvalExpr_Time_c1068 	virtual int IntEval ( const CSphMatch & ) const
1069 	{
1070 		struct tm s; // can't get non-UTC timestamp without mktime
1071 		time_t t = time ( NULL );
1072 		if ( m_bUTC )
1073 			gmtime_r ( &t, &s );
1074 		else
1075 			localtime_r ( &t, &s );
1076 		return (int) mktime ( &s );
1077 	}
1078 
StringEvalExpr_Time_c1079 	virtual int StringEval ( const CSphMatch &, const BYTE ** ppStr ) const
1080 	{
1081 		CSphString sVal;
1082 		struct tm s;
1083 		time_t t = time ( NULL );
1084 		if ( m_bUTC )
1085 			gmtime_r ( &t, &s );
1086 		else
1087 			localtime_r ( &t, &s );
1088 		if ( m_bDate )
1089 			sVal.SetSprintf ( "%04d-%02d-%02d %02d:%02d:%02d", s.tm_year+1900, s.tm_mon+1, s.tm_mday, s.tm_hour, s.tm_min, s.tm_sec );
1090 		else
1091 			sVal.SetSprintf ( "%02d:%02d:%02d", s.tm_hour, s.tm_min, s.tm_sec );
1092 		*ppStr = (const BYTE*) sVal.Leak();
1093 		return sVal.Length();
1094 	}
1095 
EvalExpr_Time_c1096 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_Time_c1097 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
IsStringPtrExpr_Time_c1098 	virtual bool IsStringPtr () const { return true; }
1099 };
1100 
1101 
1102 struct Expr_TimeDiff_c : public ISphExpr
1103 {
1104 	ISphExpr * m_pFirst;
1105 	ISphExpr * m_pSecond;
1106 
Expr_TimeDiff_cExpr_TimeDiff_c1107 	Expr_TimeDiff_c ( ISphExpr * pFirst, ISphExpr * pSecond )
1108 		: m_pFirst ( pFirst )
1109 		, m_pSecond ( pSecond )
1110 	{}
1111 
~Expr_TimeDiff_cExpr_TimeDiff_c1112 	~Expr_TimeDiff_c()
1113 	{
1114 		SafeRelease ( m_pFirst );
1115 		SafeRelease ( m_pSecond );
1116 	}
1117 
IntEvalExpr_TimeDiff_c1118 	virtual int IntEval ( const CSphMatch & tMatch ) const
1119 	{
1120 		assert ( m_pFirst && m_pSecond );
1121 		return m_pFirst->IntEval ( tMatch )-m_pSecond->IntEval ( tMatch );
1122 	}
1123 
StringEvalExpr_TimeDiff_c1124 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
1125 	{
1126 		int iVal = IntEval ( tMatch );
1127 		CSphString sVal;
1128 		int t = iVal<0 ? -iVal : iVal;
1129 		sVal.SetSprintf ( "%s%02d:%02d:%02d", iVal<0 ? "-" : "", t/60/60, (t/60)%60, t%60 );
1130 		*ppStr = (const BYTE*) sVal.Leak();
1131 		return sVal.Length();
1132 	}
1133 
EvalExpr_TimeDiff_c1134 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_TimeDiff_c1135 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
IsStringPtrExpr_TimeDiff_c1136 	virtual bool IsStringPtr () const { return true; }
1137 };
1138 
1139 
1140 struct Expr_Iterator_c : Expr_JsonField_c
1141 {
1142 	SphAttr_t * m_pData;
1143 
Expr_Iterator_cExpr_Iterator_c1144 	Expr_Iterator_c ( const CSphAttrLocator & tLocator, int iLocator, CSphVector<ISphExpr*> & dArgs, CSphVector<ESphAttr> & dRetTypes, SphAttr_t * pData )
1145 		: Expr_JsonField_c ( tLocator, iLocator, dArgs, dRetTypes )
1146 		, m_pData ( pData )
1147 	{}
1148 
Int64EvalExpr_Iterator_c1149 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
1150 	{
1151 		uint64_t uValue = m_pData ? *m_pData : 0;
1152 		const BYTE * p = m_pStrings + ( uValue & 0xffffffff );
1153 		ESphJsonType eType = (ESphJsonType)( uValue >> 32 );
1154 		return DoEval ( eType, p, tMatch );
1155 	}
1156 };
1157 
1158 
1159 struct Expr_ForIn_c : public Expr_JsonFieldConv_c
1160 {
1161 	ISphExpr * m_pExpr;
1162 	bool m_bStrict;
1163 	bool m_bIndex;
1164 	mutable uint64_t m_uData;
1165 
Expr_ForIn_cExpr_ForIn_c1166 	Expr_ForIn_c ( ISphExpr * pArg, bool bStrict, bool bIndex )
1167 		: Expr_JsonFieldConv_c ( pArg )
1168 		, m_pExpr ( NULL )
1169 		, m_bStrict ( bStrict )
1170 		, m_bIndex ( bIndex )
1171 	{}
1172 
~Expr_ForIn_cExpr_ForIn_c1173 	~Expr_ForIn_c ()
1174 	{
1175 		SafeRelease ( m_pExpr );
1176 	}
1177 
GetRefExpr_ForIn_c1178 	SphAttr_t * GetRef ()
1179 	{
1180 		return (SphAttr_t*)&m_uData;
1181 	}
1182 
SetExprExpr_ForIn_c1183 	void SetExpr ( ISphExpr * pExpr )
1184 	{
1185 		m_pExpr = pExpr;
1186 	}
1187 
CommandExpr_ForIn_c1188 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
1189 	{
1190 		Expr_JsonFieldConv_c::Command ( eCmd, pArg );
1191 		if ( m_pExpr )
1192 			m_pExpr->Command ( eCmd, pArg );
1193 	}
1194 
ExprEvalExpr_ForIn_c1195 	bool ExprEval ( int * pResult, const CSphMatch & tMatch, int iIndex, ESphJsonType eType, const BYTE * pVal ) const
1196 	{
1197 		m_uData = ( ( (int64_t)( pVal-m_pStrings ) ) | ( ( (int64_t)eType )<<32 ) );
1198 		bool bMatch = m_pExpr->Eval ( tMatch )!=0;
1199 		*pResult = bMatch ? ( m_bIndex ? iIndex : 1 ) : ( m_bIndex ? -1 : 0 );
1200 		return m_bStrict ? bMatch : !bMatch;
1201 	}
1202 
IntEvalExpr_ForIn_c1203 	virtual int IntEval ( const CSphMatch & tMatch ) const
1204 	{
1205 		int iResult = m_bIndex ? -1 : 0;
1206 
1207 		if ( !m_pExpr )
1208 			return iResult;
1209 
1210 		const BYTE * p = NULL;
1211 		ESphJsonType eJson = GetKey ( &p, tMatch );
1212 
1213 		switch ( eJson )
1214 		{
1215 		case JSON_INT32_VECTOR:
1216 		case JSON_INT64_VECTOR:
1217 		case JSON_DOUBLE_VECTOR:
1218 			{
1219 				int iSize = eJson==JSON_INT32_VECTOR ? 4 : 8;
1220 				ESphJsonType eType = eJson==JSON_INT32_VECTOR ? JSON_INT32
1221 					: eJson==JSON_INT64_VECTOR ? JSON_INT64
1222 					: JSON_DOUBLE;
1223 				int iLen = sphJsonUnpackInt ( &p );
1224 				for ( int i=0; i<iLen; i++, p+=iSize )
1225 					if ( !ExprEval ( &iResult, tMatch, i, eType, p ) )
1226 						break;
1227 				break;
1228 			}
1229 		case JSON_STRING_VECTOR:
1230 			{
1231 				sphJsonUnpackInt ( &p );
1232 				int iLen = sphJsonUnpackInt ( &p );
1233 				for ( int i=0;i<iLen;i++ )
1234 				{
1235 					if ( !ExprEval ( &iResult, tMatch, i, JSON_STRING, p ) )
1236 						break;
1237 					sphJsonSkipNode ( JSON_STRING, &p );
1238 				}
1239 				break;
1240 			}
1241 		case JSON_MIXED_VECTOR:
1242 			{
1243 				sphJsonUnpackInt ( &p );
1244 				int iLen = sphJsonUnpackInt ( &p );
1245 				for ( int i=0; i<iLen; i++ )
1246 				{
1247 					ESphJsonType eType = (ESphJsonType)*p++;
1248 					if ( !ExprEval ( &iResult, tMatch, i, eType, p ) )
1249 						break;
1250 					sphJsonSkipNode ( eType, &p );
1251 				}
1252 				break;
1253 			}
1254 		default:
1255 			break;
1256 		}
1257 
1258 		return iResult;
1259 	}
1260 
EvalExpr_ForIn_c1261 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_ForIn_c1262 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
1263 };
1264 
1265 
GetCollationFn(ESphCollation eCollation)1266 SphStringCmp_fn GetCollationFn ( ESphCollation eCollation )
1267 {
1268 	switch ( eCollation )
1269 	{
1270 		case SPH_COLLATION_LIBC_CS:			return sphCollateLibcCS;
1271 		case SPH_COLLATION_UTF8_GENERAL_CI:	return sphCollateUtf8GeneralCI;
1272 		case SPH_COLLATION_BINARY:			return sphCollateBinary;
1273 		default:							return sphCollateLibcCI;
1274 	}
1275 }
1276 
1277 
1278 struct Expr_StrEq_c : public ISphExpr
1279 {
1280 	ISphExpr * m_pLeft;
1281 	ISphExpr * m_pRight;
1282 	SphStringCmp_fn m_fnStrCmp;
1283 
Expr_StrEq_cExpr_StrEq_c1284 	Expr_StrEq_c ( ISphExpr * pLeft, ISphExpr * pRight, ESphCollation eCollation )
1285 		: m_pLeft ( pLeft )
1286 		, m_pRight ( pRight )
1287 	{
1288 		m_fnStrCmp = GetCollationFn ( eCollation );
1289 	}
1290 
~Expr_StrEq_cExpr_StrEq_c1291 	~Expr_StrEq_c ()
1292 	{
1293 		SafeRelease ( m_pLeft );
1294 		SafeRelease ( m_pRight );
1295 	}
1296 
CommandExpr_StrEq_c1297 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
1298 	{
1299 		assert ( m_pLeft && m_pRight );
1300 		m_pLeft->Command ( eCmd, pArg );
1301 		m_pRight->Command ( eCmd, pArg );
1302 	}
1303 
IntEvalExpr_StrEq_c1304 	virtual int IntEval ( const CSphMatch & tMatch ) const
1305 	{
1306 		const BYTE * pLeft;
1307 		const BYTE * pRight;
1308 		int iLeft = m_pLeft->StringEval ( tMatch, &pLeft );
1309 		int iRight = m_pRight->StringEval ( tMatch, &pRight );
1310 
1311 		CSphString sStr1 ( iLeft ? (const char*)pLeft : "", iLeft );
1312 		CSphString sStr2 ( iRight ? (const char*)pRight : "", iRight );
1313 
1314 		bool bEq = m_fnStrCmp ( (const BYTE*)sStr1.cstr(), (const BYTE*)sStr2.cstr(), false )==0;
1315 
1316 		if ( m_pLeft->IsStringPtr() )	SafeDeleteArray ( pLeft );
1317 		if ( m_pRight->IsStringPtr() )	SafeDeleteArray ( pRight );
1318 		return (int)bEq;
1319 	}
1320 
EvalExpr_StrEq_c1321 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_StrEq_c1322 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
1323 };
1324 
1325 
1326 struct Expr_JsonFieldIsNull_c : public Expr_JsonFieldConv_c
1327 {
1328 	bool m_bEquals;
1329 
Expr_JsonFieldIsNull_cExpr_JsonFieldIsNull_c1330 	explicit Expr_JsonFieldIsNull_c ( ISphExpr * pArg, bool bEquals )
1331 		: Expr_JsonFieldConv_c ( pArg )
1332 		, m_bEquals ( bEquals )
1333 	{}
1334 
IntEvalExpr_JsonFieldIsNull_c1335 	virtual int IntEval ( const CSphMatch & tMatch ) const
1336 	{
1337 		const BYTE * pVal = NULL;
1338 		ESphJsonType eJson = GetKey ( &pVal, tMatch );
1339 		return m_bEquals ^ ( eJson!=JSON_EOF && eJson!=JSON_NULL );
1340 	}
1341 
EvalExpr_JsonFieldIsNull_c1342 	virtual float	Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_JsonFieldIsNull_c1343 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
1344 };
1345 
1346 //////////////////////////////////////////////////////////////////////////
1347 
1348 struct Expr_MinTopWeight : public ISphExpr
1349 {
1350 	int * m_pWeight;
1351 
Expr_MinTopWeightExpr_MinTopWeight1352 	Expr_MinTopWeight() : m_pWeight ( NULL ) {}
1353 
IntEvalExpr_MinTopWeight1354 	virtual int IntEval ( const CSphMatch & ) const			{ return m_pWeight ? *m_pWeight : -INT_MAX; }
EvalExpr_MinTopWeight1355 	virtual float Eval ( const CSphMatch & ) const			{ return m_pWeight ? (float)*m_pWeight : -FLT_MAX; }
Int64EvalExpr_MinTopWeight1356 	virtual int64_t Int64Eval ( const CSphMatch & ) const	{ return m_pWeight ? *m_pWeight : -LLONG_MAX; }
1357 
CommandExpr_MinTopWeight1358 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
1359 	{
1360 		CSphMatch * pWorst;
1361 		if ( eCmd!=SPH_EXPR_SET_EXTRA_DATA )
1362 			return;
1363 		if ( static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_QUEUE_WORST, (void**)&pWorst ) )
1364 			m_pWeight = &pWorst->m_iWeight;
1365 	}
1366 };
1367 
1368 struct Expr_MinTopSortval : public ISphExpr
1369 {
1370 	CSphMatch *		m_pWorst;
1371 	int				m_iSortval;
1372 
Expr_MinTopSortvalExpr_MinTopSortval1373 	Expr_MinTopSortval()
1374 		: m_pWorst ( NULL )
1375 		, m_iSortval ( -1 )
1376 	{}
1377 
EvalExpr_MinTopSortval1378 	virtual float Eval ( const CSphMatch & ) const
1379 	{
1380 		if ( m_pWorst && m_pWorst->m_pDynamic && m_iSortval>=0 )
1381 			return *(float*)( m_pWorst->m_pDynamic + m_iSortval );
1382 		return -FLT_MAX;
1383 	}
1384 
CommandExpr_MinTopSortval1385 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
1386 	{
1387 		if ( eCmd!=SPH_EXPR_SET_EXTRA_DATA )
1388 			return;
1389 		ISphExtra * p = (ISphExtra*)pArg;
1390 		if ( !p->ExtraData ( EXTRA_GET_QUEUE_WORST, (void**)&m_pWorst )
1391 			|| !p->ExtraData ( EXTRA_GET_QUEUE_SORTVAL, (void**)&m_iSortval ) )
1392 		{
1393 			m_pWorst = NULL;
1394 		}
1395 	}
1396 };
1397 
1398 //////////////////////////////////////////////////////////////////////////
1399 
// Operand shorthands for the macro-generated expression classes.
// They expand inside a method where m_pFirst/m_pSecond/m_pThird and tMatch are in scope.

// float evaluation
#define FIRST		( m_pFirst->Eval ( tMatch ) )
#define SECOND		( m_pSecond->Eval ( tMatch ) )
#define THIRD		( m_pThird->Eval ( tMatch ) )

// 32-bit integer evaluation
#define INTFIRST	( m_pFirst->IntEval ( tMatch ) )
#define INTSECOND	( m_pSecond->IntEval ( tMatch ) )
#define INTTHIRD	( m_pThird->IntEval ( tMatch ) )

// 64-bit integer evaluation
#define INT64FIRST	( m_pFirst->Int64Eval ( tMatch ) )
#define INT64SECOND	( m_pSecond->Int64Eval ( tMatch ) )
#define INT64THIRD	( m_pThird->Int64Eval ( tMatch ) )
1411 
/// opens a struct derived from Expr_Unary_c with a forwarding constructor;
/// the struct body is left open and must be closed by the user (see DECLARE_END)
#define DECLARE_UNARY_TRAITS(_classname) \
	struct _classname : public Expr_Unary_c \
	{ \
		explicit _classname ( ISphExpr * pFirst ) \
			: Expr_Unary_c ( pFirst ) \
		{}

/// closes a struct body opened by one of the DECLARE_xxx_TRAITS macros
#define DECLARE_END() };

/// complete unary struct that only provides the float evaluator
#define DECLARE_UNARY_FLT(_classname,_expr) \
	DECLARE_UNARY_TRAITS ( _classname ) \
		virtual float Eval ( const CSphMatch & tMatch ) const \
		{ \
			return _expr; \
		} \
	};

/// complete unary struct with separate float (_expr), int (_expr2) and int64 (_expr3) evaluators
#define DECLARE_UNARY_INT(_classname,_expr,_expr2,_expr3) \
	DECLARE_UNARY_TRAITS ( _classname ) \
		virtual float Eval ( const CSphMatch & tMatch ) const \
		{ \
			return (float)_expr; \
		} \
		virtual int IntEval ( const CSphMatch & tMatch ) const \
		{ \
			return _expr2; \
		} \
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const \
		{ \
			return _expr3; \
		} \
	};
1430 
// integer absolute value; note that the argument is evaluated twice.
// The negation wraps the whole argument, so compound arguments such as IABS(a-b) work.
#define IABS(_arg) ( (_arg)>0 ? (_arg) : -(_arg) )
1432 
1433 DECLARE_UNARY_INT ( Expr_Neg_c,		-FIRST,					-INTFIRST,			-INT64FIRST )
DECLARE_UNARY_INT(Expr_Abs_c,fabs (FIRST),IABS (INTFIRST),IABS (INT64FIRST))1434 DECLARE_UNARY_INT ( Expr_Abs_c,		fabs(FIRST),			IABS(INTFIRST),		IABS(INT64FIRST) )
1435 DECLARE_UNARY_INT ( Expr_Ceil_c,	float(ceil(FIRST)),		int(ceil(FIRST)),	int64_t(ceil(FIRST)) )
1436 DECLARE_UNARY_INT ( Expr_Floor_c,	float(floor(FIRST)),	int(floor(FIRST)),	int64_t(floor(FIRST)) )
1437 
1438 DECLARE_UNARY_FLT ( Expr_Sin_c,		float(sin(FIRST)) )
1439 DECLARE_UNARY_FLT ( Expr_Cos_c,		float(cos(FIRST)) )
1440 DECLARE_UNARY_FLT ( Expr_Exp_c,		float(exp(FIRST)) )
1441 
1442 DECLARE_UNARY_INT ( Expr_NotInt_c,		(float)(INTFIRST?0:1),		INTFIRST?0:1,	INTFIRST?0:1 )
1443 DECLARE_UNARY_INT ( Expr_NotInt64_c,	(float)(INT64FIRST?0:1),	INT64FIRST?0:1,	INT64FIRST?0:1 )
1444 DECLARE_UNARY_INT ( Expr_Sint_c,		(float)(INTFIRST),			INTFIRST,		INTFIRST )
1445 
1446 DECLARE_UNARY_TRAITS ( Expr_Ln_c )
1447        virtual float Eval ( const CSphMatch & tMatch ) const
1448        {
1449                float fFirst = m_pFirst->Eval ( tMatch );
1450                // ideally this would be SQLNULL instead of plain 0.0f
1451                return fFirst>0.0f ? (float)log ( fFirst ) : 0.0f;
1452        }
1453 DECLARE_END()
1454 
DECLARE_UNARY_TRAITS(Expr_Log2_c)1455 DECLARE_UNARY_TRAITS ( Expr_Log2_c )
1456        virtual float Eval ( const CSphMatch & tMatch ) const
1457        {
1458                float fFirst = m_pFirst->Eval ( tMatch );
1459                // ideally this would be SQLNULL instead of plain 0.0f
1460                return fFirst>0.0f ? (float)( log ( fFirst )*M_LOG2E ) : 0.0f;
1461        }
1462 DECLARE_END()
1463 
DECLARE_UNARY_TRAITS(Expr_Log10_c)1464 DECLARE_UNARY_TRAITS ( Expr_Log10_c )
1465        virtual float Eval ( const CSphMatch & tMatch ) const
1466        {
1467                float fFirst = m_pFirst->Eval ( tMatch );
1468                // ideally this would be SQLNULL instead of plain 0.0f
1469                return fFirst>0.0f ? (float)( log ( fFirst )*M_LOG10E ) : 0.0f;
1470        }
1471 DECLARE_END()
1472 
DECLARE_UNARY_TRAITS(Expr_Sqrt_c)1473 DECLARE_UNARY_TRAITS ( Expr_Sqrt_c )
1474        virtual float Eval ( const CSphMatch & tMatch ) const
1475        {
1476                float fFirst = m_pFirst->Eval ( tMatch );
1477                // ideally this would be SQLNULL instead of plain 0.0f in case of negative argument
1478                // MEGA optimization: do not call sqrt for 0.0f
1479                return fFirst>0.0f ? (float)sqrt ( fFirst ) : 0.0f;
1480        }
1481 DECLARE_END()
1482 
1483 //////////////////////////////////////////////////////////////////////////
1484 
/// opens a two-operand expression struct: keeps both operands, releases them on destruction
/// and forwards commands to both; the struct body is left open and must be closed by the user
#define DECLARE_BINARY_TRAITS(_classname) \
	struct _classname : public ISphExpr \
	{ \
		ISphExpr * m_pFirst; \
		ISphExpr * m_pSecond; \
		\
		_classname ( ISphExpr * pFirst, ISphExpr * pSecond ) \
			: m_pFirst ( pFirst ) \
			, m_pSecond ( pSecond ) \
		{} \
		\
		~_classname () \
		{ \
			SafeRelease ( m_pFirst ); \
			SafeRelease ( m_pSecond ); \
		} \
		\
		virtual void Command ( ESphExprCommand eCmd, void * pArg ) \
		{ \
			m_pFirst->Command ( eCmd, pArg ); \
			m_pSecond->Command ( eCmd, pArg ); \
		}

/// complete binary struct that only provides the float evaluator
#define DECLARE_BINARY_FLT(_classname,_expr) \
	DECLARE_BINARY_TRAITS ( _classname ) \
		virtual float Eval ( const CSphMatch & tMatch ) const \
		{ \
			return _expr; \
		} \
	};

/// complete binary struct with separate float (_expr), int (_expr2) and int64 (_expr3) evaluators
#define DECLARE_BINARY_INT(_classname,_expr,_expr2,_expr3) \
	DECLARE_BINARY_TRAITS ( _classname ) \
		virtual float Eval ( const CSphMatch & tMatch ) const \
		{ \
			return _expr; \
		} \
		virtual int IntEval ( const CSphMatch & tMatch ) const \
		{ \
			return _expr2; \
		} \
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const \
		{ \
			return _expr3; \
		} \
	};

/// three structs per operation (<name>Float_c, <name>Int_c, <name>Int64_c); each computes
/// in its own type and derives the other two evaluators from that native one
#define DECLARE_BINARY_POLY(_classname,_expr,_expr2,_expr3) \
	DECLARE_BINARY_INT ( _classname##Float_c, _expr, (int)Eval(tMatch), (int64_t)Eval(tMatch ) ) \
	DECLARE_BINARY_INT ( _classname##Int_c, (float)IntEval(tMatch), _expr2, (int64_t)IntEval(tMatch) ) \
	DECLARE_BINARY_INT ( _classname##Int64_c, (float)Int64Eval(tMatch), (int)Int64Eval(tMatch), _expr3 )

// boolean condition to 1/0 in float and int flavours
#define IFFLT(_expr)	( (_expr) ? 1.0f : 0.0f )
#define IFINT(_expr)	( (_expr) ? 1 : 0 )
1513 
1514 DECLARE_BINARY_INT ( Expr_Add_c,	FIRST + SECOND,						(DWORD)INTFIRST + (DWORD)INTSECOND,				(uint64_t)INT64FIRST + (uint64_t)INT64SECOND )
1515 DECLARE_BINARY_INT ( Expr_Sub_c,	FIRST - SECOND,						(DWORD)INTFIRST - (DWORD)INTSECOND,				(uint64_t)INT64FIRST - (uint64_t)INT64SECOND )
1516 DECLARE_BINARY_INT ( Expr_Mul_c,	FIRST * SECOND,						(DWORD)INTFIRST * (DWORD)INTSECOND,				(uint64_t)INT64FIRST * (uint64_t)INT64SECOND )
1517 DECLARE_BINARY_INT ( Expr_BitAnd_c,	(float)(int(FIRST)&int(SECOND)),	INTFIRST & INTSECOND,				INT64FIRST & INT64SECOND )
1518 DECLARE_BINARY_INT ( Expr_BitOr_c,	(float)(int(FIRST)|int(SECOND)),	INTFIRST | INTSECOND,				INT64FIRST | INT64SECOND )
1519 DECLARE_BINARY_INT ( Expr_Mod_c,	(float)(int(FIRST)%int(SECOND)),	INTFIRST % INTSECOND,				INT64FIRST % INT64SECOND )
1520 
DECLARE_BINARY_TRAITS(Expr_Div_c)1521 DECLARE_BINARY_TRAITS ( Expr_Div_c )
1522        virtual float Eval ( const CSphMatch & tMatch ) const
1523        {
1524                float fSecond = m_pSecond->Eval ( tMatch );
1525                // ideally this would be SQLNULL instead of plain 0.0f
1526                return fSecond ? m_pFirst->Eval ( tMatch )/fSecond : 0.0f;
1527        }
1528 DECLARE_END()
1529 
DECLARE_BINARY_TRAITS(Expr_Idiv_c)1530 DECLARE_BINARY_TRAITS ( Expr_Idiv_c )
1531 	virtual float Eval ( const CSphMatch & tMatch ) const
1532 	{
1533 		int iSecond = int(SECOND);
1534 		// ideally this would be SQLNULL instead of plain 0.0f
1535 		return iSecond ? float(int(FIRST)/iSecond) : 0.0f;
1536 	}
1537 
IntEval(const CSphMatch & tMatch) const1538 	virtual int IntEval ( const CSphMatch & tMatch ) const
1539 	{
1540 		int iSecond = INTSECOND;
1541 		// ideally this would be SQLNULL instead of plain 0
1542 		return iSecond ? ( INTFIRST / iSecond ) : 0;
1543 	}
1544 
Int64Eval(const CSphMatch & tMatch) const1545 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
1546 	{
1547 		int64_t iSecond = INT64SECOND;
1548 		// ideally this would be SQLNULL instead of plain 0
1549 		return iSecond ? ( INT64FIRST / iSecond ) : 0;
1550 	}
1551 DECLARE_END()
1552 
1553 DECLARE_BINARY_POLY ( Expr_Lt,		IFFLT ( FIRST<SECOND ),					IFINT ( INTFIRST<INTSECOND ),		IFINT ( INT64FIRST<INT64SECOND ) )
1554 DECLARE_BINARY_POLY ( Expr_Gt,		IFFLT ( FIRST>SECOND ),					IFINT ( INTFIRST>INTSECOND ),		IFINT ( INT64FIRST>INT64SECOND ) )
1555 DECLARE_BINARY_POLY ( Expr_Lte,		IFFLT ( FIRST<=SECOND ),				IFINT ( INTFIRST<=INTSECOND ),		IFINT ( INT64FIRST<=INT64SECOND ) )
1556 DECLARE_BINARY_POLY ( Expr_Gte,		IFFLT ( FIRST>=SECOND ),				IFINT ( INTFIRST>=INTSECOND ),		IFINT ( INT64FIRST>=INT64SECOND ) )
1557 DECLARE_BINARY_POLY ( Expr_Eq,		IFFLT ( fabs ( FIRST-SECOND )<=1e-6 ),	IFINT ( INTFIRST==INTSECOND ),		IFINT ( INT64FIRST==INT64SECOND ) )
1558 DECLARE_BINARY_POLY ( Expr_Ne,		IFFLT ( fabs ( FIRST-SECOND )>1e-6 ),	IFINT ( INTFIRST!=INTSECOND ),		IFINT ( INT64FIRST!=INT64SECOND ) )
1559 
1560 DECLARE_BINARY_INT ( Expr_Min_c,	Min ( FIRST, SECOND ),					Min ( INTFIRST, INTSECOND ),		Min ( INT64FIRST, INT64SECOND ) )
1561 DECLARE_BINARY_INT ( Expr_Max_c,	Max ( FIRST, SECOND ),					Max ( INTFIRST, INTSECOND ),		Max ( INT64FIRST, INT64SECOND ) )
1562 DECLARE_BINARY_FLT ( Expr_Pow_c,	float ( pow ( FIRST, SECOND ) ) )
1563 
1564 DECLARE_BINARY_POLY ( Expr_And,		FIRST!=0.0f && SECOND!=0.0f,		IFINT ( INTFIRST && INTSECOND ),	IFINT ( INT64FIRST && INT64SECOND ) )
1565 DECLARE_BINARY_POLY ( Expr_Or,		FIRST!=0.0f || SECOND!=0.0f,		IFINT ( INTFIRST || INTSECOND ),	IFINT ( INT64FIRST || INT64SECOND ) )
1566 
1567 DECLARE_BINARY_FLT ( Expr_Atan2_c,	float ( atan2 ( FIRST, SECOND ) ) )
1568 
1569 //////////////////////////////////////////////////////////////////////////
1570 
1571 /// boring base stuff
1572 struct ExprThreeway_c : public ISphExpr
1573 {
1574 	ISphExpr * m_pFirst;
1575 	ISphExpr * m_pSecond;
1576 	ISphExpr * m_pThird;
1577 
ExprThreeway_cExprThreeway_c1578 	ExprThreeway_c ( ISphExpr * pFirst, ISphExpr * pSecond, ISphExpr * pThird )
1579 		: m_pFirst ( pFirst )
1580 		, m_pSecond ( pSecond )
1581 		, m_pThird ( pThird )
1582 	{}
1583 
~ExprThreeway_cExprThreeway_c1584 	~ExprThreeway_c()
1585 	{
1586 		SafeRelease ( m_pFirst );
1587 		SafeRelease ( m_pSecond );
1588 		SafeRelease ( m_pThird );
1589 	}
1590 
CommandExprThreeway_c1591 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
1592 	{
1593 		m_pFirst->Command ( eCmd, pArg );
1594 		m_pSecond->Command ( eCmd, pArg );
1595 		m_pThird->Command ( eCmd, pArg );
1596 	}
1597 };
1598 
1599 #define DECLARE_TERNARY(_classname,_expr,_expr2,_expr3) \
1600 	struct _classname : public ExprThreeway_c \
1601 	{ \
1602 		_classname ( ISphExpr * pFirst, ISphExpr * pSecond, ISphExpr * pThird ) \
1603 			: ExprThreeway_c ( pFirst, pSecond, pThird ) {} \
1604 		\
1605 		virtual float Eval ( const CSphMatch & tMatch ) const { return _expr; } \
1606 		virtual int IntEval ( const CSphMatch & tMatch ) const { return _expr2; } \
1607 		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return _expr3; } \
1608 	};
1609 
1610 DECLARE_TERNARY ( Expr_If_c,	( FIRST!=0.0f ) ? SECOND : THIRD,	INTFIRST ? INTSECOND : INTTHIRD,	INT64FIRST ? INT64SECOND : INT64THIRD )
1611 DECLARE_TERNARY ( Expr_Madd_c,	FIRST*SECOND+THIRD,					INTFIRST*INTSECOND + INTTHIRD,		INT64FIRST*INT64SECOND + INT64THIRD )
DECLARE_TERNARY(Expr_Mul3_c,FIRST * SECOND * THIRD,INTFIRST * INTSECOND * INTTHIRD,INT64FIRST * INT64SECOND * INT64THIRD)1612 DECLARE_TERNARY ( Expr_Mul3_c,	FIRST*SECOND*THIRD,					INTFIRST*INTSECOND*INTTHIRD,		INT64FIRST*INT64SECOND*INT64THIRD )
1613 
1614 //////////////////////////////////////////////////////////////////////////
1615 
1616 #define DECLARE_TIMESTAMP(_classname,_expr) \
1617 	DECLARE_UNARY_TRAITS ( _classname ) \
1618 		virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval(tMatch); } \
1619 		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval(tMatch); } \
1620 		virtual int IntEval ( const CSphMatch & tMatch ) const \
1621 		{ \
1622 			time_t ts = (time_t)INTFIRST;	\
1623 			struct tm s; \
1624 			localtime_r ( &ts, &s ); \
1625 			return _expr; \
1626 		} \
1627 	};
1628 
1629 DECLARE_TIMESTAMP ( Expr_Day_c,				s.tm_mday )
1630 DECLARE_TIMESTAMP ( Expr_Month_c,			s.tm_mon+1 )
1631 DECLARE_TIMESTAMP ( Expr_Year_c,			s.tm_year+1900 )
1632 DECLARE_TIMESTAMP ( Expr_YearMonth_c,		(s.tm_year+1900)*100+s.tm_mon+1 )
1633 DECLARE_TIMESTAMP ( Expr_YearMonthDay_c,	(s.tm_year+1900)*10000+(s.tm_mon+1)*100+s.tm_mday )
1634 
1635 //////////////////////////////////////////////////////////////////////////
1636 // UDF CALL SITE
1637 //////////////////////////////////////////////////////////////////////////
1638 
1639 void * UdfMalloc ( int iLen )
1640 {
1641 	return new BYTE [ iLen ];
1642 }
1643 
1644 /// UDF call site
1645 struct UdfCall_t
1646 {
1647 	const PluginUDF_c *	m_pUdf;
1648 	SPH_UDF_INIT		m_tInit;
1649 	SPH_UDF_ARGS		m_tArgs;
1650 	CSphVector<int>		m_dArgs2Free; // these args should be freed explicitly
1651 
UdfCall_tUdfCall_t1652 	UdfCall_t()
1653 	{
1654 		m_pUdf = NULL;
1655 		m_tInit.func_data = NULL;
1656 		m_tInit.is_const = false;
1657 		m_tArgs.arg_count = 0;
1658 		m_tArgs.arg_types = NULL;
1659 		m_tArgs.arg_values = NULL;
1660 		m_tArgs.arg_names = NULL;
1661 		m_tArgs.str_lengths = NULL;
1662 		m_tArgs.fn_malloc = UdfMalloc;
1663 	}
1664 
~UdfCall_tUdfCall_t1665 	~UdfCall_t ()
1666 	{
1667 		if ( m_pUdf )
1668 			m_pUdf->Release();
1669 		SafeDeleteArray ( m_tArgs.arg_types );
1670 		SafeDeleteArray ( m_tArgs.arg_values );
1671 		SafeDeleteArray ( m_tArgs.arg_names );
1672 		SafeDeleteArray ( m_tArgs.str_lengths );
1673 	}
1674 };
1675 
1676 //////////////////////////////////////////////////////////////////////////
1677 // PARSER INTERNALS
1678 //////////////////////////////////////////////////////////////////////////
1679 class ExprParser_t;
1680 
1681 #ifdef 	CMAKE_GENERATED_GRAMMAR
1682 	#include "bissphinxexpr.h"
1683 #else
1684 	#include "yysphinxexpr.h"
1685 #endif
1686 
1687 
/// known functions
/// The enumerators are numbered consecutively from 0. The visible part of g_dFuncs lists its
/// entries in this same order, so new values presumably have to be added to both in step
/// (TODO confirm how the table is indexed).
enum Func_e
{
	// no arguments in g_dFuncs
	FUNC_NOW=0,

	// one argument in g_dFuncs
	FUNC_ABS,
	FUNC_CEIL,
	FUNC_FLOOR,
	FUNC_SIN,
	FUNC_COS,
	FUNC_LN,
	FUNC_LOG2,
	FUNC_LOG10,
	FUNC_EXP,
	FUNC_SQRT,
	FUNC_BIGINT,
	FUNC_SINT,
	FUNC_CRC32,
	FUNC_FIBONACCI,

	// one argument in g_dFuncs, integer result
	FUNC_DAY,
	FUNC_MONTH,
	FUNC_YEAR,
	FUNC_YEARMONTH,
	FUNC_YEARMONTHDAY,

	// two arguments in g_dFuncs
	FUNC_MIN,
	FUNC_MAX,
	FUNC_POW,
	FUNC_IDIV,

	// three arguments in g_dFuncs
	FUNC_IF,
	FUNC_MADD,
	FUNC_MUL3,

	FUNC_INTERVAL,
	FUNC_IN,
	FUNC_BITDOT,
	FUNC_REMAP,

	FUNC_GEODIST,
	FUNC_EXIST,
	FUNC_POLY2D,
	FUNC_GEOPOLY2D,
	FUNC_CONTAINS,
	FUNC_ZONESPANLIST,
	FUNC_TO_STRING,
	FUNC_RANKFACTORS,
	FUNC_PACKEDFACTORS,
	FUNC_FACTORS,
	FUNC_BM25F,
	FUNC_INTEGER,
	FUNC_DOUBLE,
	FUNC_LENGTH,
	FUNC_LEAST,
	FUNC_GREATEST,
	FUNC_UINT,

	FUNC_CURTIME,
	FUNC_UTC_TIME,
	FUNC_UTC_TIMESTAMP,
	FUNC_TIMEDIFF,
	FUNC_CURRENT_USER,
	FUNC_CONNECTION_ID,
	FUNC_ALL,
	FUNC_ANY,
	FUNC_INDEXOF,

	FUNC_MIN_TOP_WEIGHT,
	FUNC_MIN_TOP_SORTVAL,

	FUNC_ATAN2
};
1761 
1762 
/// built-in function descriptor, one row of the g_dFuncs table
/// the table uses positional initializers, so the field order here must not change
struct FuncDesc_t
{
	const char *	m_sName;	///< function name as listed in the table; FuncHashLookup() matches it case-insensitively
	int				m_iArgs;	///< declared argument count; NOTE(review): some rows use negative values (-1, -2, -4), presumably "variable count" - confirm against the arg checks
	Func_e			m_eFunc;	///< function id; FuncHashCheck() requires it to be equal to the row index
	ESphAttr		m_eRet;		///< declared return type; NOTE(review): SPH_ATTR_NONE presumably means "derived from the arguments" - confirm
};
1770 
1771 
/// descriptors of all the built-in functions
/// columns are: name, argument count, function id, return type (see FuncDesc_t)
/// row N must describe the Func_e value N; FuncHashCheck() verifies that, and also that
/// FuncHashLookup() resolves every name listed here back to its row index,
/// so any change to the names requires rebuilding the hash tables in FuncHashLookup()
static FuncDesc_t g_dFuncs[] =
{
	{ "now",			0,	FUNC_NOW,			SPH_ATTR_INTEGER },

	{ "abs",			1,	FUNC_ABS,			SPH_ATTR_NONE },
	{ "ceil",			1,	FUNC_CEIL,			SPH_ATTR_INTEGER },
	{ "floor",			1,	FUNC_FLOOR,			SPH_ATTR_INTEGER },
	{ "sin",			1,	FUNC_SIN,			SPH_ATTR_FLOAT },
	{ "cos",			1,	FUNC_COS,			SPH_ATTR_FLOAT },
	{ "ln",				1,	FUNC_LN,			SPH_ATTR_FLOAT },
	{ "log2",			1,	FUNC_LOG2,			SPH_ATTR_FLOAT },
	{ "log10",			1,	FUNC_LOG10,			SPH_ATTR_FLOAT },
	{ "exp",			1,	FUNC_EXP,			SPH_ATTR_FLOAT },
	{ "sqrt",			1,	FUNC_SQRT,			SPH_ATTR_FLOAT },
	{ "bigint",			1,	FUNC_BIGINT,		SPH_ATTR_BIGINT },	// type-enforcer special as-if-function
	{ "sint",			1,	FUNC_SINT,			SPH_ATTR_BIGINT },	// type-enforcer special as-if-function
	{ "crc32",			1,	FUNC_CRC32,			SPH_ATTR_INTEGER },
	{ "fibonacci",		1,	FUNC_FIBONACCI,		SPH_ATTR_INTEGER },

	{ "day",			1,	FUNC_DAY,			SPH_ATTR_INTEGER },
	{ "month",			1,	FUNC_MONTH,			SPH_ATTR_INTEGER },
	{ "year",			1,	FUNC_YEAR,			SPH_ATTR_INTEGER },
	{ "yearmonth",		1,	FUNC_YEARMONTH,		SPH_ATTR_INTEGER },
	{ "yearmonthday",	1,	FUNC_YEARMONTHDAY,	SPH_ATTR_INTEGER },

	{ "min",			2,	FUNC_MIN,			SPH_ATTR_NONE },
	{ "max",			2,	FUNC_MAX,			SPH_ATTR_NONE },
	{ "pow",			2,	FUNC_POW,			SPH_ATTR_FLOAT },
	{ "idiv",			2,	FUNC_IDIV,			SPH_ATTR_NONE },

	{ "if",				3,	FUNC_IF,			SPH_ATTR_NONE },
	{ "madd",			3,	FUNC_MADD,			SPH_ATTR_NONE },
	{ "mul3",			3,	FUNC_MUL3,			SPH_ATTR_NONE },

	{ "interval",		-2,	FUNC_INTERVAL,		SPH_ATTR_INTEGER },
	{ "in",				-1, FUNC_IN,			SPH_ATTR_INTEGER },
	{ "bitdot",			-1, FUNC_BITDOT,		SPH_ATTR_NONE },
	{ "remap",			4,	FUNC_REMAP,			SPH_ATTR_INTEGER },

	{ "geodist",		-4,	FUNC_GEODIST,		SPH_ATTR_FLOAT },
	{ "exist",			2,	FUNC_EXIST,			SPH_ATTR_NONE },
	{ "poly2d",			-1,	FUNC_POLY2D,		SPH_ATTR_POLY2D },
	{ "geopoly2d",		-1,	FUNC_GEOPOLY2D,		SPH_ATTR_POLY2D },
	{ "contains",		3,	FUNC_CONTAINS,		SPH_ATTR_INTEGER },
	{ "zonespanlist",	0,	FUNC_ZONESPANLIST,	SPH_ATTR_STRINGPTR },
	{ "to_string",		1,	FUNC_TO_STRING,		SPH_ATTR_STRINGPTR },
	{ "rankfactors",	0,	FUNC_RANKFACTORS,	SPH_ATTR_STRINGPTR },
	{ "packedfactors",	0,	FUNC_PACKEDFACTORS, SPH_ATTR_FACTORS },
	{ "factors",		0,	FUNC_FACTORS,		SPH_ATTR_FACTORS }, // just an alias for PACKEDFACTORS()
	{ "bm25f",			-2,	FUNC_BM25F,			SPH_ATTR_FLOAT },
	{ "integer",		1,	FUNC_INTEGER,		SPH_ATTR_BIGINT },
	{ "double",			1,	FUNC_DOUBLE,		SPH_ATTR_FLOAT },
	{ "length",			1,	FUNC_LENGTH,		SPH_ATTR_INTEGER },
	{ "least",			1,	FUNC_LEAST,			SPH_ATTR_STRINGPTR },
	{ "greatest",		1,	FUNC_GREATEST,		SPH_ATTR_STRINGPTR },
	{ "uint",			1,	FUNC_UINT,			SPH_ATTR_INTEGER },

	{ "curtime",		0,	FUNC_CURTIME,		SPH_ATTR_STRINGPTR },
	{ "utc_time",		0,	FUNC_UTC_TIME,		SPH_ATTR_STRINGPTR },
	{ "utc_timestamp",	0,	FUNC_UTC_TIMESTAMP,	SPH_ATTR_STRINGPTR },
	{ "timediff",		2,	FUNC_TIMEDIFF,		SPH_ATTR_STRINGPTR },
	{ "current_user",	0,	FUNC_CURRENT_USER,	SPH_ATTR_INTEGER },
	{ "connection_id",	0,	FUNC_CONNECTION_ID,	SPH_ATTR_INTEGER },
	{ "all",			-1,	FUNC_ALL,			SPH_ATTR_INTEGER },
	{ "any",			-1,	FUNC_ANY,			SPH_ATTR_INTEGER },
	{ "indexof",		-1,	FUNC_INDEXOF,		SPH_ATTR_BIGINT },

	{ "min_top_weight",		0,	FUNC_MIN_TOP_WEIGHT,	SPH_ATTR_INTEGER },
	{ "min_top_sortval",	0,	FUNC_MIN_TOP_SORTVAL,	SPH_ATTR_FLOAT },

	{ "atan2",			2,	FUNC_ATAN2,			SPH_ATTR_FLOAT }
};
1844 
1845 
1846 // helper to generate input data for gperf
1847 // run this, run gperf, that will generate a C program
1848 // copy dAsso from asso_values in that C source
1849 // modify iHash switch according to that C source, if needed
1850 // copy dIndexes from the program output
1851 #if 0
/// (disabled helper) print a gperf input file for the current g_dFuncs contents to stdout
/// every function name is emitted together with its g_dFuncs index; the trailing main()
/// makes the gperf-generated program print the hash slot to index mapping, 10 values per line
/// terminates the process via exit(0) once the output is written, so it never actually returns
int HashGen()
{
	// gperf declarations section, then the keyword list
	printf ( "struct func { char *name; int num; };\n%%%%\n" );
	for ( int i=0; i<int( sizeof ( g_dFuncs )/sizeof ( g_dFuncs[0] )); i++ )
		printf ( "%s, %d\n", g_dFuncs[i].m_sName, i );
	printf ( "%%%%\n" );

	// C code appended after the keywords: dumps index or -1 for every hash slot
	printf ( "void main()\n" );
	printf ( "{\n" );
	printf ( "\tint i;\n" );
	printf ( "\tfor ( i=0; i<=MAX_HASH_VALUE; i++ )\n" );
	printf ( "\t\tprintf ( \"%%d,%%s\", wordlist[i].name[0] ? wordlist[i].num : -1, (i%%10)==9 ? \"\\n\" : \" \" );\n" );
	printf ( "}\n" );

	// reminder of the gperf command line to run on the generated file
	printf ( "// gperf -Gt 1.p > 1.c\n" );
	exit ( 0 );
	return 0;
}
1868 
1869 static int G_HASHGEN = HashGen();
1870 #endif
1871 
1872 
1873 // FIXME? can remove this by preprocessing the assoc table
FuncHashLower(BYTE u)1874 static inline BYTE FuncHashLower ( BYTE u )
1875 {
1876 	return ( u>='A' && u<='Z' ) ? ( u | 0x20 ) : u;
1877 }
1878 
1879 
/// look up a built-in function by name using the gperf-generated perfect hash
/// the match is case-insensitive (FuncHashLower() for hashing, strcasecmp() for the final check)
/// returns the index into g_dFuncs, or -1 if the name is not a known function
/// sKey must be a non-empty string; that is only enforced by the assert
static int FuncHashLookup ( const char * sKey )
{
	assert ( sKey && sKey[0] );

	// per-character association values, one per possible byte value
	// (copied from asso_values in the gperf output, see the HashGen() notes)
	static BYTE dAsso[] =
	{
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		10, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 5, 109, 35, 0, 0,
		50, 5, 20, 30, 109, 10, 109, 109, 5, 0,
		10, 15, 5, 25, 0, 55, 0, 0, 109, 21,
		45, 20, 0, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109
	};

	// hash = key length + association values of (up to) the first three characters
	// the cases fall through on purpose: 3+ chars add s[2], s[1], s[0]; 2 chars add s[1], s[0]; 1 char adds s[0]
	const BYTE * s = (const BYTE*) sKey;
	int iHash = strlen ( sKey );
	switch ( iHash )
	{
		default:		iHash += dAsso [ FuncHashLower ( s[2] ) ];
		case 2:			iHash += dAsso [ FuncHashLower ( s[1] ) ];
		case 1:			iHash += dAsso [ FuncHashLower ( s[0] ) ];
	}

	// hash slot to g_dFuncs index mapping; -1 marks the unused slots
	static int dIndexes[] =
	{
		-1, -1, -1, -1, -1, 13, -1, 48, 49, 26,
		30, -1, 52, 50, -1, 41, 29, 6, 51, 2,
		-1, -1, 28, 20, 47, -1, 44, 42, 27, 37,
		16, 33, 24, 35, 57, 58, -1, 36, 53, 14,
		-1, -1, -1, 46, 22, 3, 11, -1, 54, 0,
		45, -1, -1, 39, 7, 8, 38, 31, 9, 34,
		-1, -1, 40, -1, 17, 32, -1, -1, 55, 18,
		-1, 43, 19, 5, 23, 59, -1, 56, 4, 12,
		-1, -1, -1, 21, 10, -1, -1, -1, -1, 25,
		-1, -1, -1, 1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, 15
	};

	// hashes past the end of the table can not belong to any known name
	if ( iHash<0 || iHash>=(int)(sizeof(dIndexes)/sizeof(dIndexes[0])) )
		return -1;

	// different keys can land in the same slot, so confirm with a full name comparison
	int iFunc = dIndexes[iHash];
	if ( iFunc>=0 && strcasecmp ( g_dFuncs[iFunc].m_sName, sKey )==0 )
		return iFunc;
	return -1;
}
1946 
1947 
FuncHashCheck()1948 static int FuncHashCheck()
1949 {
1950 	for ( int i=0; i<(int)(sizeof(g_dFuncs)/sizeof(g_dFuncs[0])); i++ )
1951 	{
1952 		CSphString sKey ( g_dFuncs[i].m_sName );
1953 		sKey.ToLower();
1954 		if ( FuncHashLookup ( sKey.cstr() )!=i )
1955 			sphDie ( "INTERNAL ERROR: lookup for %s() failed, rebuild function hash", sKey.cstr() );
1956 		sKey.ToUpper();
1957 		if ( FuncHashLookup ( sKey.cstr() )!=i )
1958 			sphDie ( "INTERNAL ERROR: lookup for %s() failed, rebuild function hash", sKey.cstr() );
1959 		if ( g_dFuncs[i].m_eFunc!=i )
1960 			sphDie ( "INTERNAL ERROR: function hash entry %s() at index %d maps to Func_e entry %d, sync Func_e and g_dFuncs",
1961 				sKey.cstr(), i, g_dFuncs[i].m_eFunc );
1962 	}
1963 	if ( FuncHashLookup("A")!=-1 )
1964 		sphDie ( "INTERNAL ERROR: lookup for A() succeeded, rebuild function hash" );
1965 	return 1;
1966 }
1967 
1968 
1969 static int G_FUNC_HASH_CHECK = FuncHashCheck();
1970 
1971 //////////////////////////////////////////////////////////////////////////
1972 
1973 /// check whether the type is numeric
IsNumeric(ESphAttr eType)1974 static inline bool IsNumeric ( ESphAttr eType )
1975 {
1976 	return eType==SPH_ATTR_INTEGER || eType==SPH_ATTR_BIGINT || eType==SPH_ATTR_FLOAT;
1977 }
1978 
1979 /// check for type based on int value
GetIntType(int64_t iValue)1980 static inline ESphAttr GetIntType ( int64_t iValue )
1981 {
1982 	return ( iValue>=(int64_t)INT_MIN && iValue<=(int64_t)INT_MAX ) ? SPH_ATTR_INTEGER : SPH_ATTR_BIGINT;
1983 }
1984 
1985 /// get the widest numeric type of the two
WidestType(ESphAttr a,ESphAttr b)1986 static inline ESphAttr WidestType ( ESphAttr a, ESphAttr b )
1987 {
1988 	assert ( IsNumeric(a) && IsNumeric(b) );
1989 	if ( a==SPH_ATTR_FLOAT || b==SPH_ATTR_FLOAT )
1990 		return SPH_ATTR_FLOAT;
1991 	if ( a==SPH_ATTR_BIGINT || b==SPH_ATTR_BIGINT )
1992 		return SPH_ATTR_BIGINT;
1993 	return SPH_ATTR_INTEGER;
1994 }
1995 
1996 /// list of constants
1997 class ConstList_c
1998 {
1999 public:
2000 	CSphVector<int64_t>		m_dInts;		///< dword/int64 storage
2001 	CSphVector<float>		m_dFloats;		///< float storage
2002 	ESphAttr				m_eRetType;		///< SPH_ATTR_INTEGER, SPH_ATTR_BIGINT, SPH_ATTR_STRING, or SPH_ATTR_FLOAT
2003 	CSphString				m_sExpr;		///< m_sExpr copy for TOK_CONST_STRING evaluation
2004 
2005 public:
ConstList_c()2006 	ConstList_c ()
2007 		: m_eRetType ( SPH_ATTR_INTEGER )
2008 	{}
2009 
Add(int64_t iValue)2010 	void Add ( int64_t iValue )
2011 	{
2012 		if ( m_eRetType==SPH_ATTR_FLOAT )
2013 		{
2014 			m_dFloats.Add ( (float)iValue );
2015 		} else
2016 		{
2017 			m_eRetType = WidestType ( m_eRetType, GetIntType ( iValue ) );
2018 			m_dInts.Add ( iValue );
2019 		}
2020 	}
2021 
Add(float fValue)2022 	void Add ( float fValue )
2023 	{
2024 		if ( m_eRetType!=SPH_ATTR_FLOAT )
2025 		{
2026 			assert ( m_dFloats.GetLength()==0 );
2027 			ARRAY_FOREACH ( i, m_dInts )
2028 				m_dFloats.Add ( (float)m_dInts[i] );
2029 			m_dInts.Reset ();
2030 			m_eRetType = SPH_ATTR_FLOAT;
2031 		}
2032 		m_dFloats.Add ( fValue );
2033 	}
2034 };
2035 
2036 
2037 /// {title=2, body=1}
2038 /// {in=deg, out=mi}
2039 /// argument to functions like BM25F() and GEODIST()
2040 class MapArg_c
2041 {
2042 public:
2043 	CSphVector<CSphNamedVariant> m_dPairs;
2044 
2045 public:
Add(const char * sKey,const char * sValue,int64_t iValue)2046 	void Add ( const char * sKey, const char * sValue, int64_t iValue )
2047 	{
2048 		CSphNamedVariant & t = m_dPairs.Add();
2049 		t.m_sKey = sKey;
2050 		if ( sValue )
2051 			t.m_sValue = sValue;
2052 		else
2053 			t.m_iValue = (int)iValue;
2054 	}
2055 };
2056 
2057 
/// expression tree node
/// used to build an AST (Abstract Syntax Tree)
/// nodes refer to their children by index (the parser keeps them in its m_dNodes vector),
/// with -1 standing for "no child"
struct ExprNode_t
{
	int				m_iToken;	///< token type, including operators
	ESphAttr		m_eRetType;	///< result type
	ESphAttr		m_eArgType;	///< args type
	CSphAttrLocator	m_tLocator;	///< attribute locator, for TOK_ATTR type
	int				m_iLocator; ///< index of attribute locator in schema

	// per-token payload; the members share storage, so only the one
	// that matches m_iToken is meaningful at any given time
	union
	{
		int64_t			m_iConst;		///< constant value, for TOK_CONST_INT type
		float			m_fConst;		///< constant value, for TOK_CONST_FLOAT type
		int				m_iFunc;		///< built-in function id, for TOK_FUNC type
		int				m_iArgs;		///< args count, for arglist (token==',') type
		ConstList_c *	m_pConsts;		///< constants list, for TOK_CONST_LIST type
		MapArg_c	*	m_pMapArg;		///< map argument (maps name to const or name to expr), for TOK_MAP_ARG type
		const char	*	m_sIdent;		///< pointer to const char, for TOK_IDENT type
		SphAttr_t	*	m_pAttr;		///< pointer to 64-bit value, for TOK_ITERATOR type
	};
	int				m_iLeft;	///< index of the left child node, -1 if none
	int				m_iRight;	///< index of the right child node, -1 if none

	/// create a blank node: zero token, unknown types, no locator index, no children
	/// the payload union is intentionally left unset here
	ExprNode_t () : m_iToken ( 0 ), m_eRetType ( SPH_ATTR_NONE ), m_eArgType ( SPH_ATTR_NONE ),
		m_iLocator ( -1 ), m_iLeft ( -1 ), m_iRight ( -1 ) {}
};
2085 
/// entry of the parser's gather stack (ExprParser_t::m_dGatherStack)
/// NOTE(review): presumably a node index along with its pending left and right children,
/// as used by the GatherArg*() traversals - confirm against GatherArgT()
struct StackNode_t
{
	int m_iNode;
	int m_iLeft;
	int m_iRight;
};
2092 
2093 /// expression parser
2094 class ExprParser_t
2095 {
2096 	friend int				yylex ( YYSTYPE * lvalp, ExprParser_t * pParser );
2097 	friend int				yyparse ( ExprParser_t * pParser );
2098 	friend void				yyerror ( ExprParser_t * pParser, const char * sMessage );
2099 
2100 public:
ExprParser_t(ISphExprHook * pHook,CSphQueryProfile * pProfiler,ESphCollation eCollation)2101 	ExprParser_t ( ISphExprHook * pHook, CSphQueryProfile * pProfiler, ESphCollation eCollation )
2102 		: m_pHook ( pHook )
2103 		, m_pProfiler ( pProfiler )
2104 		, m_bHasZonespanlist ( false )
2105 		, m_uPackedFactorFlags ( SPH_FACTOR_DISABLE )
2106 		, m_eEvalStage ( SPH_EVAL_FINAL ) // be default compute as late as possible
2107 		, m_eCollation ( eCollation )
2108 	{
2109 		m_dGatherStack.Reserve ( 64 );
2110 	}
2111 
2112 							~ExprParser_t ();
2113 	ISphExpr *				Parse ( const char * sExpr, const ISphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError );
2114 
2115 protected:
2116 	int						m_iParsed;	///< filled by yyparse() at the very end
2117 	CSphString				m_sLexerError;
2118 	CSphString				m_sParserError;
2119 	CSphString				m_sCreateError;
2120 	ISphExprHook *			m_pHook;
2121 	CSphQueryProfile *		m_pProfiler;
2122 
2123 protected:
2124 	ESphAttr				GetWidestRet ( int iLeft, int iRight );
2125 
2126 	int						AddNodeInt ( int64_t iValue );
2127 	int						AddNodeFloat ( float fValue );
2128 	int						AddNodeString ( int64_t iValue );
2129 	int						AddNodeAttr ( int iTokenType, uint64_t uAttrLocator );
2130 	int						AddNodeID ();
2131 	int						AddNodeWeight ();
2132 	int						AddNodeOp ( int iOp, int iLeft, int iRight );
2133 	int						AddNodeFunc ( int iFunc, int iFirst, int iSecond=-1, int iThird=-1, int iFourth=-1 );
2134 	int						AddNodeUdf ( int iCall, int iArg );
2135 	int						AddNodePF ( int iFunc, int iArg );
2136 	int						AddNodeConstlist ( int64_t iValue );
2137 	int						AddNodeConstlist ( float iValue );
2138 	void					AppendToConstlist ( int iNode, int64_t iValue );
2139 	void					AppendToConstlist ( int iNode, float iValue );
2140 	int						AddNodeUservar ( int iUservar );
2141 	int						AddNodeHookIdent ( int iID );
2142 	int						AddNodeHookFunc ( int iID, int iLeft );
2143 	int						AddNodeMapArg ( const char * sKey, const char * sValue, int64_t iValue );
2144 	void					AppendToMapArg ( int iNode, const char * sKey, const char * sValue, int64_t iValue );
2145 	const char *			Attr2Ident ( uint64_t uAttrLoc );
2146 	int						AddNodeJsonField ( uint64_t uAttrLocator, int iLeft );
2147 	int						AddNodeJsonSubkey ( int64_t iValue );
2148 	int						AddNodeDotNumber ( int64_t iValue );
2149 	int						AddNodeIdent ( const char * sKey, int iLeft );
2150 
2151 private:
2152 	const char *			m_sExpr;
2153 	const char *			m_pCur;
2154 	const char *			m_pLastTokenStart;
2155 	const ISphSchema *		m_pSchema;
2156 	CSphVector<ExprNode_t>	m_dNodes;
2157 	CSphVector<CSphString>	m_dUservars;
2158 	CSphVector<char*>		m_dIdents;
2159 	int						m_iConstNow;
2160 	CSphVector<StackNode_t>	m_dGatherStack;
2161 	CSphVector<UdfCall_t*>	m_dUdfCalls;
2162 
2163 public:
2164 	bool					m_bHasZonespanlist;
2165 	DWORD					m_uPackedFactorFlags;
2166 	ESphEvalStage			m_eEvalStage;
2167 	ESphCollation			m_eCollation;
2168 
2169 private:
2170 	int						GetToken ( YYSTYPE * lvalp );
2171 
2172 	void					GatherArgTypes ( int iNode, CSphVector<int> & dTypes );
2173 	void					GatherArgNodes ( int iNode, CSphVector<int> & dNodes );
2174 	void					GatherArgRetTypes ( int iNode, CSphVector<ESphAttr> & dTypes );
2175 	template < typename T >
2176 	void					GatherArgT ( int iNode, T & FUNCTOR );
2177 
2178 	bool					CheckForConstSet ( int iArgsNode, int iSkip );
2179 	int						ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp );
2180 
2181 	template < typename T >
2182 	void					WalkTree ( int iRoot, T & FUNCTOR );
2183 
2184 	void					Optimize ( int iNode );
2185 	void					CanonizePass ( int iNode );
2186 	void					ConstantFoldPass ( int iNode );
2187 	void					VariousOptimizationsPass ( int iNode );
2188 	void					Dump ( int iNode );
2189 
2190 	ISphExpr *				CreateTree ( int iNode );
2191 	ISphExpr *				CreateIntervalNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs );
2192 	ISphExpr *				CreateInNode ( int iNode );
2193 	ISphExpr *				CreateLengthNode ( const ExprNode_t & tNode, ISphExpr * pLeft );
2194 	ISphExpr *				CreateGeodistNode ( int iArgs );
2195 	ISphExpr *				CreatePFNode ( int iArg );
2196 	ISphExpr *				CreateBitdotNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs );
2197 	ISphExpr *				CreateUdfNode ( int iCall, ISphExpr * pLeft );
2198 	ISphExpr *				CreateExistNode ( const ExprNode_t & tNode );
2199 	ISphExpr *				CreateContainsNode ( const ExprNode_t & tNode );
2200 	ISphExpr *				CreateAggregateNode ( const ExprNode_t & tNode, ESphAggrFunc eFunc, ISphExpr * pLeft );
2201 	ISphExpr *				CreateForInNode ( int iNode );
2202 	void					FixupIterators ( int iNode, const char * sKey, SphAttr_t * pAttr );
2203 
GetError() const2204 	bool					GetError () const { return !( m_sLexerError.IsEmpty() && m_sParserError.IsEmpty() && m_sCreateError.IsEmpty() ); }
2205 };
2206 
2207 //////////////////////////////////////////////////////////////////////////
2208 
2209 /// parse that numeric constant (e.g. "123", ".03")
ParseNumeric(YYSTYPE * lvalp,const char ** ppStr)2210 static int ParseNumeric ( YYSTYPE * lvalp, const char ** ppStr )
2211 {
2212 	assert ( lvalp && ppStr && *ppStr );
2213 
2214 	// try float route
2215 	char * pEnd = NULL;
2216 	float fRes = (float) strtod ( *ppStr, &pEnd );
2217 
2218 	// try int route
2219 	uint64_t uRes = 0; // unsigned overflow is better than signed overflow
2220 	bool bInt = true;
2221 	for ( const char * p=(*ppStr); p<pEnd; p++ && bInt )
2222 	{
2223 		if ( isdigit(*p) )
2224 			uRes = uRes*10 + (int)( (*p)-'0' ); // FIXME! missing overflow check, missing octal/hex handling
2225 		else
2226 			bInt = false;
2227 	}
2228 
2229 	// choose your destiny
2230 	*ppStr = pEnd;
2231 	if ( bInt )
2232 	{
2233 		lvalp->iConst = (int64_t)uRes;
2234 		return TOK_CONST_INT;
2235 	} else
2236 	{
2237 		lvalp->fConst = fRes;
2238 		return TOK_CONST_FLOAT;
2239 	}
2240 }
2241 
2242 // used to store in 8 bytes in Bison lvalp variable
sphPackAttrLocator(const CSphAttrLocator & tLoc,int iLocator)2243 static uint64_t sphPackAttrLocator ( const CSphAttrLocator & tLoc, int iLocator )
2244 {
2245 	assert ( iLocator>=0 && iLocator<=0x7fff );
2246 	uint64_t uIndex = 0;
2247 	uIndex = ( tLoc.m_iBitOffset<<16 ) + tLoc.m_iBitCount + ( (uint64_t)iLocator<<32 );
2248 	if ( tLoc.m_bDynamic )
2249 		uIndex |= ( U64C(1)<<63 );
2250 
2251 	return uIndex;
2252 }
2253 
sphUnpackAttrLocator(uint64_t uIndex,ExprNode_t * pNode)2254 static void sphUnpackAttrLocator ( uint64_t uIndex, ExprNode_t * pNode )
2255 {
2256 	assert ( pNode );
2257 	pNode->m_tLocator.m_iBitOffset = (int)( ( uIndex>>16 ) & 0xffff );
2258 	pNode->m_tLocator.m_iBitCount = (int)( uIndex & 0xffff );
2259 	pNode->m_tLocator.m_bDynamic = ( ( uIndex & ( U64C(1)<<63 ) )!=0 );
2260 
2261 	pNode->m_iLocator = (int)( ( uIndex>>32 ) & 0x7fff );
2262 }
2263 
ParseAttr(int iAttr,const char * sTok,YYSTYPE * lvalp)2264 int ExprParser_t::ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp )
2265 {
2266 	// check attribute type and width
2267 	const CSphColumnInfo & tCol = m_pSchema->GetAttr ( iAttr );
2268 
2269 	int iRes = -1;
2270 	switch ( tCol.m_eAttrType )
2271 	{
2272 	case SPH_ATTR_FLOAT:		iRes = TOK_ATTR_FLOAT;	break;
2273 	case SPH_ATTR_UINT32SET:	iRes = TOK_ATTR_MVA32; break;
2274 	case SPH_ATTR_INT64SET:		iRes = TOK_ATTR_MVA64; break;
2275 	case SPH_ATTR_STRING:		iRes = TOK_ATTR_STRING; break;
2276 	case SPH_ATTR_JSON:			iRes = TOK_ATTR_JSON; break;
2277 	case SPH_ATTR_JSON_FIELD:	iRes = TOK_ATTR_JSON; break;
2278 	case SPH_ATTR_FACTORS:		iRes = TOK_ATTR_FACTORS; break;
2279 	case SPH_ATTR_INTEGER:
2280 	case SPH_ATTR_TIMESTAMP:
2281 	case SPH_ATTR_BOOL:
2282 	case SPH_ATTR_BIGINT:
2283 	case SPH_ATTR_TOKENCOUNT:
2284 		iRes = tCol.m_tLocator.IsBitfield() ? TOK_ATTR_BITS : TOK_ATTR_INT;
2285 		break;
2286 	default:
2287 		m_sLexerError.SetSprintf ( "attribute '%s' is of unsupported type (type=%d)", sTok, tCol.m_eAttrType );
2288 		return -1;
2289 	}
2290 
2291 	lvalp->iAttrLocator = sphPackAttrLocator ( tCol.m_tLocator, iAttr );
2292 	return iRes;
2293 }
2294 
2295 
2296 /// a lexer of my own
2297 /// returns token id and fills lvalp on success
2298 /// returns -1 and fills sError on failure
GetToken(YYSTYPE * lvalp)2299 int ExprParser_t::GetToken ( YYSTYPE * lvalp )
2300 {
2301 	// skip whitespace, check eof
2302 	while ( isspace ( *m_pCur ) ) m_pCur++;
2303 	m_pLastTokenStart = m_pCur;
2304 	if ( !*m_pCur ) return 0;
2305 
2306 	// check for constant
2307 	if ( isdigit ( m_pCur[0] ) )
2308 		return ParseNumeric ( lvalp, &m_pCur );
2309 
2310 	// check for field, function, or magic name
2311 	if ( sphIsAttr ( m_pCur[0] )
2312 		|| ( m_pCur[0]=='@' && sphIsAttr ( m_pCur[1] ) && !isdigit ( m_pCur[1] ) ) )
2313 	{
2314 		// get token
2315 		const char * pStart = m_pCur++;
2316 		while ( sphIsAttr ( *m_pCur ) ) m_pCur++;
2317 
2318 		CSphString sTok;
2319 		sTok.SetBinary ( pStart, m_pCur-pStart );
2320 		CSphString sTokMixedCase = sTok;
2321 		sTok.ToLower ();
2322 
2323 		// check for magic name
2324 		if ( sTok=="@id" )			return TOK_ATID;
2325 		if ( sTok=="@weight" )		return TOK_ATWEIGHT;
2326 		if ( sTok=="id" )			return TOK_ID;
2327 		if ( sTok=="weight" )		return TOK_WEIGHT;
2328 		if ( sTok=="groupby" )		return TOK_GROUPBY;
2329 		if ( sTok=="distinct" )		return TOK_DISTINCT;
2330 		if ( sTok=="@geodist" )
2331 		{
2332 			int iGeodist = m_pSchema->GetAttrIndex("@geodist");
2333 			if ( iGeodist==-1 )
2334 			{
2335 				m_sLexerError = "geoanchor is not set, @geodist expression unavailable";
2336 				return -1;
2337 			}
2338 			const CSphAttrLocator & tLoc = m_pSchema->GetAttr ( iGeodist ).m_tLocator;
2339 			lvalp->iAttrLocator = sphPackAttrLocator ( tLoc, iGeodist );
2340 			return TOK_ATTR_FLOAT;
2341 		}
2342 
2343 		// check for uservar
2344 		if ( pStart[0]=='@' )
2345 		{
2346 			lvalp->iNode = m_dUservars.GetLength();
2347 			m_dUservars.Add ( sTok );
2348 			return TOK_USERVAR;
2349 		}
2350 
2351 		// check for keyword
2352 		if ( sTok=="and" )		{ return TOK_AND; }
2353 		if ( sTok=="or" )		{ return TOK_OR; }
2354 		if ( sTok=="not" )		{ return TOK_NOT; }
2355 		if ( sTok=="div" )		{ return TOK_DIV; }
2356 		if ( sTok=="mod" )		{ return TOK_MOD; }
2357 		if ( sTok=="for" )		{ return TOK_FOR; }
2358 		if ( sTok=="is" )		{ return TOK_IS; }
2359 		if ( sTok=="null" )		{ return TOK_NULL; }
2360 
2361 		// in case someone used 'count' as a name for an attribute
2362 		if ( sTok=="count" )
2363 		{
2364 			int iAttr = m_pSchema->GetAttrIndex ( "count" );
2365 			if ( iAttr>=0 )
2366 				ParseAttr ( iAttr, sTok.cstr(), lvalp );
2367 			return TOK_COUNT;
2368 		}
2369 
2370 		// check for attribute
2371 		int iAttr = m_pSchema->GetAttrIndex ( sTok.cstr() );
2372 		if ( iAttr>=0 )
2373 			return ParseAttr ( iAttr, sTok.cstr(), lvalp );
2374 
2375 		// hook might replace built-in function
2376 		int iHookFunc = -1;
2377 		if ( m_pHook )
2378 			iHookFunc = m_pHook->IsKnownFunc ( sTok.cstr() );
2379 
2380 		// check for function
2381 		int iFunc = FuncHashLookup ( sTok.cstr() );
2382 		if ( iFunc>=0 && iHookFunc==-1 )
2383 		{
2384 			assert ( !strcasecmp ( g_dFuncs[iFunc].m_sName, sTok.cstr() ) );
2385 			lvalp->iFunc = iFunc;
2386 			if ( iFunc==FUNC_IN )
2387 				return TOK_FUNC_IN;
2388 			if ( iFunc==FUNC_REMAP )
2389 				return TOK_FUNC_REMAP;
2390 			if ( iFunc==FUNC_PACKEDFACTORS || iFunc==FUNC_FACTORS )
2391 				return TOK_FUNC_PF;
2392 			return TOK_FUNC;
2393 		}
2394 
2395 		// ask hook
2396 		if ( m_pHook )
2397 		{
2398 			int iID = m_pHook->IsKnownIdent ( sTok.cstr() );
2399 			if ( iID>=0 )
2400 			{
2401 				lvalp->iNode = iID;
2402 				return TOK_HOOK_IDENT;
2403 			}
2404 
2405 			iID = iHookFunc;
2406 			if ( iID>=0 )
2407 			{
2408 				lvalp->iNode = iID;
2409 				return TOK_HOOK_FUNC;
2410 			}
2411 		}
2412 
2413 		// check for UDF
2414 		const PluginUDF_c * pUdf = (const PluginUDF_c *) sphPluginGet ( PLUGIN_FUNCTION, sTok.cstr() );
2415 		if ( pUdf )
2416 		{
2417 			lvalp->iNode = m_dUdfCalls.GetLength();
2418 			m_dUdfCalls.Add ( new UdfCall_t() );
2419 			m_dUdfCalls.Last()->m_pUdf = pUdf;
2420 			return TOK_UDF;
2421 		}
2422 
2423 		// arbitrary identifier, then
2424 		m_dIdents.Add ( sTokMixedCase.Leak() );
2425 		lvalp->sIdent = m_dIdents.Last();
2426 		return TOK_IDENT;
2427 	}
2428 
2429 	// check for known operators, then
2430 	switch ( *m_pCur )
2431 	{
2432 		case '+':
2433 		case '-':
2434 		case '*':
2435 		case '/':
2436 		case '(':
2437 		case ')':
2438 		case ',':
2439 		case '&':
2440 		case '|':
2441 		case '%':
2442 		case '{':
2443 		case '}':
2444 		case '[':
2445 		case ']':
2446 		case '`':
2447 			return *m_pCur++;
2448 
2449 		case '<':
2450 			m_pCur++;
2451 			if ( *m_pCur=='>' ) { m_pCur++; return TOK_NE; }
2452 			if ( *m_pCur=='=' ) { m_pCur++; return TOK_LTE; }
2453 			return '<';
2454 
2455 		case '>':
2456 			m_pCur++;
2457 			if ( *m_pCur=='=' ) { m_pCur++; return TOK_GTE; }
2458 			return '>';
2459 
2460 		case '=':
2461 			m_pCur++;
2462 			if ( *m_pCur=='=' ) m_pCur++;
2463 			return TOK_EQ;
2464 
2465 		// special case for leading dots (float values without leading zero, JSON key names, etc)
2466 		case '.':
2467 			{
2468 				int iBeg = (int)( m_pCur-m_sExpr+1 );
2469 				bool bDigit = isdigit ( m_pCur[1] )!=0;
2470 
2471 				// handle dots followed by a digit
2472 				// aka, a float value without leading zero
2473 				if ( bDigit )
2474 				{
2475 					char * pEnd = NULL;
2476 					float fValue = (float) strtod ( m_pCur, &pEnd );
2477 					lvalp->fConst = fValue;
2478 
2479 					if ( pEnd && !sphIsAttr(*pEnd) )
2480 						m_pCur = pEnd;
2481 					else // fallback to subkey (e.g. ".1234a")
2482 						bDigit = false;
2483 				}
2484 
2485 				// handle dots followed by a non-digit
2486 				// for cases like jsoncol.keyname
2487 				if ( !bDigit )
2488 				{
2489 					m_pCur++;
2490 					while ( isspace ( *m_pCur ) )
2491 						m_pCur++;
2492 					iBeg = (int)( m_pCur-m_sExpr );
2493 					while ( sphIsAttr(*m_pCur) )
2494 						m_pCur++;
2495 				}
2496 
2497 				// return packed string after the dot
2498 				int iLen = (int)( m_pCur-m_sExpr ) - iBeg;
2499 				lvalp->iConst = ( int64_t(iBeg)<<32 ) + iLen;
2500 				return bDigit ? TOK_DOT_NUMBER : TOK_SUBKEY;
2501 			}
2502 
2503 		case '\'':
2504 		case '"':
2505 			{
2506 				const char cEnd = *m_pCur;
2507 				for ( const char * s = m_pCur+1; *s; s++ )
2508 				{
2509 					if ( *s==cEnd )
2510 					{
2511 						int iBeg = (int)( m_pCur-m_sExpr );
2512 						int iLen = (int)( s-m_sExpr ) - iBeg + 1;
2513 						lvalp->iConst = ( int64_t(iBeg)<<32 ) + iLen;
2514 						m_pCur = s+1;
2515 						return TOK_CONST_STRING;
2516 
2517 					} else if ( *s=='\\' )
2518 					{
2519 						s++;
2520 						if ( !*s )
2521 							break;
2522 					}
2523 				}
2524 				m_sLexerError.SetSprintf ( "unterminated string constant near '%s'", m_pCur );
2525 				return -1;
2526 			}
2527 	}
2528 
2529 	m_sLexerError.SetSprintf ( "unknown operator '%c' near '%s'", *m_pCur, m_pCur );
2530 	return -1;
2531 }
2532 
2533 /// is add/sub?
IsAddSub(const ExprNode_t * pNode)2534 static inline bool IsAddSub ( const ExprNode_t * pNode )
2535 {
2536 	return pNode->m_iToken=='+' || pNode->m_iToken=='-';
2537 }
2538 
2539 /// is unary operator?
IsUnary(const ExprNode_t * pNode)2540 static inline bool IsUnary ( const ExprNode_t * pNode )
2541 {
2542 	return pNode->m_iToken==TOK_NEG || pNode->m_iToken==TOK_NOT;
2543 }
2544 
2545 /// is arithmetic?
IsAri(const ExprNode_t * pNode)2546 static inline bool IsAri ( const ExprNode_t * pNode )
2547 {
2548 	int iTok = pNode->m_iToken;
2549 	return iTok=='+' || iTok=='-' || iTok=='*' || iTok=='/';
2550 }
2551 
2552 /// is constant?
IsConst(const ExprNode_t * pNode)2553 static inline bool IsConst ( const ExprNode_t * pNode )
2554 {
2555 	return pNode->m_iToken==TOK_CONST_INT || pNode->m_iToken==TOK_CONST_FLOAT;
2556 }
2557 
2558 /// float value of a constant
FloatVal(const ExprNode_t * pNode)2559 static inline float FloatVal ( const ExprNode_t * pNode )
2560 {
2561 	assert ( IsConst(pNode) );
2562 	return pNode->m_iToken==TOK_CONST_INT
2563 		? (float)pNode->m_iConst
2564 		: pNode->m_fConst;
2565 }
2566 
/// canonization pass over the subtree rooted at iNode (a negative index is a no-op)
/// works bottom-up, rewriting nodes in place:
/// - moves a constant right operand of an arithmetic node to the left
/// - pulls a constant up from the left child when both operators are of the same kind
/// - replaces CURRENT_USER() and CONNECTION_ID() calls with a constant zero
void ExprParser_t::CanonizePass ( int iNode )
{
	if ( iNode<0 )
		return;

	// children first, so that they are in canonical form already when we look at them
	CanonizePass ( m_dNodes [ iNode ].m_iLeft );
	CanonizePass ( m_dNodes [ iNode ].m_iRight );

	// child pointers are NULL when the respective child is missing
	ExprNode_t * pRoot = &m_dNodes [ iNode ];
	ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes [ pRoot->m_iLeft ] : NULL;
	ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes [ pRoot->m_iRight ] : NULL;

	// canonize (expr op const), move const to the left
	if ( IsAri ( pRoot ) && !IsConst ( pLeft ) && IsConst ( pRight ) )
	{
		// swap both the child indexes and the local pointers; pLeft is the constant from now on
		Swap ( pRoot->m_iLeft, pRoot->m_iRight );
		Swap ( pLeft, pRight );

		// fixup (expr-const) to ((-const)+expr)
		if ( pRoot->m_iToken=='-' )
		{
			pRoot->m_iToken = '+';
			if ( pLeft->m_iToken==TOK_CONST_INT )
				pLeft->m_iConst *= -1;
			else
				pLeft->m_fConst *= -1;
		}

		// fixup (expr/const) to ((1/const)*expr)
		// the constant always becomes a float here, even if it was an integer
		if ( pRoot->m_iToken=='/' )
		{
			pRoot->m_iToken = '*';
			pLeft->m_fConst = 1.0f / FloatVal ( pLeft );
			pLeft->m_iToken = TOK_CONST_FLOAT;
		}
	}

	// promote children constants
	// only applies when root and left child are both additive, or both multiplicative
	if ( IsAri ( pRoot ) && IsAri ( pLeft ) && IsAddSub ( pLeft )==IsAddSub ( pRoot ) &&
		IsConst ( &m_dNodes [ pLeft->m_iLeft ] ) )
	{
		// ((const op lr) op2 right) gets replaced with (const op (lr op2/op right))
		// constant gets promoted one level up
		int iConst = pLeft->m_iLeft;
		pLeft->m_iLeft = pLeft->m_iRight;
		pLeft->m_iRight = pRoot->m_iRight; // (c op lr) -> (lr ... r)

		// now pick the operators for the (former) left node and the root
		switch ( pLeft->m_iToken )
		{
		case '+':
		case '*':
			// (c + lr) op r -> c + (lr op r)
			// (c * lr) op r -> c * (lr op r)
			Swap ( pLeft->m_iToken, pRoot->m_iToken );
			break;

		case '-':
			// (c - lr) + r -> c - (lr - r)
			// (c - lr) - r -> c - (lr + r)
			pLeft->m_iToken = ( pRoot->m_iToken=='+' ? '-' : '+' );
			pRoot->m_iToken = '-';
			break;

		case '/':
			// (c / lr) * r -> c * (r / lr)
			// (c / lr) / r -> c / (r * lr)
			// the root operator stays as is in both cases
			Swap ( pLeft->m_iLeft, pLeft->m_iRight );
			pLeft->m_iToken = ( pRoot->m_iToken=='*' ) ? '/' : '*';
			break;

		default:
			assert ( 0 && "internal error: unhandled op in left-const promotion" );
		}

		// the rebuilt node becomes the right child, the constant becomes the left one
		pRoot->m_iRight = pRoot->m_iLeft;
		pRoot->m_iLeft = iConst;

		// refresh the local pointers to match the new layout
		pLeft = &m_dNodes [ pRoot->m_iLeft ];
		pRight = &m_dNodes [ pRoot->m_iRight ];
	}

	// MySQL Workbench fixup
	// both functions just evaluate to a constant zero
	if ( pRoot->m_iToken==TOK_FUNC && ( pRoot->m_iFunc==FUNC_CURRENT_USER || pRoot->m_iFunc==FUNC_CONNECTION_ID ) )
	{
		pRoot->m_iToken = TOK_CONST_INT;
		pRoot->m_iConst = 0;
		return;
	}
}
2656 
/// constant folding pass over the subtree rooted at iNode
/// children are folded first, then this node is rewritten in place whenever
/// enough of its operands turned out to be constants; negative index is a no-op
void ExprParser_t::ConstantFoldPass ( int iNode )
{
	if ( iNode<0 )
		return;

	// post-order walk, so that children are already folded when we look at them
	ConstantFoldPass ( m_dNodes [ iNode ].m_iLeft );
	ConstantFoldPass ( m_dNodes [ iNode ].m_iRight );

	ExprNode_t * pRoot = &m_dNodes [ iNode ];
	ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes [ pRoot->m_iLeft ] : NULL;
	ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes [ pRoot->m_iRight ] : NULL;

	// unary arithmetic expression with constant
	// (the node becomes a constant of the same type as its operand)
	if ( IsUnary ( pRoot ) && IsConst ( pLeft ) )
	{
		if ( pLeft->m_iToken==TOK_CONST_INT )
		{
			switch ( pRoot->m_iToken )
			{
				case TOK_NEG:	pRoot->m_iConst = -pLeft->m_iConst; break;
				case TOK_NOT:	pRoot->m_iConst = !pLeft->m_iConst; break;
				default:		assert ( 0 && "internal error: unhandled arithmetic token during const-int optimization" );
			}

		} else
		{
			switch ( pRoot->m_iToken )
			{
				case TOK_NEG:	pRoot->m_fConst = -pLeft->m_fConst; break;
				case TOK_NOT:	pRoot->m_fConst = !pLeft->m_fConst; break;
				default:		assert ( 0 && "internal error: unhandled arithmetic token during const-float optimization" );
			}
		}

		// inherit the operand type, and detach the operand
		pRoot->m_iToken = pLeft->m_iToken;
		pRoot->m_iLeft = -1;
		return;
	}

	// arithmetic expression with constants
	if ( IsAri ( pRoot ) )
	{
		assert ( pLeft && pRight );

		// optimize fully-constant expressions
		if ( IsConst ( pLeft ) && IsConst ( pRight ) )
		{
			// int op int stays int for everything but division
			// division (and any mix with floats) is always computed in floats
			if ( pLeft->m_iToken==TOK_CONST_INT && pRight->m_iToken==TOK_CONST_INT && pRoot->m_iToken!='/' )
			{
				switch ( pRoot->m_iToken )
				{
					case '+':	pRoot->m_iConst = pLeft->m_iConst + pRight->m_iConst; break;
					case '-':	pRoot->m_iConst = pLeft->m_iConst - pRight->m_iConst; break;
					case '*':	pRoot->m_iConst = pLeft->m_iConst * pRight->m_iConst; break;
					default:	assert ( 0 && "internal error: unhandled arithmetic token during const-int optimization" );
				}
				pRoot->m_iToken = TOK_CONST_INT;

			} else
			{
				float fLeft = FloatVal ( pLeft );
				float fRight = FloatVal ( pRight );
				switch ( pRoot->m_iToken )
				{
					case '+':	pRoot->m_fConst = fLeft + fRight; break;
					case '-':	pRoot->m_fConst = fLeft - fRight; break;
					case '*':	pRoot->m_fConst = fLeft * fRight; break;
					case '/':	pRoot->m_fConst = fRight ? fLeft / fRight : 0.0f; break; // division by zero folds to 0
					default:	assert ( 0 && "internal error: unhandled arithmetic token during const-float optimization" );
				}
				pRoot->m_iToken = TOK_CONST_FLOAT;
			}
			// the node is a leaf constant now
			pRoot->m_iLeft = -1;
			pRoot->m_iRight = -1;
			return;
		}

		// optimize compatible operations with constants
		// that is, both ops come from the same family (either add/sub, or mul/div),
		// and the right subtree has its own constant on the left
		if ( IsConst ( pLeft ) && IsAri ( pRight ) && IsAddSub ( pRoot )==IsAddSub ( pRight ) &&
			IsConst ( &m_dNodes [ pRight->m_iLeft ] ) )
		{
			ExprNode_t * pConst = &m_dNodes [ pRight->m_iLeft ];
			ExprNode_t * pExpr = &m_dNodes [ pRight->m_iRight ];
			assert ( !IsConst ( pExpr ) ); // otherwise pRight would have been folded into a constant by the recursive call above

			// optimize (left op (const op2 expr)) to ((left op const) op*op2 expr)
			// the combined constant is accumulated in place, in the left node
			if ( IsAddSub ( pRoot ) )
			{
				// fold consts
				int iSign = ( ( pRoot->m_iToken=='+' ) ? 1 : -1 );
				if ( pLeft->m_iToken==TOK_CONST_INT && pConst->m_iToken==TOK_CONST_INT )
				{
					pLeft->m_iConst += iSign*pConst->m_iConst;
				} else
				{
					pLeft->m_fConst = FloatVal ( pLeft ) + iSign*FloatVal ( pConst );
					pLeft->m_iToken = TOK_CONST_FLOAT;
				}

				// fold ops
				// equal ops yield '+', different ops yield '-'
				pRoot->m_iToken = ( pRoot->m_iToken==pRight->m_iToken ) ? '+' : '-';

			} else
			{
				// fold consts
				// (only int*int stays int; division always produces a float constant)
				if ( pRoot->m_iToken=='*' && pLeft->m_iToken==TOK_CONST_INT && pConst->m_iToken==TOK_CONST_INT )
				{
					pLeft->m_iConst *= pConst->m_iConst;
				} else
				{
					if ( pRoot->m_iToken=='*' )
						pLeft->m_fConst = FloatVal ( pLeft ) * FloatVal ( pConst );
					else
						pLeft->m_fConst = FloatVal ( pLeft ) / FloatVal ( pConst ); // NOTE(review): unlike the fully-constant case, zero pConst is not guarded here
					pLeft->m_iToken = TOK_CONST_FLOAT;
				}

				// fold ops
				// equal ops yield '*', different ops yield '/'
				pRoot->m_iToken = ( pRoot->m_iToken==pRight->m_iToken ) ? '*' : '/';
			}

			// promote expr arg
			// (the old right node and its constant are not referenced from here anymore)
			pRoot->m_iRight = pRight->m_iRight;
			pRight = pExpr;
		}
	}

	// unary function from a constant
	if ( pRoot->m_iToken==TOK_FUNC && g_dFuncs [ pRoot->m_iFunc ].m_iArgs==1 && IsConst ( pLeft ) )
	{
		float fArg = pLeft->m_iToken==TOK_CONST_FLOAT ? pLeft->m_fConst : float ( pLeft->m_iConst );
		switch ( pRoot->m_iFunc )
		{
			// ABS keeps the argument type
			case FUNC_ABS:
				pRoot->m_iToken = pLeft->m_iToken;
				pRoot->m_iLeft = -1;
				if ( pLeft->m_iToken==TOK_CONST_INT )
					pRoot->m_iConst = IABS ( pLeft->m_iConst );
				else
					pRoot->m_fConst = (float)fabs ( fArg );
				break;
			// CEIL and FLOOR fold to int constants (through a cast to int), everything else folds to floats
			// LN, LOG2, LOG10 and SQRT fold to 0 when the argument is not positive
			case FUNC_CEIL:		pRoot->m_iToken = TOK_CONST_INT; pRoot->m_iLeft = -1; pRoot->m_iConst = (int)ceil ( fArg ); break;
			case FUNC_FLOOR:	pRoot->m_iToken = TOK_CONST_INT; pRoot->m_iLeft = -1; pRoot->m_iConst = (int)floor ( fArg ); break;
			case FUNC_SIN:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float ( sin ( fArg) ); break;
			case FUNC_COS:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float ( cos ( fArg ) ); break;
			case FUNC_LN:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float) log(fArg) : 0.0f; break;
			case FUNC_LOG2:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float)( log(fArg)*M_LOG2E ) : 0.0f; break;
			case FUNC_LOG10:	pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float)( log(fArg)*M_LOG10E ) : 0.0f; break;
			case FUNC_EXP:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float ( exp ( fArg ) ); break;
			case FUNC_SQRT:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float)sqrt(fArg) : 0.0f; break;
			default:			break; // any other one-argument function is left as is
		}
		// we are done with this node, whether the function got folded or not
		return;
	}

	// constant function (such as NOW())
	// replaced with the m_iConstNow value stored in the parser
	if ( pRoot->m_iToken==TOK_FUNC && pRoot->m_iFunc==FUNC_NOW )
	{
		pRoot->m_iToken = TOK_CONST_INT;
		pRoot->m_iConst = m_iConstNow;
	}
}
2819 
VariousOptimizationsPass(int iNode)2820 void ExprParser_t::VariousOptimizationsPass ( int iNode )
2821 {
2822 	if ( iNode<0 )
2823 		return;
2824 
2825 	VariousOptimizationsPass ( m_dNodes [ iNode ].m_iLeft );
2826 	VariousOptimizationsPass ( m_dNodes [ iNode ].m_iRight );
2827 
2828 	ExprNode_t * pRoot = &m_dNodes [ iNode ];
2829 	ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes [ pRoot->m_iLeft ] : NULL;
2830 	ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes [ pRoot->m_iRight ] : NULL;
2831 
2832 	// madd, mul3
2833 	// FIXME! separate pass for these? otherwise (2+(a*b))+3 won't get const folding
2834 	if ( ( pRoot->m_iToken=='+' || pRoot->m_iToken=='*' ) && ( pLeft->m_iToken=='*' || pRight->m_iToken=='*' ) )
2835 	{
2836 		if ( pLeft->m_iToken!='*' )
2837 		{
2838 			Swap ( pRoot->m_iLeft, pRoot->m_iRight );
2839 			Swap ( pLeft, pRight );
2840 		}
2841 
2842 		pLeft->m_iToken = ',';
2843 
2844 		int iLeft = pRoot->m_iLeft;
2845 		int iRight = pRoot->m_iRight;
2846 
2847 		pRoot->m_iFunc = ( pRoot->m_iToken=='+' ) ? FUNC_MADD : FUNC_MUL3;
2848 		pRoot->m_iToken = TOK_FUNC;
2849 		pRoot->m_iLeft = m_dNodes.GetLength();
2850 		pRoot->m_iRight = -1;
2851 
2852 		ExprNode_t & tArgs = m_dNodes.Add(); // invalidates all pointers!
2853 		tArgs.m_iToken = ',';
2854 		tArgs.m_iLeft = iLeft;
2855 		tArgs.m_iRight = iRight;
2856 		return;
2857 	}
2858 
2859 	// division by a constant (replace with multiplication by inverse)
2860 	if ( pRoot->m_iToken=='/' && pRight->m_iToken==TOK_CONST_FLOAT )
2861 	{
2862 		pRight->m_fConst = 1.0f / pRight->m_fConst;
2863 		pRoot->m_iToken = '*';
2864 		return;
2865 	}
2866 
2867 
2868 	// SINT(int-attr)
2869 	if ( pRoot->m_iToken==TOK_FUNC && pRoot->m_iFunc==FUNC_SINT )
2870 	{
2871 		assert ( pLeft );
2872 
2873 		if ( pLeft->m_iToken==TOK_ATTR_INT || pLeft->m_iToken==TOK_ATTR_BITS )
2874 		{
2875 			pRoot->m_iToken = TOK_ATTR_SINT;
2876 			pRoot->m_tLocator = pLeft->m_tLocator;
2877 			pRoot->m_iLeft = -1;
2878 		}
2879 	}
2880 }
2881 
/// optimize subtree
/// runs all three passes over the subtree rooted at iNode, in this fixed order:
/// canonization, then constant folding, then the misc optimizations pass
void ExprParser_t::Optimize ( int iNode )
{
	CanonizePass ( iNode );
	ConstantFoldPass ( iNode );
	VariousOptimizationsPass ( iNode );
}
2889 
2890 
2891 // debug dump
Dump(int iNode)2892 void ExprParser_t::Dump ( int iNode )
2893 {
2894 	if ( iNode<0 )
2895 		return;
2896 
2897 	ExprNode_t & tNode = m_dNodes[iNode];
2898 	switch ( tNode.m_iToken )
2899 	{
2900 		case TOK_CONST_INT:
2901 			printf ( INT64_FMT, tNode.m_iConst );
2902 			break;
2903 
2904 		case TOK_CONST_FLOAT:
2905 			printf ( "%f", tNode.m_fConst );
2906 			break;
2907 
2908 		case TOK_ATTR_INT:
2909 		case TOK_ATTR_SINT:
2910 			printf ( "row[%d]", tNode.m_tLocator.m_iBitOffset/32 );
2911 			break;
2912 
2913 		default:
2914 			printf ( "(" );
2915 			Dump ( tNode.m_iLeft );
2916 			printf ( ( tNode.m_iToken<256 ) ? " %c " : " op-%d ", tNode.m_iToken );
2917 			Dump ( tNode.m_iRight );
2918 			printf ( ")" );
2919 			break;
2920 	}
2921 }
2922 
2923 
2924 /// fold arglist into array
FoldArglist(ISphExpr * pLeft,CSphVector<ISphExpr * > & dArgs)2925 static void FoldArglist ( ISphExpr * pLeft, CSphVector<ISphExpr *> & dArgs )
2926 {
2927 	if ( !pLeft || !pLeft->IsArglist() )
2928 	{
2929 		dArgs.Add ( pLeft );
2930 		return;
2931 	}
2932 
2933 	Expr_Arglist_c * pArgs = (Expr_Arglist_c *)pLeft;
2934 	Swap ( dArgs, pArgs->m_dArgs );
2935 	SafeRelease ( pLeft );
2936 }
2937 
2938 
// UDF entry point signatures, one per returned value kind
// the evaluators below cast m_fnFunc to one of these, and call it with
// the init data, the arguments block, and a pointer to the error flag byte
typedef sphinx_int64_t ( *UdfInt_fn ) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
typedef double ( *UdfDouble_fn ) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
typedef char * ( *UdfCharptr_fn) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
2942 
2943 
2944 class Expr_Udf_c : public ISphExpr
2945 {
2946 public:
2947 	CSphVector<ISphExpr*>			m_dArgs;
2948 	CSphVector<int>					m_dArgs2Free;
2949 
2950 protected:
2951 	UdfCall_t *						m_pCall;
2952 	mutable CSphVector<int64_t>		m_dArgvals;
2953 	mutable char					m_bError;
2954 	CSphQueryProfile *				m_pProfiler;
2955 
2956 public:
Expr_Udf_c(UdfCall_t * pCall,CSphQueryProfile * pProfiler)2957 	explicit Expr_Udf_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
2958 		: m_pCall ( pCall )
2959 		, m_bError ( 0 )
2960 		, m_pProfiler ( pProfiler )
2961 	{
2962 		SPH_UDF_ARGS & tArgs = m_pCall->m_tArgs;
2963 
2964 		assert ( tArgs.arg_values==NULL );
2965 		tArgs.arg_values = new char * [ tArgs.arg_count ];
2966 		tArgs.str_lengths = new int [ tArgs.arg_count ];
2967 
2968 		m_dArgs2Free = pCall->m_dArgs2Free;
2969 		m_dArgvals.Resize ( tArgs.arg_count );
2970 		ARRAY_FOREACH ( i, m_dArgvals )
2971 			tArgs.arg_values[i] = (char*) &m_dArgvals[i];
2972 	}
2973 
~Expr_Udf_c()2974 	~Expr_Udf_c ()
2975 	{
2976 		if ( m_pCall->m_pUdf->m_fnDeinit )
2977 			m_pCall->m_pUdf->m_fnDeinit ( &m_pCall->m_tInit );
2978 		SafeDelete ( m_pCall );
2979 
2980 		ARRAY_FOREACH ( i, m_dArgs )
2981 			SafeRelease ( m_dArgs[i] );
2982 	}
2983 
FillArgs(const CSphMatch & tMatch) const2984 	void FillArgs ( const CSphMatch & tMatch ) const
2985 	{
2986 		// FIXME? a cleaner way to reinterpret?
2987 		SPH_UDF_ARGS & tArgs = m_pCall->m_tArgs;
2988 		ARRAY_FOREACH ( i, m_dArgs )
2989 		{
2990 			switch ( tArgs.arg_types[i] )
2991 			{
2992 				case SPH_UDF_TYPE_UINT32:		*(DWORD*)&m_dArgvals[i] = m_dArgs[i]->IntEval ( tMatch ); break;
2993 				case SPH_UDF_TYPE_INT64:		m_dArgvals[i] = m_dArgs[i]->Int64Eval ( tMatch ); break;
2994 				case SPH_UDF_TYPE_FLOAT:		*(float*)&m_dArgvals[i] = m_dArgs[i]->Eval ( tMatch ); break;
2995 				case SPH_UDF_TYPE_STRING:		tArgs.str_lengths[i] = m_dArgs[i]->StringEval ( tMatch, (const BYTE**)&tArgs.arg_values[i] ); break;
2996 				case SPH_UDF_TYPE_UINT32SET:	tArgs.arg_values[i] = (char*) m_dArgs[i]->MvaEval ( tMatch ); break;
2997 				case SPH_UDF_TYPE_UINT64SET:	tArgs.arg_values[i] = (char*) m_dArgs[i]->MvaEval ( tMatch ); break;
2998 				case SPH_UDF_TYPE_FACTORS:		tArgs.arg_values[i] = (char*) m_dArgs[i]->FactorEval ( tMatch ); break;
2999 				default:						assert ( 0 ); m_dArgvals[i] = 0; break;
3000 			}
3001 		}
3002 	}
3003 
FreeArgs() const3004 	void FreeArgs() const
3005 	{
3006 		ARRAY_FOREACH ( i, m_dArgs2Free )
3007 		{
3008 			int iAttr = m_dArgs2Free[i];
3009 			SafeDeleteArray ( m_pCall->m_tArgs.arg_values[iAttr] );
3010 		}
3011 	}
3012 
Command(ESphExprCommand eCmd,void * pArg)3013 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
3014 	{
3015 		if ( eCmd==SPH_EXPR_GET_UDF )
3016 		{
3017 			*((bool*)pArg) = true;
3018 			return;
3019 		}
3020 		ARRAY_FOREACH ( i, m_dArgs )
3021 			m_dArgs[i]->Command ( eCmd, pArg );
3022 	}
3023 };
3024 
3025 
3026 class Expr_UdfInt_c : public Expr_Udf_c
3027 {
3028 public:
Expr_UdfInt_c(UdfCall_t * pCall,CSphQueryProfile * pProfiler)3029 	explicit Expr_UdfInt_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
3030 		: Expr_Udf_c ( pCall, pProfiler )
3031 	{
3032 		assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_INTEGER || pCall->m_pUdf->m_eRetType==SPH_ATTR_BIGINT );
3033 	}
3034 
Int64Eval(const CSphMatch & tMatch) const3035 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
3036 	{
3037 		ESphQueryState eOld = SPH_QSTATE_TOTAL;
3038 		if ( m_pProfiler )
3039 			eOld = m_pProfiler->Switch ( SPH_QSTATE_EVAL_UDF );
3040 
3041 		if ( m_bError )
3042 		{
3043 			if ( m_pProfiler )
3044 				m_pProfiler->Switch ( eOld );
3045 			return 0;
3046 		}
3047 
3048 		FillArgs ( tMatch );
3049 		UdfInt_fn pFn = (UdfInt_fn) m_pCall->m_pUdf->m_fnFunc;
3050 		int64_t iRes = pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError );
3051 		FreeArgs();
3052 
3053 		if ( m_pProfiler )
3054 			m_pProfiler->Switch ( eOld );
3055 		return iRes;
3056 	}
3057 
IntEval(const CSphMatch & tMatch) const3058 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int) Int64Eval ( tMatch ); }
Eval(const CSphMatch & tMatch) const3059 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float) Int64Eval ( tMatch ); }
3060 };
3061 
3062 
3063 class Expr_UdfFloat_c : public Expr_Udf_c
3064 {
3065 public:
Expr_UdfFloat_c(UdfCall_t * pCall,CSphQueryProfile * pProfiler)3066 	explicit Expr_UdfFloat_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
3067 		: Expr_Udf_c ( pCall, pProfiler )
3068 	{
3069 		assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_FLOAT );
3070 	}
3071 
Eval(const CSphMatch & tMatch) const3072 	virtual float Eval ( const CSphMatch & tMatch ) const
3073 	{
3074 		ESphQueryState eOld = SPH_QSTATE_TOTAL;
3075 		if ( m_pProfiler )
3076 			eOld = m_pProfiler->Switch ( SPH_QSTATE_EVAL_UDF );
3077 
3078 		if ( m_bError )
3079 		{
3080 			if ( m_pProfiler )
3081 				m_pProfiler->Switch ( eOld );
3082 			return 0;
3083 		}
3084 
3085 		FillArgs ( tMatch );
3086 		UdfDouble_fn pFn = (UdfDouble_fn) m_pCall->m_pUdf->m_fnFunc;
3087 		float fRes = (float) pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError );
3088 		FreeArgs();
3089 
3090 		if ( m_pProfiler )
3091 			m_pProfiler->Switch ( eOld );
3092 		return fRes;
3093 	}
3094 
IntEval(const CSphMatch & tMatch) const3095 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int) Eval ( tMatch ); }
Int64Eval(const CSphMatch & tMatch) const3096 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t) Eval ( tMatch ); }
3097 };
3098 
3099 
3100 class Expr_UdfStringptr_c : public Expr_Udf_c
3101 {
3102 public:
Expr_UdfStringptr_c(UdfCall_t * pCall,CSphQueryProfile * pProfiler)3103 	explicit Expr_UdfStringptr_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
3104 		: Expr_Udf_c ( pCall, pProfiler )
3105 	{
3106 		assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_STRINGPTR );
3107 	}
3108 
Eval(const CSphMatch &) const3109 	virtual float Eval ( const CSphMatch & ) const
3110 	{
3111 		assert ( 0 && "internal error: stringptr udf evaluated as float" );
3112 		return 0.0f;
3113 	}
3114 
IntEval(const CSphMatch &) const3115 	virtual int IntEval ( const CSphMatch & ) const
3116 	{
3117 		assert ( 0 && "internal error: stringptr udf evaluated as int" );
3118 		return 0;
3119 	}
3120 
Int64Eval(const CSphMatch &) const3121 	virtual int64_t Int64Eval ( const CSphMatch & ) const
3122 	{
3123 		assert ( 0 && "internal error: stringptr udf evaluated as bigint" );
3124 		return 0;
3125 	}
3126 
StringEval(const CSphMatch & tMatch,const BYTE ** ppStr) const3127 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
3128 	{
3129 		ESphQueryState eOld = SPH_QSTATE_TOTAL;
3130 		if ( m_pProfiler )
3131 			eOld = m_pProfiler->Switch ( SPH_QSTATE_EVAL_UDF );
3132 
3133 		FillArgs ( tMatch );
3134 		UdfCharptr_fn pFn = (UdfCharptr_fn) m_pCall->m_pUdf->m_fnFunc;
3135 		char * pRes = pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError ); // owned now!
3136 		*ppStr = (const BYTE*) pRes;
3137 		int iLen = ( pRes ? strlen(pRes) : 0 );
3138 		FreeArgs();
3139 
3140 		if ( m_pProfiler )
3141 			m_pProfiler->Switch ( eOld );
3142 
3143 		return iLen;
3144 	}
3145 
IsStringPtr() const3146 	virtual bool IsStringPtr() const
3147 	{
3148 		return true;
3149 	}
3150 };
3151 
3152 
CreateUdfNode(int iCall,ISphExpr * pLeft)3153 ISphExpr * ExprParser_t::CreateUdfNode ( int iCall, ISphExpr * pLeft )
3154 {
3155 	Expr_Udf_c * pRes = NULL;
3156 	switch ( m_dUdfCalls[iCall]->m_pUdf->m_eRetType )
3157 	{
3158 		case SPH_ATTR_INTEGER:
3159 		case SPH_ATTR_BIGINT:
3160 			pRes = new Expr_UdfInt_c ( m_dUdfCalls[iCall], m_pProfiler );
3161 			break;
3162 		case SPH_ATTR_FLOAT:
3163 			pRes = new Expr_UdfFloat_c ( m_dUdfCalls[iCall], m_pProfiler );
3164 			break;
3165 		case SPH_ATTR_STRINGPTR:
3166 			pRes = new Expr_UdfStringptr_c ( m_dUdfCalls[iCall], m_pProfiler );
3167 			break;
3168 		default:
3169 			m_sCreateError.SetSprintf ( "internal error: unhandled type %d in CreateUdfNode()", m_dUdfCalls[iCall]->m_pUdf->m_eRetType );
3170 			break;
3171 	}
3172 	if ( pRes )
3173 	{
3174 		if ( pLeft )
3175 			FoldArglist ( pLeft, pRes->m_dArgs );
3176 		m_dUdfCalls[iCall] = NULL; // evaluator owns it now
3177 	}
3178 	return pRes;
3179 }
3180 
3181 
CreateExistNode(const ExprNode_t & tNode)3182 ISphExpr * ExprParser_t::CreateExistNode ( const ExprNode_t & tNode )
3183 {
3184 	assert ( m_dNodes[tNode.m_iLeft].m_iToken==',' );
3185 	int iAttrName = m_dNodes[tNode.m_iLeft].m_iLeft;
3186 	int iAttrDefault = m_dNodes[tNode.m_iLeft].m_iRight;
3187 	assert ( iAttrName>=0 && iAttrName<m_dNodes.GetLength()
3188 		&& iAttrDefault>=0 && iAttrDefault<m_dNodes.GetLength() );
3189 
3190 	int iNameStart = (int)( m_dNodes[iAttrName].m_iConst>>32 );
3191 	int iNameLen = (int)( m_dNodes[iAttrName].m_iConst & 0xffffffffUL );
3192 	// skip head and tail non attribute name symbols
3193 	while ( m_sExpr[iNameStart]!='\0' && ( m_sExpr[iNameStart]=='\'' || m_sExpr[iNameStart]==' ' ) && iNameLen )
3194 	{
3195 		iNameStart++;
3196 		iNameLen--;
3197 	}
3198 	while ( m_sExpr[iNameStart+iNameLen-1]!='\0'
3199 		&& ( m_sExpr[iNameStart+iNameLen-1]=='\'' || m_sExpr[iNameStart+iNameLen-1]==' ' )
3200 		&& iNameLen )
3201 	{
3202 		iNameLen--;
3203 	}
3204 
3205 	if ( iNameLen<=0 )
3206 	{
3207 		m_sCreateError.SetSprintf ( "first EXIST() argument must be valid string" );
3208 		return NULL;
3209 	}
3210 
3211 	assert ( iNameStart>=0 && iNameLen>0 && iNameStart+iNameLen<=(int)strlen ( m_sExpr ) );
3212 
3213 	CSphString sAttr ( m_sExpr+iNameStart, iNameLen );
3214 	sphColumnToLowercase ( const_cast<char *>( sAttr.cstr() ) );
3215 	int iLoc = m_pSchema->GetAttrIndex ( sAttr.cstr() );
3216 
3217 	if ( iLoc>=0 )
3218 	{
3219 		const CSphColumnInfo & tCol = m_pSchema->GetAttr ( iLoc );
3220 		if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET || tCol.m_eAttrType==SPH_ATTR_STRING )
3221 		{
3222 			m_sCreateError = "MVA and STRING in EXIST() prohibited";
3223 			return NULL;
3224 		}
3225 
3226 		const CSphAttrLocator & tLoc = tCol.m_tLocator;
3227 		if ( tNode.m_eRetType==SPH_ATTR_FLOAT )
3228 			return new Expr_GetFloat_c ( tLoc, iLoc );
3229 		else
3230 			return new Expr_GetInt_c ( tLoc, iLoc );
3231 	} else
3232 	{
3233 		if ( tNode.m_eRetType==SPH_ATTR_INTEGER )
3234 			return new Expr_GetIntConst_c ( (int)m_dNodes[iAttrDefault].m_iConst );
3235 		else if ( tNode.m_eRetType==SPH_ATTR_BIGINT )
3236 			return new Expr_GetInt64Const_c ( m_dNodes[iAttrDefault].m_iConst );
3237 		else
3238 			return new Expr_GetConst_c ( m_dNodes[iAttrDefault].m_fConst );
3239 	}
3240 }
3241 
3242 //////////////////////////////////////////////////////////////////////////
3243 
3244 class Expr_Contains_c : public ISphExpr
3245 {
3246 protected:
3247 	ISphExpr * m_pLat;
3248 	ISphExpr * m_pLon;
3249 
Contains(float x,float y,int n,const float * p)3250 	static bool Contains ( float x, float y, int n, const float * p )
3251 	{
3252 		bool bIn = false;
3253 		for ( int ii=0; ii<n; ii+=2 )
3254 		{
3255 			// get that edge
3256 			float ax = p[ii];
3257 			float ay = p[ii+1];
3258 			float bx = ( ii==n-2 ) ? p[0] : p[ii+2];
3259 			float by = ( ii==n-2 ) ? p[1] : p[ii+3];
3260 
3261 			// check point vs edge
3262 			float t1 = (x-ax)*(by-ay);
3263 			float t2 = (y-ay)*(bx-ax);
3264 			if ( t1==t2 && !( ax==bx && ay==by ) )
3265 			{
3266 				// so AP and AB are colinear
3267 				// because (AP dot (-AB.y, AB.x)) aka (t1-t2) is 0
3268 				// check (AP dot AB) vs (AB dot AB) then
3269 				float t3 = (x-ax)*(bx-ax) + (y-ay)*(by-ay); // AP dot AP
3270 				float t4 = (bx-ax)*(bx-ax) + (by-ay)*(by-ay); // AB dot AB
3271 				if ( t3>=0 && t3<=t4 )
3272 					return true;
3273 			}
3274 
3275 			// count edge crossings
3276 			if ( ( ay>y )!=(by>y) )
3277 				if ( ( t1<t2 ) ^ ( by<ay ) )
3278 					bIn = !bIn;
3279 		}
3280 		return bIn;
3281 	}
3282 
3283 public:
Expr_Contains_c(ISphExpr * pLat,ISphExpr * pLon)3284 	Expr_Contains_c ( ISphExpr * pLat, ISphExpr * pLon )
3285 		: m_pLat ( pLat )
3286 		, m_pLon ( pLon )
3287 	{}
3288 
~Expr_Contains_c()3289 	~Expr_Contains_c()
3290 	{
3291 		SafeRelease ( m_pLat );
3292 		SafeRelease ( m_pLon );
3293 	}
3294 
Eval(const CSphMatch & tMatch) const3295 	virtual float Eval ( const CSphMatch & tMatch ) const
3296 	{
3297 		return (float)IntEval ( tMatch );
3298 	}
3299 
Int64Eval(const CSphMatch & tMatch) const3300 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
3301 	{
3302 		return IntEval ( tMatch );
3303 	}
3304 
Command(ESphExprCommand eCmd,void * pArg)3305 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
3306 	{
3307 		m_pLat->Command ( eCmd, pArg );
3308 		m_pLon->Command ( eCmd, pArg );
3309 	}
3310 
3311 	// FIXME! implement SetStringPool?
3312 };
3313 
3314 //////////////////////////////////////////////////////////////////////////
3315 // GEODISTANCE
3316 //////////////////////////////////////////////////////////////////////////
3317 
// conversions between degrees and radians
// TO_RAD converts degrees to radians, TO_DEG converts radians to degrees
// TO_RAD2 is a half of TO_RAD, ie. it converts degrees to a HALF of the angle in radians
// the *F constants are the float flavors of their double counterparts
static const double PI = 3.14159265358979323846;
static const double TO_RAD = PI / 180.0;
static const double TO_RAD2 = PI / 360.0;
static const double TO_DEG = 180.0 / PI;
static const float TO_RADF = (float)( PI / 180.0 );
static const float TO_RADF2 = (float)( PI / 360.0 );
static const float TO_DEGF = (float)( 180.0 / PI );

// lookup table sizes, in intervals
// COS and K sizes must stay powers of two, because lookups wrap the index using a (size-1) bitmask
const int GEODIST_TABLE_COS		= 1024; // maxerr 0.00063%
const int GEODIST_TABLE_ASIN	= 512;
const int GEODIST_TABLE_K		= 1024;

// the tables themselves, filled by GeodistInit()
// every table has one extra trailing entry, because lookups interpolate between entries [i] and [i+1]
static float g_GeoCos[GEODIST_TABLE_COS+1];		///< cos(x) table
static float g_GeoAsin[GEODIST_TABLE_ASIN+1];	///< asin(sqrt(x)) table
static float g_GeoFlatK[GEODIST_TABLE_K+1][2];	///< GeodistAdaptive() flat ellipsoid method k1,k2 coeffs table
3334 
3335 
GeodistInit()3336 void GeodistInit()
3337 {
3338 	for ( int i=0; i<=GEODIST_TABLE_COS; i++ )
3339 		g_GeoCos[i] = (float)cos ( 2*PI*i/GEODIST_TABLE_COS ); // [0, 2pi] -> [0, COSTABLE]
3340 
3341 	for ( int i=0; i<=GEODIST_TABLE_ASIN; i++ )
3342 		g_GeoAsin[i] = (float)asin ( sqrt ( double(i)/GEODIST_TABLE_ASIN ) ); // [0, 1] -> [0, ASINTABLE]
3343 
3344 	for ( int i=0; i<=GEODIST_TABLE_K; i++ )
3345 	{
3346 		double x = PI*i/GEODIST_TABLE_K - PI*0.5; // [-pi/2, pi/2] -> [0, KTABLE]
3347 		g_GeoFlatK[i][0] = (float) sqr ( 111132.09 - 566.05*cos ( 2*x ) + 1.20*cos ( 4*x ) );
3348 		g_GeoFlatK[i][1] = (float) sqr ( 111415.13*cos(x) - 94.55*cos ( 3*x ) + 0.12*cos ( 5*x ) );
3349 	}
3350 }
3351 
3352 
GeodistSphereRad(float lat1,float lon1,float lat2,float lon2)3353 inline float GeodistSphereRad ( float lat1, float lon1, float lat2, float lon2 )
3354 {
3355 	static const double D = 2*6384000;
3356 	double dlat2 = 0.5*( lat1 - lat2 );
3357 	double dlon2 = 0.5*( lon1 - lon2 );
3358 	double a = sqr ( sin(dlat2) ) + cos(lat1)*cos(lat2)*sqr ( sin(dlon2) );
3359 	double c = asin ( Min ( 1.0, sqrt(a) ) );
3360 	return (float)(D*c);
3361 }
3362 
3363 
GeodistSphereDeg(float lat1,float lon1,float lat2,float lon2)3364 inline float GeodistSphereDeg ( float lat1, float lon1, float lat2, float lon2 )
3365 {
3366 	static const double D = 2*6384000;
3367 	double dlat2 = TO_RAD2*( lat1 - lat2 );
3368 	double dlon2 = TO_RAD2*( lon1 - lon2 );
3369 	double a = sqr ( sin(dlat2) ) + cos ( TO_RAD*lat1 )*cos ( TO_RAD*lat2 )*sqr ( sin(dlon2) );
3370 	double c = asin ( Min ( 1.0, sqrt(a) ) );
3371 	return (float)(D*c);
3372 }
3373 
3374 
/// normalize a difference between two angles in degrees into the [0, 180] range
/// takes the absolute value, drops the full turns, and then picks the shorter way around
/// always terminates; a non-finite difference yields NaN
static inline float GeodistDegDiff ( float f )
{
	f = (float)fabs(f);

	// drop the full turns
	// must be fmod() rather than a subtraction loop: for huge (or infinite) values
	// subtracting 360 does not change a float anymore, and the loop would spin forever
	if ( f>360 )
		f = (float)fmod ( (double)f, 360.0 );

	if ( f>180 )
		f = 360-f;
	return f;
}
3384 
3385 
GeodistFlatDeg(float fLat1,float fLon1,float fLat2,float fLon2)3386 float GeodistFlatDeg ( float fLat1, float fLon1, float fLat2, float fLon2 )
3387 {
3388 	double c1 = cos ( TO_RAD2*( fLat1+fLat2 ) );
3389 	double c2 = 2*c1*c1-1; // cos(2*t)
3390 	double c3 = c1*(2*c2-1); // cos(3*t)
3391 	double k1 = 111132.09 - 566.05*c2;
3392 	double k2 = 111415.13*c1 - 94.55*c3;
3393 	float dlat = GeodistDegDiff ( fLat1-fLat2 );
3394 	float dlon = GeodistDegDiff ( fLon1-fLon2 );
3395 	return (float)sqrt ( k1*k1*dlat*dlat + k2*k2*dlon*dlon );
3396 }
3397 
3398 
GeodistFastCos(float x)3399 static inline float GeodistFastCos ( float x )
3400 {
3401 	float y = (float)(fabs(x)*GEODIST_TABLE_COS/PI/2);
3402 	int i = int(y);
3403 	y -= i;
3404 	i &= ( GEODIST_TABLE_COS-1 );
3405 	return g_GeoCos[i] + ( g_GeoCos[i+1]-g_GeoCos[i] )*y;
3406 }
3407 
3408 
GeodistFastSin(float x)3409 static inline float GeodistFastSin ( float x )
3410 {
3411 	float y = float(fabs(x)*GEODIST_TABLE_COS/PI/2);
3412 	int i = int(y);
3413 	y -= i;
3414 	i = ( i - GEODIST_TABLE_COS/4 ) & ( GEODIST_TABLE_COS-1 ); // cos(x-pi/2)=sin(x), costable/4=pi/2
3415 	return g_GeoCos[i] + ( g_GeoCos[i+1]-g_GeoCos[i] )*y;
3416 }
3417 
3418 
3419 /// fast implementation of asin(sqrt(x))
3420 /// max error in floats 0.00369%, in doubles 0.00072%
GeodistFastAsinSqrt(float x)3421 static inline float GeodistFastAsinSqrt ( float x )
3422 {
3423 	if ( x<0.122 )
3424 	{
3425 		// distance under 4546km, Taylor error under 0.00072%
3426 		float y = (float)sqrt(x);
3427 		return y + x*y*0.166666666666666f + x*x*y*0.075f + x*x*x*y*0.044642857142857f;
3428 	}
3429 	if ( x<0.948 )
3430 	{
3431 		// distance under 17083km, 512-entry LUT error under 0.00072%
3432 		x *= GEODIST_TABLE_ASIN;
3433 		int i = int(x);
3434 		return g_GeoAsin[i] + ( g_GeoAsin[i+1] - g_GeoAsin[i] )*( x-i );
3435 	}
3436 	return (float)asin ( sqrt(x) ); // distance over 17083km, just compute honestly
3437 }
3438 
3439 
GeodistAdaptiveDeg(float lat1,float lon1,float lat2,float lon2)3440 inline float GeodistAdaptiveDeg ( float lat1, float lon1, float lat2, float lon2 )
3441 {
3442 	float dlat = GeodistDegDiff ( lat1-lat2 );
3443 	float dlon = GeodistDegDiff ( lon1-lon2 );
3444 
3445 	if ( dlon<13 )
3446 	{
3447 		// points are close enough; use flat ellipsoid model
3448 		// interpolate sqr(k1), sqr(k2) coefficients using latitudes midpoint
3449 		float m = ( lat1+lat2+180 )*GEODIST_TABLE_K/360; // [-90, 90] degrees -> [0, KTABLE] indexes
3450 		int i = int(m);
3451 		i &= ( GEODIST_TABLE_K-1 );
3452 		float kk1 = g_GeoFlatK[i][0] + ( g_GeoFlatK[i+1][0] - g_GeoFlatK[i][0] )*( m-i );
3453 		float kk2 = g_GeoFlatK[i][1] + ( g_GeoFlatK[i+1][1] - g_GeoFlatK[i][1] )*( m-i );
3454 		return (float)sqrt ( kk1*dlat*dlat + kk2*dlon*dlon );
3455 	} else
3456 	{
3457 		// points too far away; use haversine
3458 		static const float D = 2*6371000;
3459 		float a = fsqr ( GeodistFastSin ( dlat*TO_RADF2 ) ) + GeodistFastCos ( lat1*TO_RADF ) * GeodistFastCos ( lat2*TO_RADF ) * fsqr ( GeodistFastSin ( dlon*TO_RADF2 ) );
3460 		return (float)( D*GeodistFastAsinSqrt(a) );
3461 	}
3462 }
3463 
3464 
GeodistAdaptiveRad(float lat1,float lon1,float lat2,float lon2)3465 inline float GeodistAdaptiveRad ( float lat1, float lon1, float lat2, float lon2 )
3466 {
3467 	// cut-paste-optimize, maybe?
3468 	return GeodistAdaptiveDeg ( lat1*TO_DEGF, lon1*TO_DEGF, lat2*TO_DEGF, lon2*TO_DEGF );
3469 }
3470 
3471 
/// split long polygon edges into shorter ones, in place
/// dIn is a flat list of (lat, lon) vertex pairs in degrees; the last vertex connects back to the first one
/// every edge that is longer than the threshold gets extra vertices, interpolated
/// on the sphere between the edge endpoints; all the original vertices are kept
static inline void GeoTesselate ( CSphVector<float> & dIn )
{
	// 1 minute of latitude, max
	// (it varies from 1842.9 to 1861.57 at 0 to 90 respectively)
	static const float LAT_MINUTE = 1861.57f;

	// 1 minute of longitude in metres, at different latitudes
	static const float LON_MINUTE[] =
	{
		1855.32f, 1848.31f, 1827.32f, 1792.51f, // 0, 5, 10, 15
		1744.12f, 1682.50f, 1608.10f, 1521.47f, // 20, 25, 30, 35
		1423.23f, 1314.11f, 1194.93f, 1066.57f, // 40, 45, 50, 55
		930.00f, 786.26f, 636.44f, 481.70f, // 60, 65, 70, 75
		323.22f, 162.24f, 0.0f // 80, 85, 90
	};

	// tesselation threshold
	// FIXME! make this configurable?
	static const float TESSELATE_TRESH = 500000.0f; // 500 km, error under 150m or 0.03%

	CSphVector<float> dOut;
	for ( int i=0; i<dIn.GetLength(); i+=2 )
	{
		// add the current vertex in any event
		dOut.Add ( dIn[i] );
		dOut.Add ( dIn[i+1] );

		// get edge endpoints, still in degrees
		// (the closing edge wraps over to the very first vertex)
		bool bLast = ( i==dIn.GetLength()-2 );
		float fLat1 = dIn[i];
		float fLon1 = dIn[i+1];
		float fLat2 = dIn [ bLast ? 0 : (i+2) ];
		float fLon2 = dIn [ bLast ? 1 : (i+3) ];

		// quick rough geodistance estimation
		// sums up lat and lon deltas in metres, using the 5-degree latitude band of the smaller latitude
		// NOTE(review): the table has 19 entries, but the band is taken modulo 18,
		// so the last (90 degrees) entry is never used; confirm whether that is intended
		float fMinLat = Min ( fLat1, fLat2 );
		int iLatBand = (int) floor ( fabs ( fMinLat ) / 5.0f );
		iLatBand = iLatBand % 18;

		float d = (float) (60.0f*( LAT_MINUTE*fabs ( fLat1-fLat2 ) + LON_MINUTE [ iLatBand ]*fabs ( fLon1-fLon2 ) ) );
		if ( d<=TESSELATE_TRESH )
			continue;

		// convert to radians
		// FIXME! make units configurable
		fLat1 *= TO_RADF;
		fLon1 *= TO_RADF;
		fLat2 *= TO_RADF;
		fLon2 *= TO_RADF;

		// compute precise geodistance
		// the rough estimation above can only trigger this check, the precise value makes the decision
		d = GeodistSphereRad ( fLat1, fLon1, fLat2, fLon2 );
		if ( d<=TESSELATE_TRESH )
			continue;
		int iSegments = (int) ceil ( d / TESSELATE_TRESH );

		// compute arc distance
		// OPTIMIZE! maybe combine with CalcGeodist?
		// (from here on, d is the angle between the endpoints in radians, not metres)
		d = (float)acos ( sin(fLat1)*sin(fLat2) + cos(fLat1)*cos(fLat2)*cos(fLon1-fLon2) );
		const float isd = (float)(1.0f / sin(d));
		const float clat1 = (float)cos(fLat1);
		const float slat1 = (float)sin(fLat1);
		const float clon1 = (float)cos(fLon1);
		const float slon1 = (float)sin(fLon1);
		const float clat2 = (float)cos(fLat2);
		const float slat2 = (float)sin(fLat2);
		const float clon2 = (float)cos(fLon2);
		const float slon2 = (float)sin(fLon2);

		// emit the inner points only; both endpoints are polygon vertices anyway
		for ( int j=1; j<iSegments; j++ )
		{
			float f = float(j) / float(iSegments); // needed distance fraction
			float a = (float)sin ( (1-f)*d ) * isd;
			float b = (float)sin ( f*d ) * isd;
			// weighted sum of the endpoints as 3D vectors, then back to lat/lon in degrees
			float x = a*clat1*clon1 + b*clat2*clon2;
			float y = a*clat1*slon1 + b*clat2*slon2;
			float z = a*slat1 + b*slat2;
			dOut.Add ( (float)( TO_DEG * atan2 ( z, sqrt ( x*x+y*y ) ) ) );
			dOut.Add ( (float)( TO_DEG * atan2 ( y, x ) ) );
		}
	}

	// swap 'em results
	dIn.SwapData ( dOut );
}
3557 
3558 //////////////////////////////////////////////////////////////////////////
3559 
3560 class Expr_ContainsConstvec_c : public Expr_Contains_c
3561 {
3562 protected:
3563 	CSphVector<float> m_dPoly;
3564 	float m_fMinX;
3565 	float m_fMinY;
3566 	float m_fMaxX;
3567 	float m_fMaxY;
3568 
3569 public:
Expr_ContainsConstvec_c(ISphExpr * pLat,ISphExpr * pLon,const CSphVector<int> & dNodes,const ExprNode_t * pNodes,bool bGeoTesselate)3570 	Expr_ContainsConstvec_c ( ISphExpr * pLat, ISphExpr * pLon, const CSphVector<int> & dNodes, const ExprNode_t * pNodes, bool bGeoTesselate )
3571 		: Expr_Contains_c ( pLat, pLon )
3572 	{
3573 		// copy polygon data
3574 		assert ( dNodes.GetLength()>=6 );
3575 		m_dPoly.Resize ( dNodes.GetLength() );
3576 
3577 		ARRAY_FOREACH ( i, dNodes )
3578 			m_dPoly[i] = FloatVal ( &pNodes[dNodes[i]] );
3579 
3580 		// handle (huge) geosphere polygons
3581 		if ( bGeoTesselate )
3582 			GeoTesselate ( m_dPoly );
3583 
3584 		// compute bbox
3585 		m_fMinX = m_fMaxX = m_dPoly[0];
3586 		for ( int i=2; i<m_dPoly.GetLength(); i+=2 )
3587 		{
3588 			m_fMinX = Min ( m_fMinX, m_dPoly[i] );
3589 			m_fMaxX = Max ( m_fMaxX, m_dPoly[i] );
3590 		}
3591 
3592 		m_fMinY = m_fMaxY = m_dPoly[1];
3593 		for ( int i=3; i<m_dPoly.GetLength(); i+=2 )
3594 		{
3595 			m_fMinY = Min ( m_fMinY, m_dPoly[i] );
3596 			m_fMaxY = Max ( m_fMaxY, m_dPoly[i] );
3597 		}
3598 	}
3599 
IntEval(const CSphMatch & tMatch) const3600 	virtual int IntEval ( const CSphMatch & tMatch ) const
3601 	{
3602 		// eval args, do bbox check
3603 		float fLat = m_pLat->Eval(tMatch);
3604 		if ( fLat<m_fMinX || fLat>m_fMaxX )
3605 			return 0;
3606 
3607 		float fLon = m_pLon->Eval(tMatch);
3608 		if ( fLon<m_fMinY || fLon>m_fMaxY )
3609 			return 0;
3610 
3611 		// do the polygon check
3612 		return Contains ( fLat, fLon, m_dPoly.GetLength(), m_dPoly.Begin() );
3613 	}
3614 };
3615 
3616 
3617 class Expr_ContainsExprvec_c : public Expr_Contains_c
3618 {
3619 protected:
3620 	mutable CSphVector<float> m_dPoly;
3621 	CSphVector<ISphExpr*> m_dExpr;
3622 
3623 public:
Expr_ContainsExprvec_c(ISphExpr * pLat,ISphExpr * pLon,CSphVector<ISphExpr * > dExprs)3624 	Expr_ContainsExprvec_c ( ISphExpr * pLat, ISphExpr * pLon, CSphVector<ISphExpr*> dExprs )
3625 		: Expr_Contains_c ( pLat, pLon )
3626 	{
3627 		m_dExpr.SwapData ( dExprs );
3628 		m_dPoly.Resize ( m_dExpr.GetLength() );
3629 	}
3630 
~Expr_ContainsExprvec_c()3631 	~Expr_ContainsExprvec_c()
3632 	{
3633 		ARRAY_FOREACH ( i, m_dExpr )
3634 			SafeRelease ( m_dExpr[i] );
3635 	}
3636 
IntEval(const CSphMatch & tMatch) const3637 	virtual int IntEval ( const CSphMatch & tMatch ) const
3638 	{
3639 		ARRAY_FOREACH ( i, m_dExpr )
3640 			m_dPoly[i] = m_dExpr[i]->Eval ( tMatch );
3641 		return Contains ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_dPoly.GetLength(), m_dPoly.Begin() );
3642 	}
3643 };
3644 
3645 
3646 class Expr_ContainsStrattr_c : public Expr_Contains_c
3647 {
3648 protected:
3649 	ISphExpr * m_pStr;
3650 	bool m_bGeo;
3651 
3652 public:
Expr_ContainsStrattr_c(ISphExpr * pLat,ISphExpr * pLon,ISphExpr * pStr,bool bGeo)3653 	Expr_ContainsStrattr_c ( ISphExpr * pLat, ISphExpr * pLon, ISphExpr * pStr, bool bGeo )
3654 		: Expr_Contains_c ( pLat, pLon )
3655 	{
3656 		m_pStr = pStr;
3657 		m_bGeo = bGeo;
3658 	}
3659 
~Expr_ContainsStrattr_c()3660 	~Expr_ContainsStrattr_c()
3661 	{
3662 		SafeRelease ( m_pStr );
3663 	}
3664 
ParsePoly(const char * p,int iLen,CSphVector<float> & dPoly)3665 	static void ParsePoly ( const char * p, int iLen, CSphVector<float> & dPoly )
3666 	{
3667 		const char * pMax = p+iLen;
3668 		while ( p<pMax )
3669 		{
3670 			if ( isdigit(p[0]) || ( p+1<pMax && p[0]=='-' && isdigit(p[1]) ) )
3671 				dPoly.Add ( (float)strtod ( p, (char**)&p ) );
3672 			else
3673 				p++;
3674 		}
3675 	}
3676 
IntEval(const CSphMatch & tMatch) const3677 	virtual int IntEval ( const CSphMatch & tMatch ) const
3678 	{
3679 		const char * pStr;
3680 		assert ( !m_pStr->IsStringPtr() ); // aware of mem leaks caused by some StringEval implementations
3681 		int iLen = m_pStr->StringEval ( tMatch, (const BYTE **)&pStr );
3682 
3683 		CSphVector<float> dPoly;
3684 		ParsePoly ( pStr, iLen, dPoly );
3685 		if ( dPoly.GetLength()<6 )
3686 			return 0;
3687 		// OPTIMIZE? add quick bbox check too?
3688 
3689 		if ( m_bGeo )
3690 			GeoTesselate ( dPoly );
3691 		return Contains ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), dPoly.GetLength(), dPoly.Begin() );
3692 	}
3693 
Command(ESphExprCommand eCmd,void * pArg)3694 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
3695 	{
3696 		Expr_Contains_c::Command ( eCmd, pArg );
3697 		m_pStr->Command ( eCmd, pArg );
3698 	}
3699 };
3700 
3701 
CreateContainsNode(const ExprNode_t & tNode)3702 ISphExpr * ExprParser_t::CreateContainsNode ( const ExprNode_t & tNode )
3703 {
3704 	// get and check them args
3705 	const ExprNode_t & tArglist = m_dNodes [ tNode.m_iLeft ];
3706 	const int iPoly = m_dNodes [ tArglist.m_iLeft ].m_iLeft;
3707 	const int iLat = m_dNodes [ tArglist.m_iLeft ].m_iRight;
3708 	const int iLon = tArglist.m_iRight;
3709 	assert ( IsNumeric ( m_dNodes[iLat].m_eRetType ) );
3710 	assert ( IsNumeric ( m_dNodes[iLat].m_eRetType ) );
3711 	assert ( m_dNodes[iPoly].m_eRetType==SPH_ATTR_POLY2D );
3712 
3713 	// create evaluator
3714 	// gotta handle an optimized constant poly case
3715 	CSphVector<int> dPolyArgs;
3716 	GatherArgNodes ( m_dNodes[iPoly].m_iLeft, dPolyArgs );
3717 
3718 	bool bGeoTesselate = ( m_dNodes[iPoly].m_iToken==TOK_FUNC && m_dNodes[iPoly].m_iFunc==FUNC_GEOPOLY2D );
3719 
3720 	if ( dPolyArgs.GetLength()==1 && m_dNodes[dPolyArgs[0]].m_iToken==TOK_ATTR_STRING )
3721 	{
3722 		return new Expr_ContainsStrattr_c ( CreateTree(iLat), CreateTree(iLon),
3723 			CreateTree ( dPolyArgs[0] ), bGeoTesselate );
3724 	}
3725 
3726 	bool bConst = ARRAY_ALL ( bConst, dPolyArgs, IsConst ( &m_dNodes [ dPolyArgs[_all] ] ) );
3727 	if ( bConst )
3728 	{
3729 		// POLY2D(numeric-consts)
3730 		return new Expr_ContainsConstvec_c ( CreateTree(iLat), CreateTree(iLon),
3731 			dPolyArgs, m_dNodes.Begin(), bGeoTesselate );
3732 	} else
3733 	{
3734 		// POLY2D(generic-exprs)
3735 		CSphVector<ISphExpr*> dExprs ( dPolyArgs.GetLength() );
3736 		ARRAY_FOREACH ( i, dExprs )
3737 			dExprs[i] = CreateTree ( dPolyArgs[i] );
3738 		return new Expr_ContainsExprvec_c ( CreateTree(iLat), CreateTree(iLon), dExprs );
3739 	}
3740 }
3741 
3742 class Expr_Remap_c : public ISphExpr
3743 {
3744 	struct CondValPair_t
3745 	{
3746 		int64_t m_iCond;
3747 		union
3748 		{
3749 			int64_t m_iVal;
3750 			float m_fVal;
3751 		};
3752 
CondValPair_tExpr_Remap_c::CondValPair_t3753 		explicit CondValPair_t ( int64_t iCond=0 ) : m_iCond ( iCond ), m_iVal ( 0 ) {}
operator <Expr_Remap_c::CondValPair_t3754 		bool operator< ( const CondValPair_t & rhs ) const { return m_iCond<rhs.m_iCond; }
operator ==Expr_Remap_c::CondValPair_t3755 		bool operator== ( const CondValPair_t & rhs ) const { return m_iCond==rhs.m_iCond; }
3756 	};
3757 
3758 	ISphExpr * m_pCond;
3759 	ISphExpr * m_pVal;
3760 	CSphVector<CondValPair_t> m_dPairs;
3761 
3762 public:
Expr_Remap_c(ISphExpr * pCondExpr,ISphExpr * pValExpr,const CSphVector<int64_t> & dConds,const ConstList_c & tVals)3763 	Expr_Remap_c ( ISphExpr * pCondExpr, ISphExpr * pValExpr, const CSphVector<int64_t> & dConds, const ConstList_c & tVals )
3764 		: m_pCond ( pCondExpr )
3765 		, m_pVal ( pValExpr )
3766 		, m_dPairs ( dConds.GetLength() )
3767 	{
3768 		assert ( pCondExpr && pValExpr );
3769 		assert ( dConds.GetLength() );
3770 		assert ( dConds.GetLength()==tVals.m_dInts.GetLength() ||
3771 				dConds.GetLength()==tVals.m_dFloats.GetLength() );
3772 
3773 		if ( tVals.m_dInts.GetLength() )
3774 			ARRAY_FOREACH ( i, m_dPairs )
3775 			{
3776 				m_dPairs[i].m_iCond = dConds[i];
3777 				m_dPairs[i].m_iVal = tVals.m_dInts[i];
3778 			}
3779 		else
3780 			ARRAY_FOREACH ( i, m_dPairs )
3781 			{
3782 				m_dPairs[i].m_iCond = dConds[i];
3783 				m_dPairs[i].m_fVal = tVals.m_dFloats[i];
3784 			}
3785 
3786 		m_dPairs.Uniq();
3787 	}
3788 
~Expr_Remap_c()3789 	~Expr_Remap_c()
3790 	{
3791 		SafeRelease ( m_pCond );
3792 		SafeRelease ( m_pVal );
3793 	}
3794 
Eval(const CSphMatch & tMatch) const3795 	virtual float Eval ( const CSphMatch & tMatch ) const
3796 	{
3797 		const CondValPair_t * p = m_dPairs.BinarySearch ( CondValPair_t ( m_pCond->Int64Eval ( tMatch ) ) );
3798 		if ( p )
3799 			return p->m_fVal;
3800 		return m_pVal->Eval ( tMatch );
3801 	}
3802 
IntEval(const CSphMatch & tMatch) const3803 	virtual int IntEval ( const CSphMatch & tMatch ) const
3804 	{
3805 		return (int)Int64Eval ( tMatch );
3806 	}
3807 
Int64Eval(const CSphMatch & tMatch) const3808 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
3809 	{
3810 		const CondValPair_t * p = m_dPairs.BinarySearch ( CondValPair_t ( m_pCond->Int64Eval ( tMatch ) ) );
3811 		if ( p )
3812 			return p->m_iVal;
3813 		return m_pVal->Int64Eval ( tMatch );
3814 	}
3815 
3816 };
3817 
3818 //////////////////////////////////////////////////////////////////////////
3819 
3820 /// fold nodes subtree into opcodes
CreateTree(int iNode)3821 ISphExpr * ExprParser_t::CreateTree ( int iNode )
3822 {
3823 	if ( iNode<0 || GetError() )
3824 		return NULL;
3825 
3826 	const ExprNode_t & tNode = m_dNodes[iNode];
3827 
3828 	// avoid spawning argument node in some cases
3829 	bool bSkipLeft = false;
3830 	bool bSkipRight = false;
3831 	if ( tNode.m_iToken==TOK_FUNC )
3832 	{
3833 		switch ( tNode.m_iFunc )
3834 		{
3835 		case FUNC_IN:
3836 		case FUNC_EXIST:
3837 		case FUNC_GEODIST:
3838 		case FUNC_CONTAINS:
3839 		case FUNC_ZONESPANLIST:
3840 		case FUNC_RANKFACTORS:
3841 		case FUNC_PACKEDFACTORS:
3842 		case FUNC_FACTORS:
3843 		case FUNC_BM25F:
3844 		case FUNC_CURTIME:
3845 		case FUNC_UTC_TIME:
3846 		case FUNC_UTC_TIMESTAMP:
3847 		case FUNC_ALL:
3848 		case FUNC_ANY:
3849 		case FUNC_INDEXOF:
3850 		case FUNC_MIN_TOP_WEIGHT:
3851 		case FUNC_MIN_TOP_SORTVAL:
3852 		case FUNC_REMAP:
3853 			bSkipLeft = true;
3854 			bSkipRight = true;
3855 			break;
3856 		default:
3857 			break;
3858 		}
3859 	}
3860 
3861 	ISphExpr * pLeft = bSkipLeft ? NULL : CreateTree ( tNode.m_iLeft );
3862 	ISphExpr * pRight = bSkipRight ? NULL : CreateTree ( tNode.m_iRight );
3863 
3864 	if ( GetError() )
3865 	{
3866 		SafeRelease ( pLeft );
3867 		SafeRelease ( pRight );
3868 		return NULL;
3869 	}
3870 
3871 #define LOC_SPAWN_POLY(_classname) \
3872 	if ( tNode.m_eArgType==SPH_ATTR_INTEGER )		return new _classname##Int_c ( pLeft, pRight ); \
3873 	else if ( tNode.m_eArgType==SPH_ATTR_BIGINT )	return new _classname##Int64_c ( pLeft, pRight ); \
3874 	else											return new _classname##Float_c ( pLeft, pRight );
3875 
3876 	int iOp = tNode.m_iToken;
3877 	if ( iOp=='+' || iOp=='-' || iOp=='*' || iOp=='/' || iOp=='&' || iOp=='|' || iOp=='%' || iOp=='<' || iOp=='>'
3878 		|| iOp==TOK_LTE || iOp==TOK_GTE || iOp==TOK_EQ || iOp==TOK_NE || iOp==TOK_AND || iOp==TOK_OR || iOp==TOK_NOT )
3879 	{
3880 		if ( pLeft && m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD && m_dNodes[tNode.m_iLeft].m_iToken==TOK_ATTR_JSON )
3881 			pLeft = new Expr_JsonFieldConv_c ( pLeft );
3882 		if ( pRight && m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_JSON_FIELD && m_dNodes[tNode.m_iRight].m_iToken==TOK_ATTR_JSON )
3883 			pRight = new Expr_JsonFieldConv_c ( pRight );
3884 	}
3885 
3886 	switch ( tNode.m_iToken )
3887 	{
3888 		case TOK_ATTR_INT:		return new Expr_GetInt_c ( tNode.m_tLocator, tNode.m_iLocator );
3889 		case TOK_ATTR_BITS:		return new Expr_GetBits_c ( tNode.m_tLocator, tNode.m_iLocator );
3890 		case TOK_ATTR_FLOAT:	return new Expr_GetFloat_c ( tNode.m_tLocator, tNode.m_iLocator );
3891 		case TOK_ATTR_SINT:		return new Expr_GetSint_c ( tNode.m_tLocator, tNode.m_iLocator );
3892 		case TOK_ATTR_STRING:	return new Expr_GetString_c ( tNode.m_tLocator, tNode.m_iLocator );
3893 		case TOK_ATTR_MVA64:
3894 		case TOK_ATTR_MVA32:	return new Expr_GetMva_c ( tNode.m_tLocator, tNode.m_iLocator );
3895 		case TOK_ATTR_FACTORS:	return new Expr_GetFactorsAttr_c ( tNode.m_tLocator, tNode.m_iLocator );
3896 
3897 		case TOK_CONST_FLOAT:	return new Expr_GetConst_c ( tNode.m_fConst );
3898 		case TOK_CONST_INT:
3899 			if ( tNode.m_eRetType==SPH_ATTR_INTEGER )
3900 				return new Expr_GetIntConst_c ( (int)tNode.m_iConst );
3901 			else if ( tNode.m_eRetType==SPH_ATTR_BIGINT )
3902 				return new Expr_GetInt64Const_c ( tNode.m_iConst );
3903 			else
3904 				return new Expr_GetConst_c ( float(tNode.m_iConst) );
3905 			break;
3906 		case TOK_CONST_STRING:
3907 			return new Expr_GetStrConst_c ( m_sExpr+(int)( tNode.m_iConst>>32 ), (int)( tNode.m_iConst & 0xffffffffUL ), true );
3908 		case TOK_SUBKEY:
3909 			return new Expr_GetStrConst_c ( m_sExpr+(int)( tNode.m_iConst>>32 ), (int)( tNode.m_iConst & 0xffffffffUL ), false );
3910 
3911 		case TOK_ID:			return new Expr_GetId_c ();
3912 		case TOK_WEIGHT:		return new Expr_GetWeight_c ();
3913 
3914 		case '+':				return new Expr_Add_c ( pLeft, pRight ); break;
3915 		case '-':				return new Expr_Sub_c ( pLeft, pRight ); break;
3916 		case '*':				return new Expr_Mul_c ( pLeft, pRight ); break;
3917 		case '/':				return new Expr_Div_c ( pLeft, pRight ); break;
3918 		case '&':				return new Expr_BitAnd_c ( pLeft, pRight ); break;
3919 		case '|':				return new Expr_BitOr_c ( pLeft, pRight ); break;
3920 		case '%':				return new Expr_Mod_c ( pLeft, pRight ); break;
3921 
3922 		case '<':				LOC_SPAWN_POLY ( Expr_Lt ); break;
3923 		case '>':				LOC_SPAWN_POLY ( Expr_Gt ); break;
3924 		case TOK_LTE:			LOC_SPAWN_POLY ( Expr_Lte ); break;
3925 		case TOK_GTE:			LOC_SPAWN_POLY ( Expr_Gte ); break;
3926 		case TOK_EQ:			if ( ( m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_STRING ||
3927 									m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_STRINGPTR ) &&
3928 									( m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRING ||
3929 									m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRINGPTR ) )
3930 									return new Expr_StrEq_c ( pLeft, pRight, m_eCollation );
3931 								else if ( ( m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD ) &&
3932 									( m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRING ||
3933 									m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRINGPTR ) )
3934 									return new Expr_StrEq_c ( pLeft, pRight, m_eCollation );
3935 								LOC_SPAWN_POLY ( Expr_Eq ); break;
3936 		case TOK_NE:			LOC_SPAWN_POLY ( Expr_Ne ); break;
3937 		case TOK_AND:			LOC_SPAWN_POLY ( Expr_And ); break;
3938 		case TOK_OR:			LOC_SPAWN_POLY ( Expr_Or ); break;
3939 		case TOK_NOT:
3940 			if ( tNode.m_eArgType==SPH_ATTR_BIGINT )
3941 				return new Expr_NotInt64_c ( pLeft );
3942 			else
3943 				return new Expr_NotInt_c ( pLeft );
3944 			break;
3945 
3946 		case ',':
3947 			if ( pLeft && pRight )
3948 				return new Expr_Arglist_c ( pLeft, pRight );
3949 			break;
3950 
3951 		case TOK_NEG:			assert ( pRight==NULL ); return new Expr_Neg_c ( pLeft ); break;
3952 		case TOK_FUNC:
3953 			{
3954 				// fold arglist to array
3955 				Func_e eFunc = (Func_e)tNode.m_iFunc;
3956 				assert ( g_dFuncs[tNode.m_iFunc].m_eFunc==eFunc );
3957 
3958 				CSphVector<ISphExpr *> dArgs;
3959 				if ( !bSkipLeft )
3960 					FoldArglist ( pLeft, dArgs );
3961 
3962 				// spawn proper function
3963 				assert ( tNode.m_iFunc>=0 && tNode.m_iFunc<int(sizeof(g_dFuncs)/sizeof(g_dFuncs[0])) );
3964 				assert (
3965 					( bSkipLeft ) || // function will handle its arglist,
3966 					( g_dFuncs[tNode.m_iFunc].m_iArgs>=0 && g_dFuncs[tNode.m_iFunc].m_iArgs==dArgs.GetLength() ) || // arg count matches,
3967 					( g_dFuncs[tNode.m_iFunc].m_iArgs<0 && -g_dFuncs[tNode.m_iFunc].m_iArgs<=dArgs.GetLength() ) ); // or min vararg count reached
3968 
3969 				switch ( eFunc )
3970 				{
3971 					case FUNC_NOW:		assert ( 0 ); break; // prevent gcc bitching
3972 
3973 					case FUNC_ABS:		return new Expr_Abs_c ( dArgs[0] );
3974 					case FUNC_CEIL:		return new Expr_Ceil_c ( dArgs[0] );
3975 					case FUNC_FLOOR:	return new Expr_Floor_c ( dArgs[0] );
3976 					case FUNC_SIN:		return new Expr_Sin_c ( dArgs[0] );
3977 					case FUNC_COS:		return new Expr_Cos_c ( dArgs[0] );
3978 					case FUNC_LN:		return new Expr_Ln_c ( dArgs[0] );
3979 					case FUNC_LOG2:		return new Expr_Log2_c ( dArgs[0] );
3980 					case FUNC_LOG10:	return new Expr_Log10_c ( dArgs[0] );
3981 					case FUNC_EXP:		return new Expr_Exp_c ( dArgs[0] );
3982 					case FUNC_SQRT:		return new Expr_Sqrt_c ( dArgs[0] );
3983 					case FUNC_SINT:		return new Expr_Sint_c ( dArgs[0] );
3984 					case FUNC_CRC32:	return new Expr_Crc32_c ( dArgs[0] );
3985 					case FUNC_FIBONACCI:return new Expr_Fibonacci_c ( dArgs[0] );
3986 
3987 					case FUNC_DAY:			return new Expr_Day_c ( dArgs[0] );
3988 					case FUNC_MONTH:		return new Expr_Month_c ( dArgs[0] );
3989 					case FUNC_YEAR:			return new Expr_Year_c ( dArgs[0] );
3990 					case FUNC_YEARMONTH:	return new Expr_YearMonth_c ( dArgs[0] );
3991 					case FUNC_YEARMONTHDAY:	return new Expr_YearMonthDay_c ( dArgs[0] );
3992 
3993 					case FUNC_MIN:		return new Expr_Min_c ( dArgs[0], dArgs[1] );
3994 					case FUNC_MAX:		return new Expr_Max_c ( dArgs[0], dArgs[1] );
3995 					case FUNC_POW:		return new Expr_Pow_c ( dArgs[0], dArgs[1] );
3996 					case FUNC_IDIV:		return new Expr_Idiv_c ( dArgs[0], dArgs[1] );
3997 
3998 					case FUNC_IF:		return new Expr_If_c ( dArgs[0], dArgs[1], dArgs[2] );
3999 					case FUNC_MADD:		return new Expr_Madd_c ( dArgs[0], dArgs[1], dArgs[2] );
4000 					case FUNC_MUL3:		return new Expr_Mul3_c ( dArgs[0], dArgs[1], dArgs[2] );
4001 					case FUNC_ATAN2:	return new Expr_Atan2_c ( dArgs[0], dArgs[1] );
4002 
4003 					case FUNC_INTERVAL:	return CreateIntervalNode ( tNode.m_iLeft, dArgs );
4004 					case FUNC_IN:		return CreateInNode ( iNode );
4005 					case FUNC_LENGTH:	return CreateLengthNode ( tNode, dArgs[0] );
4006 					case FUNC_BITDOT:	return CreateBitdotNode ( tNode.m_iLeft, dArgs );
4007 					case FUNC_REMAP:
4008 					{
4009 						ISphExpr * pCond = CreateTree ( tNode.m_iLeft );
4010 						ISphExpr * pVal = CreateTree ( tNode.m_iRight );
4011 						assert ( pCond && pVal );
4012 						// This is a hack. I know how parser fills m_dNodes and thus know where to find constlists.
4013 						const CSphVector<int64_t> & dConds = m_dNodes [ iNode-2 ].m_pConsts->m_dInts;
4014 						const ConstList_c & tVals = *m_dNodes [ iNode-1 ].m_pConsts;
4015 						return new Expr_Remap_c ( pCond, pVal, dConds, tVals );
4016 					}
4017 
4018 					case FUNC_GEODIST:	return CreateGeodistNode ( tNode.m_iLeft );
4019 					case FUNC_EXIST:	return CreateExistNode ( tNode );
4020 					case FUNC_CONTAINS:	return CreateContainsNode ( tNode );
4021 
4022 					case FUNC_POLY2D:
4023 					case FUNC_GEOPOLY2D:break; // just make gcc happy
4024 
4025 					case FUNC_ZONESPANLIST:
4026 						m_bHasZonespanlist = true;
4027 						m_eEvalStage = SPH_EVAL_PRESORT;
4028 						return new Expr_GetZonespanlist_c ();
4029 					case FUNC_TO_STRING:
4030 						return new Expr_ToString_c ( dArgs[0], m_dNodes [ tNode.m_iLeft ].m_eRetType );
4031 					case FUNC_RANKFACTORS:
4032 						m_eEvalStage = SPH_EVAL_PRESORT;
4033 						return new Expr_GetRankFactors_c();
4034 					case FUNC_PACKEDFACTORS:
4035 					case FUNC_FACTORS:
4036 						return CreatePFNode ( tNode.m_iLeft );
4037 					case FUNC_BM25F:
4038 					{
4039 						m_uPackedFactorFlags |= SPH_FACTOR_ENABLE;
4040 
4041 						CSphVector<int> dBM25FArgs;
4042 						GatherArgNodes ( tNode.m_iLeft, dBM25FArgs );
4043 
4044 						const ExprNode_t & tLeft = m_dNodes [ dBM25FArgs[0] ];
4045 						const ExprNode_t & tRight = m_dNodes [ dBM25FArgs[1] ];
4046 						float fK1 = tLeft.m_fConst;
4047 						float fB = tRight.m_fConst;
4048 						fK1 = Max ( fK1, 0.001f );
4049 						fB = Min ( Max ( fB, 0.0f ), 1.0f );
4050 
4051 						CSphVector<CSphNamedVariant> * pFieldWeights = NULL;
4052 						if ( dBM25FArgs.GetLength()>2 )
4053 							pFieldWeights = &m_dNodes [ dBM25FArgs[2] ].m_pMapArg->m_dPairs;
4054 
4055 						return new Expr_BM25F_c ( fK1, fB, pFieldWeights );
4056 					}
4057 
4058 					case FUNC_BIGINT:
4059 					case FUNC_INTEGER:
4060 					case FUNC_DOUBLE:
4061 					case FUNC_UINT:
4062 						if ( m_dNodes[tNode.m_iLeft].m_iToken==TOK_ATTR_JSON )
4063 							return new Expr_JsonFieldConv_c ( dArgs[0] );
4064 						return dArgs[0];
4065 
4066 					case FUNC_LEAST:	return CreateAggregateNode ( tNode, SPH_AGGR_MIN, dArgs[0] );
4067 					case FUNC_GREATEST:	return CreateAggregateNode ( tNode, SPH_AGGR_MAX, dArgs[0] );
4068 
4069 					case FUNC_CURTIME:	return new Expr_Time_c ( false, false ); break;
4070 					case FUNC_UTC_TIME: return new Expr_Time_c ( true, false ); break;
4071 					case FUNC_UTC_TIMESTAMP: return new Expr_Time_c ( true, true ); break;
4072 					case FUNC_TIMEDIFF: return new Expr_TimeDiff_c ( dArgs[0], dArgs[1] ); break;
4073 
4074 					case FUNC_ALL:
4075 					case FUNC_ANY:
4076 					case FUNC_INDEXOF:
4077 						return CreateForInNode ( iNode );
4078 
4079 					case FUNC_MIN_TOP_WEIGHT:
4080 						m_eEvalStage = SPH_EVAL_PRESORT;
4081 						return new Expr_MinTopWeight();
4082 						break;
4083 					case FUNC_MIN_TOP_SORTVAL:
4084 						m_eEvalStage = SPH_EVAL_PRESORT;
4085 						return new Expr_MinTopSortval();
4086 						break;
4087 					default: // just make gcc happy
4088 						break;
4089 				}
4090 				assert ( 0 && "unhandled function id" );
4091 				break;
4092 			}
4093 
4094 		case TOK_UDF:			return CreateUdfNode ( tNode.m_iFunc, pLeft ); break;
4095 		case TOK_HOOK_IDENT:	return m_pHook->CreateNode ( tNode.m_iFunc, NULL, NULL, m_sCreateError ); break;
4096 		case TOK_HOOK_FUNC:		return m_pHook->CreateNode ( tNode.m_iFunc, pLeft, &m_eEvalStage, m_sCreateError ); break;
4097 		case TOK_MAP_ARG:
4098 			// tricky bit
4099 			// data gets moved (!) from node to ISphExpr at this point
4100 			return new Expr_MapArg_c ( tNode.m_pMapArg->m_dPairs );
4101 			break;
4102 		case TOK_ATTR_JSON:
4103 			if ( pLeft && m_dNodes[tNode.m_iLeft].m_iToken==TOK_SUBKEY && !tNode.m_tLocator.m_bDynamic )
4104 			{
4105 				// json key is a single static subkey, switch to fastpath
4106 				return new Expr_JsonFastKey_c ( tNode.m_tLocator, tNode.m_iLocator, pLeft );
4107 			} else
4108 			{
4109 				// json key is a generic expression, use generic catch-all JsonField
4110 				CSphVector<ISphExpr *> dArgs;
4111 				CSphVector<ESphAttr> dTypes;
4112 				if ( pLeft ) // may be NULL (top level array)
4113 				{
4114 					FoldArglist ( pLeft, dArgs );
4115 					GatherArgRetTypes ( tNode.m_iLeft, dTypes );
4116 				}
4117 				return new Expr_JsonField_c ( tNode.m_tLocator, tNode.m_iLocator, dArgs, dTypes );
4118 			}
4119 			break;
4120 		case TOK_ITERATOR:
4121 			{
4122 				// iterator, e.g. handles "x.gid" in SELECT ALL(x.gid=1 FOR x IN json.array)
4123 				CSphVector<ISphExpr *> dArgs;
4124 				CSphVector<ESphAttr> dTypes;
4125 				if ( pLeft )
4126 				{
4127 					FoldArglist ( pLeft, dArgs );
4128 					GatherArgRetTypes ( tNode.m_iLeft, dTypes );
4129 				}
4130 				return new Expr_JsonFieldConv_c ( new Expr_Iterator_c ( tNode.m_tLocator, tNode.m_iLocator, dArgs, dTypes, tNode.m_pAttr ) );
4131 			}
4132 		case TOK_IDENT:			m_sCreateError.SetSprintf ( "unknown column: %s", tNode.m_sIdent ); break;
4133 
4134 		case TOK_IS_NULL:
4135 		case TOK_IS_NOT_NULL:
4136 			if ( m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD )
4137 				return new Expr_JsonFieldIsNull_c ( pLeft, tNode.m_iToken==TOK_IS_NULL );
4138 			else
4139 				return new Expr_GetIntConst_c ( tNode.m_iToken!=TOK_IS_NULL );
4140 
4141 		default:				assert ( 0 && "unhandled token type" ); break;
4142 	}
4143 
4144 #undef LOC_SPAWN_POLY
4145 
4146 	// fire exit
4147 	SafeRelease ( pLeft );
4148 	SafeRelease ( pRight );
4149 	return NULL;
4150 }
4151 
4152 //////////////////////////////////////////////////////////////////////////
4153 
4154 /// arg-vs-set function (currently, IN or INTERVAL) evaluator traits
4155 template < typename T >
4156 class Expr_ArgVsSet_c : public ISphExpr
4157 {
4158 protected:
4159 	ISphExpr *			m_pArg;
4160 
4161 public:
Expr_ArgVsSet_c(ISphExpr * pArg)4162 	explicit Expr_ArgVsSet_c ( ISphExpr * pArg ) : m_pArg ( pArg ) {}
~Expr_ArgVsSet_c()4163 	~Expr_ArgVsSet_c () { SafeRelease ( m_pArg ); }
4164 
4165 	virtual int IntEval ( const CSphMatch & tMatch ) const = 0;
Eval(const CSphMatch & tMatch) const4166 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float) IntEval ( tMatch ); }
Int64Eval(const CSphMatch & tMatch) const4167 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval ( tMatch ); }
4168 
4169 protected:
4170 	T ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const;
4171 };
4172 
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const4173 template<> int Expr_ArgVsSet_c<int>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
4174 {
4175 	return pArg->IntEval ( tMatch );
4176 }
4177 
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const4178 template<> DWORD Expr_ArgVsSet_c<DWORD>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
4179 {
4180 	return (DWORD)pArg->IntEval ( tMatch );
4181 }
4182 
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const4183 template<> float Expr_ArgVsSet_c<float>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
4184 {
4185 	return pArg->Eval ( tMatch );
4186 }
4187 
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const4188 template<> int64_t Expr_ArgVsSet_c<int64_t>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
4189 {
4190 	return pArg->Int64Eval ( tMatch );
4191 }
4192 
4193 
4194 /// arg-vs-constant-set
4195 template < typename T >
4196 class Expr_ArgVsConstSet_c : public Expr_ArgVsSet_c<T>
4197 {
4198 protected:
4199 	CSphVector<T> m_dValues;
4200 
4201 public:
4202 	/// take ownership of arg, pre-evaluate and dismiss turn points
Expr_ArgVsConstSet_c(ISphExpr * pArg,CSphVector<ISphExpr * > & dArgs,int iSkip)4203 	Expr_ArgVsConstSet_c ( ISphExpr * pArg, CSphVector<ISphExpr *> & dArgs, int iSkip )
4204 		: Expr_ArgVsSet_c<T> ( pArg )
4205 	{
4206 		CSphMatch tDummy;
4207 		for ( int i=iSkip; i<dArgs.GetLength(); i++ )
4208 		{
4209 			m_dValues.Add ( Expr_ArgVsSet_c<T>::ExprEval ( dArgs[i], tDummy ) );
4210 			SafeRelease ( dArgs[i] );
4211 		}
4212 	}
4213 
4214 	/// take ownership of arg, and copy that constlist
Expr_ArgVsConstSet_c(ISphExpr * pArg,ConstList_c * pConsts)4215 	Expr_ArgVsConstSet_c ( ISphExpr * pArg, ConstList_c * pConsts )
4216 		: Expr_ArgVsSet_c<T> ( pArg )
4217 	{
4218 		if ( !pConsts )
4219 			return; // can happen on uservar path
4220 		if ( pConsts->m_eRetType==SPH_ATTR_FLOAT )
4221 		{
4222 			m_dValues.Reserve ( pConsts->m_dFloats.GetLength() );
4223 			ARRAY_FOREACH ( i, pConsts->m_dFloats )
4224 				m_dValues.Add ( (T)pConsts->m_dFloats[i] );
4225 		} else
4226 		{
4227 			m_dValues.Reserve ( pConsts->m_dInts.GetLength() );
4228 			ARRAY_FOREACH ( i, pConsts->m_dInts )
4229 				m_dValues.Add ( (T)pConsts->m_dInts[i] );
4230 		}
4231 	}
4232 };
4233 
4234 //////////////////////////////////////////////////////////////////////////
4235 
4236 /// INTERVAL() evaluator for constant turn point values case
4237 template < typename T >
4238 class Expr_IntervalConst_c : public Expr_ArgVsConstSet_c<T>
4239 {
4240 public:
4241 	/// take ownership of arg, pre-evaluate and dismiss turn points
Expr_IntervalConst_c(CSphVector<ISphExpr * > & dArgs)4242 	explicit Expr_IntervalConst_c ( CSphVector<ISphExpr *> & dArgs )
4243 		: Expr_ArgVsConstSet_c<T> ( dArgs[0], dArgs, 1 )
4244 	{}
4245 
4246 	/// evaluate arg, return interval id
IntEval(const CSphMatch & tMatch) const4247 	virtual int IntEval ( const CSphMatch & tMatch ) const
4248 	{
4249 		T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
4250 		ARRAY_FOREACH ( i, this->m_dValues ) // FIXME! OPTIMIZE! perform binary search here
4251 			if ( val<this->m_dValues[i] )
4252 				return i;
4253 		return this->m_dValues.GetLength();
4254 	}
4255 
Command(ESphExprCommand eCmd,void * pArg)4256 	virtual void Command ( ESphExprCommand eCmd, void * pArg ) { this->m_pArg->Command ( eCmd, pArg ); }
4257 };
4258 
4259 
4260 /// generic INTERVAL() evaluator
4261 template < typename T >
4262 class Expr_Interval_c : public Expr_ArgVsSet_c<T>
4263 {
4264 protected:
4265 	CSphVector<ISphExpr *> m_dTurnPoints;
4266 
4267 public:
4268 	/// take ownership of arg and turn points
Expr_Interval_c(const CSphVector<ISphExpr * > & dArgs)4269 	explicit Expr_Interval_c ( const CSphVector<ISphExpr *> & dArgs )
4270 		: Expr_ArgVsSet_c<T> ( dArgs[0] )
4271 	{
4272 		for ( int i=1; i<dArgs.GetLength(); i++ )
4273 			m_dTurnPoints.Add ( dArgs[i] );
4274 	}
4275 
4276 	/// evaluate arg, return interval id
IntEval(const CSphMatch & tMatch) const4277 	virtual int IntEval ( const CSphMatch & tMatch ) const
4278 	{
4279 		T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
4280 		ARRAY_FOREACH ( i, m_dTurnPoints )
4281 			if ( val < Expr_ArgVsSet_c<T>::ExprEval ( m_dTurnPoints[i], tMatch ) )
4282 				return i;
4283 		return m_dTurnPoints.GetLength();
4284 	}
4285 
Command(ESphExprCommand eCmd,void * pArg)4286 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4287 	{
4288 		this->m_pArg->Command ( eCmd, pArg );
4289 		ARRAY_FOREACH ( i, m_dTurnPoints )
4290 			m_dTurnPoints[i]->Command ( eCmd, pArg );
4291 	}
4292 };
4293 
4294 //////////////////////////////////////////////////////////////////////////
4295 
4296 /// IN() evaluator, arbitrary scalar expression vs. constant values
4297 template < typename T >
4298 class Expr_In_c : public Expr_ArgVsConstSet_c<T>
4299 {
4300 public:
4301 	/// pre-sort values for binary search
Expr_In_c(ISphExpr * pArg,ConstList_c * pConsts)4302 	Expr_In_c ( ISphExpr * pArg, ConstList_c * pConsts ) :
4303 		Expr_ArgVsConstSet_c<T> ( pArg, pConsts )
4304 	{
4305 		this->m_dValues.Sort();
4306 	}
4307 
4308 	/// evaluate arg, check if the value is within set
IntEval(const CSphMatch & tMatch) const4309 	virtual int IntEval ( const CSphMatch & tMatch ) const
4310 	{
4311 		T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
4312 		return this->m_dValues.BinarySearch ( val )!=NULL;
4313 	}
4314 
Command(ESphExprCommand eCmd,void * pArg)4315 	virtual void Command ( ESphExprCommand eCmd, void * pArg ) { this->m_pArg->Command ( eCmd, pArg ); }
4316 };
4317 
4318 
4319 /// IN() evaluator, arbitrary scalar expression vs. uservar
4320 /// (for the sake of evaluator, uservar is a pre-sorted, refcounted external vector)
4321 class Expr_InUservar_c : public Expr_ArgVsSet_c<int64_t>
4322 {
4323 protected:
4324 	UservarIntSet_c * m_pConsts;
4325 
4326 public:
4327 	/// just get hold of args
Expr_InUservar_c(ISphExpr * pArg,UservarIntSet_c * pConsts)4328 	explicit Expr_InUservar_c ( ISphExpr * pArg, UservarIntSet_c * pConsts )
4329 		: Expr_ArgVsSet_c<int64_t> ( pArg )
4330 		, m_pConsts ( pConsts ) // no addref, hook should have addref'd (otherwise there'd be a race)
4331 	{}
4332 
4333 	/// release the uservar value
~Expr_InUservar_c()4334 	~Expr_InUservar_c()
4335 	{
4336 		SafeRelease ( m_pConsts );
4337 	}
4338 
4339 	/// evaluate arg, check if the value is within set
IntEval(const CSphMatch & tMatch) const4340 	virtual int IntEval ( const CSphMatch & tMatch ) const
4341 	{
4342 		int64_t iVal = ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
4343 		return m_pConsts->BinarySearch ( iVal )!=NULL;
4344 	}
4345 
Command(ESphExprCommand eCmd,void * pArg)4346 	virtual void Command ( ESphExprCommand eCmd, void * pArg ) { this->m_pArg->Command ( eCmd, pArg ); }
4347 };
4348 
4349 
4350 /// IN() evaluator, MVA attribute vs. constant values
4351 template < bool MVA64 >
4352 class Expr_MVAIn_c : public Expr_ArgVsConstSet_c<int64_t>
4353 {
4354 public:
4355 	/// pre-sort values for binary search
Expr_MVAIn_c(const CSphAttrLocator & tLoc,int iLocator,ConstList_c * pConsts,UservarIntSet_c * pUservar)4356 	Expr_MVAIn_c ( const CSphAttrLocator & tLoc, int iLocator, ConstList_c * pConsts, UservarIntSet_c * pUservar )
4357 		: Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts )
4358 		, m_tLocator ( tLoc )
4359 		, m_iLocator ( iLocator )
4360 		, m_pMvaPool ( NULL )
4361 		, m_pUservar ( pUservar )
4362 		, m_bArenaProhibit ( false )
4363 	{
4364 		assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
4365 		assert ( !pConsts || !pUservar ); // either constlist or uservar, not both
4366 		this->m_dValues.Sort();
4367 	}
4368 
~Expr_MVAIn_c()4369 	~Expr_MVAIn_c()
4370 	{
4371 		SafeRelease ( m_pUservar );
4372 	}
4373 
4374 	int MvaEval ( const DWORD * pMva ) const;
4375 
MvaEval(const CSphMatch &) const4376 	virtual const DWORD * MvaEval ( const CSphMatch & ) const { assert ( 0 && "not implemented" ); return NULL; }
4377 
4378 	/// evaluate arg, check if any values are within set
IntEval(const CSphMatch & tMatch) const4379 	virtual int IntEval ( const CSphMatch & tMatch ) const
4380 	{
4381 		const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool, m_bArenaProhibit );
4382 		if ( !pMva )
4383 			return 0;
4384 
4385 		return MvaEval ( pMva );
4386 	}
4387 
Command(ESphExprCommand eCmd,void * pArg)4388 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4389 	{
4390 		if ( eCmd==SPH_EXPR_SET_MVA_POOL )
4391 		{
4392 			const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
4393 			assert ( pArg );
4394 			m_pMvaPool = pPool->m_pMva;
4395 			m_bArenaProhibit = pPool->m_bArenaProhibit;
4396 		}
4397 		if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4398 			static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
4399 	}
4400 
4401 protected:
4402 	CSphAttrLocator		m_tLocator;
4403 	int					m_iLocator; // used by SPH_EXPR_GET_DEPENDENT_COLS
4404 	const DWORD *		m_pMvaPool;
4405 	UservarIntSet_c *	m_pUservar;
4406 	bool				m_bArenaProhibit;
4407 };
4408 
4409 
4410 template<>
MvaEval(const DWORD * pMva) const4411 int Expr_MVAIn_c<false>::MvaEval ( const DWORD * pMva ) const
4412 {
4413 	// OPTIMIZE! FIXME! factor out a common function with Filter_MVAValues::Eval()
4414 	DWORD uLen = *pMva++;
4415 	const DWORD * pMvaMax = pMva+uLen;
4416 
4417 	const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4418 	const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4419 
4420 	const DWORD * L = pMva;
4421 	const DWORD * R = pMvaMax - 1;
4422 	for ( ; pFilter < pFilterMax; pFilter++ )
4423 	{
4424 		while ( L<=R )
4425 		{
4426 			const DWORD * m = L + (R - L) / 2;
4427 
4428 			if ( *pFilter > *m )
4429 				L = m + 1;
4430 			else if ( *pFilter < *m )
4431 				R = m - 1;
4432 			else
4433 				return 1;
4434 		}
4435 		R = pMvaMax - 1;
4436 	}
4437 	return 0;
4438 }
4439 
4440 
4441 template<>
MvaEval(const DWORD * pMva) const4442 int Expr_MVAIn_c<true>::MvaEval ( const DWORD * pMva ) const
4443 {
4444 	// OPTIMIZE! FIXME! factor out a common function with Filter_MVAValues::Eval()
4445 	DWORD uLen = *pMva++;
4446 	assert ( ( uLen%2 )==0 );
4447 	const DWORD * pMvaMax = pMva+uLen;
4448 
4449 	const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4450 	const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4451 
4452 	const int64_t * L = (const int64_t *)pMva;
4453 	const int64_t * R = (const int64_t *)( pMvaMax - 2 );
4454 	for ( ; pFilter < pFilterMax; pFilter++ )
4455 	{
4456 		while ( L<=R )
4457 		{
4458 			const int64_t * pVal = L + (R - L) / 2;
4459 			int64_t iMva = MVA_UPSIZE ( (const DWORD *)pVal );
4460 
4461 			if ( *pFilter > iMva )
4462 				L = pVal + 1;
4463 			else if ( *pFilter < iMva )
4464 				R = pVal - 1;
4465 			else
4466 				return 1;
4467 		}
4468 		R = (const int64_t *) ( pMvaMax - 2 );
4469 	}
4470 	return 0;
4471 }
4472 
4473 /// LENGTH() evaluator for MVAs
4474 class Expr_MVALength_c : public ISphExpr
4475 {
4476 protected:
4477 	CSphAttrLocator		m_tLocator;
4478 	int					m_iLocator; // used by SPH_EXPR_GET_DEPENDENT_COLS
4479 	bool				m_b64;
4480 	const DWORD *		m_pMvaPool;
4481 	bool				m_bArenaProhibit;
4482 
4483 public:
Expr_MVALength_c(const CSphAttrLocator & tLoc,int iLocator,bool b64)4484 	Expr_MVALength_c ( const CSphAttrLocator & tLoc, int iLocator, bool b64 )
4485 		: m_tLocator ( tLoc )
4486 		, m_iLocator ( iLocator )
4487 		, m_b64 ( b64 )
4488 		, m_pMvaPool ( NULL )
4489 		, m_bArenaProhibit ( false )
4490 	{
4491 		assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
4492 	}
4493 
IntEval(const CSphMatch & tMatch) const4494 	virtual int IntEval ( const CSphMatch & tMatch ) const
4495 	{
4496 		const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool, m_bArenaProhibit );
4497 		if ( !pMva )
4498 			return 0;
4499 		return (int)( m_b64 ? *pMva/2 : *pMva );
4500 	}
4501 
Command(ESphExprCommand eCmd,void * pArg)4502 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4503 	{
4504 		if ( eCmd==SPH_EXPR_SET_MVA_POOL )
4505 		{
4506 			const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
4507 			assert ( pArg );
4508 			m_pMvaPool = pPool->m_pMva;
4509 			m_bArenaProhibit = pPool->m_bArenaProhibit;
4510 		}
4511 		if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4512 			static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
4513 	}
4514 
Eval(const CSphMatch & tMatch) const4515 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
4516 };
4517 
4518 
4519 /// aggregate functions evaluator for MVA attribute
4520 template < bool MVA64 >
4521 class Expr_MVAAggr_c : public ISphExpr
4522 {
4523 public:
Expr_MVAAggr_c(const CSphAttrLocator & tLoc,int iLocator,ESphAggrFunc eFunc)4524 	Expr_MVAAggr_c ( const CSphAttrLocator & tLoc, int iLocator, ESphAggrFunc eFunc )
4525 		: m_tLocator ( tLoc )
4526 		, m_iLocator ( iLocator )
4527 		, m_pMvaPool ( NULL )
4528 		, m_bArenaProhibit ( false )
4529 		, m_eFunc ( eFunc )
4530 	{
4531 		assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
4532 	}
4533 
4534 	int64_t MvaAggr ( const DWORD * pMva, ESphAggrFunc eFunc ) const;
4535 
Int64Eval(const CSphMatch & tMatch) const4536 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
4537 	{
4538 		const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool, m_bArenaProhibit );
4539 		if ( !pMva )
4540 			return 0;
4541 		return MvaAggr ( pMva, m_eFunc );
4542 	}
4543 
Command(ESphExprCommand eCmd,void * pArg)4544 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4545 	{
4546 		if ( eCmd==SPH_EXPR_SET_MVA_POOL )
4547 		{
4548 			const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
4549 			assert ( pArg );
4550 			m_pMvaPool = pPool->m_pMva;
4551 			m_bArenaProhibit = pPool->m_bArenaProhibit;
4552 		}
4553 		if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4554 			static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
4555 	}
4556 
Eval(const CSphMatch & tMatch) const4557 	virtual float	Eval ( const CSphMatch & tMatch ) const { return (float)Int64Eval ( tMatch ); }
IntEval(const CSphMatch & tMatch) const4558 	virtual int		IntEval ( const CSphMatch & tMatch ) const { return (int)Int64Eval ( tMatch ); }
4559 
4560 protected:
4561 	CSphAttrLocator		m_tLocator;
4562 	int					m_iLocator; // used by SPH_EXPR_GET_DEPENDENT_COLS
4563 	const DWORD *		m_pMvaPool;
4564 	bool				m_bArenaProhibit;
4565 	ESphAggrFunc		m_eFunc;
4566 };
4567 
4568 
4569 template <>
MvaAggr(const DWORD * pMva,ESphAggrFunc eFunc) const4570 int64_t Expr_MVAAggr_c<false>::MvaAggr ( const DWORD * pMva, ESphAggrFunc eFunc ) const
4571 {
4572 	DWORD uLen = *pMva++;
4573 	const DWORD * pMvaMax = pMva+uLen;
4574 	const DWORD * L = pMva;
4575 	const DWORD * R = pMvaMax - 1;
4576 
4577 	switch ( eFunc )
4578 	{
4579 		case SPH_AGGR_MIN:	return *L;
4580 		case SPH_AGGR_MAX:	return *R;
4581 		default:			return 0;
4582 	}
4583 }
4584 
4585 
4586 template <>
MvaAggr(const DWORD * pMva,ESphAggrFunc eFunc) const4587 int64_t Expr_MVAAggr_c<true>::MvaAggr ( const DWORD * pMva, ESphAggrFunc eFunc ) const
4588 {
4589 	DWORD uLen = *pMva++;
4590 	assert ( ( uLen%2 )==0 );
4591 	const DWORD * pMvaMax = pMva+uLen;
4592 	const int64_t * L = (const int64_t *)pMva;
4593 	const int64_t * R = (const int64_t *)( pMvaMax - 2 );
4594 
4595 	switch ( eFunc )
4596 	{
4597 		case SPH_AGGR_MIN:	return *L;
4598 		case SPH_AGGR_MAX:	return *R;
4599 		default:			return 0;
4600 	}
4601 }
4602 
4603 
4604 /// IN() evaluator, JSON array vs. constant values
4605 class Expr_JsonFieldIn_c : public Expr_ArgVsConstSet_c<int64_t>
4606 {
4607 protected:
4608 	UservarIntSet_c *	m_pUservar;
4609 	const BYTE *		m_pStrings;
4610 	ISphExpr *			m_pArg;
4611 	CSphVector<int64_t>	m_dHashes;
4612 
4613 public:
Expr_JsonFieldIn_c(ConstList_c * pConsts,UservarIntSet_c * pUservar,ISphExpr * pArg)4614 	Expr_JsonFieldIn_c ( ConstList_c * pConsts, UservarIntSet_c * pUservar, ISphExpr * pArg )
4615 		: Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts )
4616 		, m_pUservar ( pUservar )
4617 		, m_pStrings ( NULL )
4618 		, m_pArg ( pArg )
4619 	{
4620 		assert ( !pConsts || !pUservar );
4621 
4622 		const char * sExpr = pConsts->m_sExpr.cstr();
4623 		int iExprLen = pConsts->m_sExpr.Length();
4624 
4625 		const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4626 		const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4627 
4628 		for ( const int64_t * pCur=pFilter; pCur<pFilterMax; pCur++ )
4629 		{
4630 			int64_t iVal = *pCur;
4631 			int iOfs = (int)( iVal>>32 );
4632 			int iLen = (int)( iVal & 0xffffffffUL );
4633 			if ( iOfs>0 && iOfs+iLen<=iExprLen )
4634 			{
4635 				CSphString sRes;
4636 				SqlUnescape ( sRes, sExpr + iOfs, iLen );
4637 				m_dHashes.Add ( sphFNV64 ( sRes.cstr(), sRes.Length() ) );
4638 			}
4639 		}
4640 
4641 		m_dHashes.Sort();
4642 	}
4643 
~Expr_JsonFieldIn_c()4644 	~Expr_JsonFieldIn_c()
4645 	{
4646 		SafeRelease ( m_pUservar );
4647 		SafeRelease ( m_pArg );
4648 	}
4649 
Command(ESphExprCommand eCmd,void * pArg)4650 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4651 	{
4652 		if ( eCmd==SPH_EXPR_SET_STRING_POOL )
4653 			m_pStrings = (const BYTE*)pArg;
4654 		m_pArg->Command ( eCmd, pArg );
4655 	}
4656 
4657 	/// evaluate arg, check if any values are within set
IntEval(const CSphMatch & tMatch) const4658 	virtual int IntEval ( const CSphMatch & tMatch ) const
4659 	{
4660 		const BYTE * pVal = NULL;
4661 		ESphJsonType eJson = GetKey ( &pVal, tMatch );
4662 		switch ( eJson )
4663 		{
4664 			case JSON_INT32_VECTOR:		return ArrayEval<int> ( pVal );
4665 			case JSON_INT64_VECTOR:		return ArrayEval<int64_t> ( pVal );
4666 			case JSON_STRING_VECTOR:	return StringArrayEval ( pVal, false );
4667 			case JSON_STRING:			return StringArrayEval ( pVal, true );
4668 			case JSON_INT32:			return ValueEval ( (int64_t) sphJsonLoadInt ( &pVal ) );
4669 			case JSON_INT64:			return ValueEval ( sphJsonLoadBigint ( &pVal ) );
4670 			case JSON_MIXED_VECTOR:
4671 				{
4672 					const BYTE * p = pVal;
4673 					sphJsonUnpackInt ( &p ); // skip node length
4674 					int iLen = sphJsonUnpackInt ( &p );
4675 					for ( int i=0; i<iLen; i++ )
4676 					{
4677 						ESphJsonType eType = (ESphJsonType)*p++;
4678 						pVal = p;
4679 						int iRes = 0;
4680 						switch (eType)
4681 						{
4682 						case JSON_STRING: iRes =  StringArrayEval ( pVal, true ); break;
4683 						case JSON_INT32: iRes = ValueEval ( (int64_t) sphJsonLoadInt ( &pVal ) ); break;
4684 						case JSON_INT64: iRes = ValueEval ( sphJsonLoadBigint ( &pVal ) ); break;
4685 						case JSON_DOUBLE: iRes = ValueEval ( (int64_t)sphQW2D ( sphJsonLoadBigint ( &pVal ) ) ); break;
4686 						default: break; // for weird subobjects, just let IN() return false
4687 						}
4688 						if ( iRes )
4689 							return 1;
4690 						sphJsonSkipNode ( eType, &p );
4691 					}
4692 					return 0;
4693 				}
4694 			default:					return 0;
4695 		}
4696 	}
4697 
4698 protected:
GetKey(const BYTE ** ppKey,const CSphMatch & tMatch) const4699 	ESphJsonType GetKey ( const BYTE ** ppKey, const CSphMatch & tMatch ) const
4700 	{
4701 		assert ( ppKey );
4702 		if ( !m_pStrings )
4703 			return JSON_EOF;
4704 		uint64_t uValue = m_pArg->Int64Eval ( tMatch );
4705 		*ppKey = m_pStrings + ( uValue & 0xffffffff );
4706 		return (ESphJsonType)( uValue >> 32 );
4707 	}
4708 
ValueEval(const int64_t iVal) const4709 	int ValueEval ( const int64_t iVal ) const
4710 	{
4711 		const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4712 		const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4713 		for ( ; pFilter<pFilterMax; pFilter++ )
4714 			if ( iVal==*pFilter )
4715 				return 1;
4716 		return 0;
4717 	}
4718 
4719 	// cannot apply MvaEval() on unordered JSON arrays, using linear search
4720 	template <typename T>
ArrayEval(const BYTE * pVal) const4721 	int ArrayEval ( const BYTE * pVal ) const
4722 	{
4723 		const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4724 		const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4725 
4726 		int iLen = sphJsonUnpackInt ( &pVal );
4727 		const T * pArray = (const T *)pVal;
4728 		const T * pArrayMax = pArray+iLen;
4729 		for ( ; pFilter<pFilterMax; pFilter++ )
4730 		{
4731 			T iVal = (T)*pFilter;
4732 			for ( const T * m = pArray; m<pArrayMax; m++ )
4733 				if ( iVal==*m )
4734 					return 1;
4735 		}
4736 		return 0;
4737 	}
4738 
StringArrayEval(const BYTE * pVal,bool bValueEval) const4739 	int StringArrayEval ( const BYTE * pVal, bool bValueEval ) const
4740 	{
4741 		if ( !bValueEval )
4742 			sphJsonUnpackInt ( &pVal );
4743 		int iCount = bValueEval ? 1 : sphJsonUnpackInt ( &pVal );
4744 
4745 		while ( iCount-- )
4746 		{
4747 			int iLen = sphJsonUnpackInt ( &pVal );
4748 			if ( m_dHashes.BinarySearch ( sphFNV64 ( pVal, iLen ) ) )
4749 				return 1;
4750 			pVal += iLen;
4751 		}
4752 		return 0;
4753 	}
4754 };
4755 
4756 
4757 class Expr_StrIn_c : public Expr_ArgVsConstSet_c<int64_t>
4758 {
4759 protected:
4760 	CSphAttrLocator			m_tLocator;
4761 	int						m_iLocator;
4762 	const BYTE *			m_pStrings;
4763 	UservarIntSet_c *		m_pUservar;
4764 	CSphVector<CSphString>  m_dStringValues;
4765 	SphStringCmp_fn			m_fnStrCmp;
4766 
4767 public:
Expr_StrIn_c(const CSphAttrLocator & tLoc,int iLocator,ConstList_c * pConsts,UservarIntSet_c * pUservar,ESphCollation eCollation)4768 	Expr_StrIn_c ( const CSphAttrLocator & tLoc, int iLocator, ConstList_c * pConsts, UservarIntSet_c * pUservar, ESphCollation eCollation )
4769 		: Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts )
4770 		, m_tLocator ( tLoc )
4771 		, m_iLocator ( iLocator )
4772 		, m_pStrings ( NULL )
4773 		, m_pUservar ( pUservar )
4774 	{
4775 		assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
4776 		assert ( !pConsts || !pUservar );
4777 
4778 		m_fnStrCmp = GetCollationFn ( eCollation );
4779 
4780 		const char * sExpr = pConsts->m_sExpr.cstr();
4781 		int iExprLen = pConsts->m_sExpr.Length();
4782 
4783 		const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4784 		const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4785 
4786 		for ( const int64_t * pCur=pFilter; pCur<pFilterMax; pCur++ )
4787 		{
4788 			int64_t iVal = *pCur;
4789 			int iOfs = (int)( iVal>>32 );
4790 			int iLen = (int)( iVal & 0xffffffffUL );
4791 			if ( iOfs>0 && iOfs+iLen<=iExprLen )
4792 			{
4793 				CSphString sRes;
4794 				SqlUnescape ( sRes, sExpr + iOfs, iLen );
4795 				m_dStringValues.Add ( sRes );
4796 			}
4797 		}
4798 	}
4799 
~Expr_StrIn_c()4800 	~Expr_StrIn_c()
4801 	{
4802 		SafeRelease ( m_pUservar );
4803 	}
4804 
IntEval(const CSphMatch & tMatch) const4805 	virtual int IntEval ( const CSphMatch & tMatch ) const
4806 	{
4807 		const BYTE * pVal;
4808 		SphAttr_t iOfs = tMatch.GetAttr ( m_tLocator );
4809 		if ( iOfs<=0 )
4810 			return 0;
4811 		int iLen = sphUnpackStr ( m_pStrings + iOfs, &pVal );
4812 
4813 		CSphString sValue ( (const char*)pVal, iLen );
4814 		const BYTE * pStr = (const BYTE*)sValue.cstr();
4815 
4816 		ARRAY_FOREACH ( i, m_dStringValues )
4817 			if ( m_fnStrCmp ( pStr, (const BYTE*)m_dStringValues[i].cstr(), false )==0 )
4818 				return 1;
4819 
4820 		return 0;
4821 	}
4822 
Command(ESphExprCommand eCmd,void * pArg)4823 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4824 	{
4825 		if ( eCmd==SPH_EXPR_SET_STRING_POOL )
4826 			m_pStrings = (const BYTE*)pArg;
4827 		if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4828 			static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
4829 	}
4830 };
4831 
4832 //////////////////////////////////////////////////////////////////////////
4833 
4834 /// generic BITDOT() evaluator
4835 /// first argument is a bit mask and the rest ones are bit weights
4836 /// function returns sum of bits multiplied by their weights
4837 /// BITDOT(5, 11, 33, 55) => 1*11 + 0*33 + 1*55 = 66
4838 /// BITDOT(4, 11, 33, 55) => 0*11 + 0*33 + 1*55 = 55
4839 template < typename T >
4840 class Expr_Bitdot_c : public Expr_ArgVsSet_c<T>
4841 {
4842 protected:
4843 	CSphVector<ISphExpr *> m_dBitWeights;
4844 
4845 public:
4846 	/// take ownership of arg and turn points
Expr_Bitdot_c(const CSphVector<ISphExpr * > & dArgs)4847 	explicit Expr_Bitdot_c ( const CSphVector<ISphExpr *> & dArgs )
4848 			: Expr_ArgVsSet_c<T> ( dArgs[0] )
4849 	{
4850 		for ( int i=1; i<dArgs.GetLength(); i++ )
4851 			m_dBitWeights.Add ( dArgs[i] );
4852 	}
4853 
4854 protected:
4855 	/// generic evaluate
DoEval(const CSphMatch & tMatch) const4856 	virtual T DoEval ( const CSphMatch & tMatch ) const
4857 	{
4858 		int64_t uArg = this->m_pArg->Int64Eval ( tMatch ); // 'this' fixes gcc braindamage
4859 		T tRes = 0;
4860 
4861 		int iBit = 0;
4862 		while ( uArg && iBit<m_dBitWeights.GetLength() )
4863 		{
4864 			if ( uArg & 1 )
4865 				tRes += Expr_ArgVsSet_c<T>::ExprEval ( m_dBitWeights[iBit], tMatch );
4866 			uArg >>= 1;
4867 			iBit++;
4868 		}
4869 
4870 		return tRes;
4871 	}
4872 
4873 public:
Eval(const CSphMatch & tMatch) const4874 	virtual float Eval ( const CSphMatch & tMatch ) const
4875 	{
4876 		return (float) DoEval ( tMatch );
4877 	}
4878 
IntEval(const CSphMatch & tMatch) const4879 	virtual int IntEval ( const CSphMatch & tMatch ) const
4880 	{
4881 		return (int) DoEval ( tMatch );
4882 	}
4883 
Int64Eval(const CSphMatch & tMatch) const4884 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
4885 	{
4886 		return (int64_t) DoEval ( tMatch );
4887 	}
4888 
Command(ESphExprCommand eCmd,void * pArg)4889 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4890 	{
4891 		this->m_pArg->Command ( eCmd, pArg );
4892 		ARRAY_FOREACH ( i, m_dBitWeights )
4893 			m_dBitWeights[i]->Command ( eCmd, pArg );
4894 	}
4895 };
4896 
4897 //////////////////////////////////////////////////////////////////////////
4898 
/// distance function family selector, see GeodistFn()
enum GeoFunc_e
{
	GEO_HAVERSINE,	///< GeodistFn() maps this to GeodistSphereRad or GeodistSphereDeg
	GEO_ADAPTIVE	///< GeodistFn() maps this to GeodistAdaptiveRad or GeodistAdaptiveDeg
};
4904 
/// pointer to a distance function; Geodist() calls it as ( lat1, lon1, lat2, lon2 )
typedef float (*Geofunc_fn)( float, float, float, float );
4906 
GeodistFn(GeoFunc_e eFunc,bool bDeg)4907 static Geofunc_fn GeodistFn ( GeoFunc_e eFunc, bool bDeg )
4908 {
4909 	switch ( 2*eFunc+bDeg )
4910 	{
4911 		case 2*GEO_HAVERSINE:		return &GeodistSphereRad;
4912 		case 2*GEO_HAVERSINE+1:		return &GeodistSphereDeg;
4913 		case 2*GEO_ADAPTIVE:		return &GeodistAdaptiveRad;
4914 		case 2*GEO_ADAPTIVE+1:		return &GeodistAdaptiveDeg;
4915 	}
4916 	return NULL;
4917 }
4918 
Geodist(GeoFunc_e eFunc,bool bDeg,float lat1,float lon1,float lat2,float lon2)4919 static float Geodist ( GeoFunc_e eFunc, bool bDeg, float lat1, float lon1, float lat2, float lon2 )
4920 {
4921 	return GeodistFn ( eFunc, bDeg ) ( lat1, lon1, lat2, lon2 );
4922 }
4923 
4924 /// geodist() - attr point, constant anchor
4925 class Expr_GeodistAttrConst_c : public ISphExpr
4926 {
4927 public:
Expr_GeodistAttrConst_c(Geofunc_fn pFunc,float fOut,CSphAttrLocator tLat,CSphAttrLocator tLon,float fAnchorLat,float fAnchorLon,int iLat,int iLon)4928 	Expr_GeodistAttrConst_c ( Geofunc_fn pFunc, float fOut, CSphAttrLocator tLat, CSphAttrLocator tLon, float fAnchorLat, float fAnchorLon, int iLat, int iLon )
4929 		: m_pFunc ( pFunc )
4930 		, m_fOut ( fOut )
4931 		, m_tLat ( tLat )
4932 		, m_tLon ( tLon )
4933 		, m_fAnchorLat ( fAnchorLat )
4934 		, m_fAnchorLon ( fAnchorLon )
4935 		, m_iLat ( iLat )
4936 		, m_iLon ( iLon )
4937 	{}
4938 
Eval(const CSphMatch & tMatch) const4939 	virtual float Eval ( const CSphMatch & tMatch ) const
4940 	{
4941 		return m_fOut*m_pFunc ( tMatch.GetAttrFloat ( m_tLat ), tMatch.GetAttrFloat ( m_tLon ), m_fAnchorLat, m_fAnchorLon );
4942 	}
4943 
Command(ESphExprCommand eCmd,void * pArg)4944 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4945 	{
4946 		if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4947 		{
4948 			static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLat );
4949 			static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLon );
4950 		}
4951 	}
4952 
4953 private:
4954 	Geofunc_fn		m_pFunc;
4955 	float			m_fOut;
4956 	CSphAttrLocator	m_tLat;
4957 	CSphAttrLocator	m_tLon;
4958 	float			m_fAnchorLat;
4959 	float			m_fAnchorLon;
4960 	int				m_iLat;
4961 	int				m_iLon;
4962 };
4963 
4964 /// geodist() - expr point, constant anchor
4965 class Expr_GeodistConst_c: public ISphExpr
4966 {
4967 public:
Expr_GeodistConst_c(Geofunc_fn pFunc,float fOut,ISphExpr * pLat,ISphExpr * pLon,float fAnchorLat,float fAnchorLon)4968 	Expr_GeodistConst_c ( Geofunc_fn pFunc, float fOut, ISphExpr * pLat, ISphExpr * pLon, float fAnchorLat, float fAnchorLon )
4969 		: m_pFunc ( pFunc )
4970 		, m_fOut ( fOut )
4971 		, m_pLat ( pLat )
4972 		, m_pLon ( pLon )
4973 		, m_fAnchorLat ( fAnchorLat )
4974 		, m_fAnchorLon ( fAnchorLon )
4975 	{}
4976 
~Expr_GeodistConst_c()4977 	~Expr_GeodistConst_c ()
4978 	{
4979 		SafeRelease ( m_pLon );
4980 		SafeRelease ( m_pLat );
4981 	}
4982 
Eval(const CSphMatch & tMatch) const4983 	virtual float Eval ( const CSphMatch & tMatch ) const
4984 	{
4985 		return m_fOut*m_pFunc ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_fAnchorLat, m_fAnchorLon );
4986 	}
4987 
Command(ESphExprCommand eCmd,void * pArg)4988 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
4989 	{
4990 		m_pLat->Command ( eCmd, pArg );
4991 		m_pLon->Command ( eCmd, pArg );
4992 	}
4993 
4994 private:
4995 	Geofunc_fn	m_pFunc;
4996 	float		m_fOut;
4997 	ISphExpr *	m_pLat;
4998 	ISphExpr *	m_pLon;
4999 	float		m_fAnchorLat;
5000 	float		m_fAnchorLon;
5001 };
5002 
5003 /// geodist() - expr point, expr anchor
5004 class Expr_Geodist_c: public ISphExpr
5005 {
5006 public:
Expr_Geodist_c(Geofunc_fn pFunc,float fOut,ISphExpr * pLat,ISphExpr * pLon,ISphExpr * pAnchorLat,ISphExpr * pAnchorLon)5007 	Expr_Geodist_c ( Geofunc_fn pFunc, float fOut, ISphExpr * pLat, ISphExpr * pLon, ISphExpr * pAnchorLat, ISphExpr * pAnchorLon )
5008 		: m_pFunc ( pFunc )
5009 		, m_fOut ( fOut )
5010 		, m_pLat ( pLat )
5011 		, m_pLon ( pLon )
5012 		, m_pAnchorLat ( pAnchorLat )
5013 		, m_pAnchorLon ( pAnchorLon )
5014 	{}
5015 
~Expr_Geodist_c()5016 	~Expr_Geodist_c ()
5017 	{
5018 		SafeRelease ( m_pAnchorLon );
5019 		SafeRelease ( m_pAnchorLat );
5020 		SafeRelease ( m_pLon );
5021 		SafeRelease ( m_pLat );
5022 	}
5023 
Eval(const CSphMatch & tMatch) const5024 	virtual float Eval ( const CSphMatch & tMatch ) const
5025 	{
5026 		return m_fOut*m_pFunc ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_pAnchorLat->Eval(tMatch), m_pAnchorLon->Eval(tMatch) );
5027 	}
5028 
Command(ESphExprCommand eCmd,void * pArg)5029 	virtual void Command ( ESphExprCommand eCmd, void * pArg )
5030 	{
5031 		m_pLat->Command ( eCmd, pArg );
5032 		m_pLon->Command ( eCmd, pArg );
5033 		m_pAnchorLat->Command ( eCmd, pArg );
5034 		m_pAnchorLon->Command ( eCmd, pArg );
5035 	}
5036 
5037 private:
5038 	Geofunc_fn	m_pFunc;
5039 	float		m_fOut;
5040 	ISphExpr *	m_pLat;
5041 	ISphExpr *	m_pLon;
5042 	ISphExpr *	m_pAnchorLat;
5043 	ISphExpr *	m_pAnchorLon;
5044 };
5045 
5046 //////////////////////////////////////////////////////////////////////////
5047 
5048 struct GatherArgTypes_t : ISphNoncopyable
5049 {
5050 	CSphVector<int> & m_dTypes;
GatherArgTypes_tGatherArgTypes_t5051 	explicit GatherArgTypes_t ( CSphVector<int> & dTypes )
5052 		: m_dTypes ( dTypes )
5053 	{}
CollectGatherArgTypes_t5054 	void Collect ( int , const ExprNode_t & tNode )
5055 	{
5056 		m_dTypes.Add ( tNode.m_iToken );
5057 	}
5058 };
5059 
GatherArgTypes(int iNode,CSphVector<int> & dTypes)5060 void ExprParser_t::GatherArgTypes ( int iNode, CSphVector<int> & dTypes )
5061 {
5062 	GatherArgTypes_t tCollector ( dTypes );
5063 	GatherArgT ( iNode, tCollector );
5064 }
5065 
5066 struct GatherArgNodes_t : ISphNoncopyable
5067 {
5068 	CSphVector<int> & m_dNodes;
GatherArgNodes_tGatherArgNodes_t5069 	explicit GatherArgNodes_t ( CSphVector<int> & dNodes )
5070 		: m_dNodes ( dNodes )
5071 	{}
CollectGatherArgNodes_t5072 	void Collect ( int iNode, const ExprNode_t & )
5073 	{
5074 		m_dNodes.Add ( iNode );
5075 	}
5076 };
5077 
GatherArgNodes(int iNode,CSphVector<int> & dNodes)5078 void ExprParser_t::GatherArgNodes ( int iNode, CSphVector<int> & dNodes )
5079 {
5080 	GatherArgNodes_t tCollector ( dNodes );
5081 	GatherArgT ( iNode, tCollector );
5082 }
5083 
5084 struct GatherArgReturnTypes_t : ISphNoncopyable
5085 {
5086 	CSphVector<ESphAttr> & m_dTypes;
GatherArgReturnTypes_tGatherArgReturnTypes_t5087 	explicit GatherArgReturnTypes_t ( CSphVector<ESphAttr> & dTypes )
5088 		: m_dTypes ( dTypes )
5089 	{}
CollectGatherArgReturnTypes_t5090 	void Collect ( int , const ExprNode_t & tNode )
5091 	{
5092 		m_dTypes.Add ( tNode.m_eRetType );
5093 	}
5094 };
5095 
GatherArgRetTypes(int iNode,CSphVector<ESphAttr> & dTypes)5096 void ExprParser_t::GatherArgRetTypes ( int iNode, CSphVector<ESphAttr> & dTypes )
5097 {
5098 	GatherArgReturnTypes_t tCollector ( dTypes );
5099 	GatherArgT ( iNode, tCollector );
5100 }
5101 
5102 template < typename T >
GatherArgT(int iNode,T & FUNCTOR)5103 void ExprParser_t::GatherArgT ( int iNode, T & FUNCTOR )
5104 {
5105 	if ( iNode<0 )
5106 		return;
5107 
5108 	m_dGatherStack.Resize ( 0 );
5109 	StackNode_t & tInitial = m_dGatherStack.Add();
5110 	const ExprNode_t & tNode = m_dNodes[iNode];
5111 	tInitial.m_iNode = iNode;
5112 	tInitial.m_iLeft = tNode.m_iLeft;
5113 	tInitial.m_iRight = tNode.m_iRight;
5114 
5115 	while ( m_dGatherStack.GetLength()>0 )
5116 	{
5117 		StackNode_t & tCur = m_dGatherStack.Last();
5118 		const ExprNode_t & tNode = m_dNodes[tCur.m_iNode];
5119 		if ( tNode.m_iToken!=',' )
5120 		{
5121 			FUNCTOR.Collect ( tCur.m_iNode, tNode );
5122 			m_dGatherStack.Pop();
5123 			continue;
5124 		}
5125 		if ( tCur.m_iLeft==-1 && tCur.m_iRight==-1 )
5126 		{
5127 			m_dGatherStack.Pop();
5128 			continue;
5129 		}
5130 
5131 		int iChild = -1;
5132 		if ( tCur.m_iLeft>=0 )
5133 		{
5134 			iChild = tCur.m_iLeft;
5135 			tCur.m_iLeft = -1;
5136 		} else if ( tCur.m_iRight>=0 )
5137 		{
5138 			iChild = tCur.m_iRight;
5139 			tCur.m_iRight = -1;
5140 		}
5141 
5142 		assert ( iChild>=0 );
5143 		const ExprNode_t & tChild = m_dNodes[iChild];
5144 		StackNode_t & tNext = m_dGatherStack.Add();
5145 		tNext.m_iNode = iChild;
5146 		tNext.m_iLeft = tChild.m_iLeft;
5147 		tNext.m_iRight = tChild.m_iRight;
5148 	}
5149 }
5150 
CheckForConstSet(int iArgsNode,int iSkip)5151 bool ExprParser_t::CheckForConstSet ( int iArgsNode, int iSkip )
5152 {
5153 	CSphVector<int> dTypes;
5154 	GatherArgTypes ( iArgsNode, dTypes );
5155 
5156 	for ( int i=iSkip; i<dTypes.GetLength(); i++ )
5157 		if ( dTypes[i]!=TOK_CONST_INT && dTypes[i]!=TOK_CONST_FLOAT && dTypes[i]!=TOK_MAP_ARG )
5158 			return false;
5159 	return true;
5160 }
5161 
5162 
5163 template < typename T >
WalkTree(int iRoot,T & FUNCTOR)5164 void ExprParser_t::WalkTree ( int iRoot, T & FUNCTOR )
5165 {
5166 	if ( iRoot>=0 )
5167 	{
5168 		const ExprNode_t & tNode = m_dNodes[iRoot];
5169 		FUNCTOR.Enter ( tNode, m_dNodes );
5170 		WalkTree ( tNode.m_iLeft, FUNCTOR );
5171 		WalkTree ( tNode.m_iRight, FUNCTOR );
5172 		FUNCTOR.Exit ( tNode );
5173 	}
5174 }
5175 
5176 
CreateIntervalNode(int iArgsNode,CSphVector<ISphExpr * > & dArgs)5177 ISphExpr * ExprParser_t::CreateIntervalNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs )
5178 {
5179 	assert ( dArgs.GetLength()>=2 );
5180 
5181 	CSphVector<ESphAttr> dTypes;
5182 	GatherArgRetTypes ( iArgsNode, dTypes );
5183 
5184 	// force type conversion, where possible
5185 	if ( dTypes[0]==SPH_ATTR_JSON_FIELD )
5186 		dArgs[0] = new Expr_JsonFieldConv_c ( dArgs[0] );
5187 
5188 	bool bConst = CheckForConstSet ( iArgsNode, 1 );
5189 	ESphAttr eAttrType = m_dNodes[iArgsNode].m_eArgType;
5190 	if ( bConst )
5191 	{
5192 		switch ( eAttrType )
5193 		{
5194 			case SPH_ATTR_INTEGER:	return new Expr_IntervalConst_c<int> ( dArgs ); break;
5195 			case SPH_ATTR_BIGINT:	return new Expr_IntervalConst_c<int64_t> ( dArgs ); break;
5196 			default:				return new Expr_IntervalConst_c<float> ( dArgs ); break;
5197 		}
5198 	} else
5199 	{
5200 		switch ( eAttrType )
5201 		{
5202 			case SPH_ATTR_INTEGER:	return new Expr_Interval_c<int> ( dArgs ); break;
5203 			case SPH_ATTR_BIGINT:	return new Expr_Interval_c<int64_t> ( dArgs ); break;
5204 			default:				return new Expr_Interval_c<float> ( dArgs ); break;
5205 		}
5206 	}
5207 #if !USE_WINDOWS
5208 	return NULL;
5209 #endif
5210 }
5211 
5212 
CreateInNode(int iNode)5213 ISphExpr * ExprParser_t::CreateInNode ( int iNode )
5214 {
5215 	const ExprNode_t & tLeft = m_dNodes[m_dNodes[iNode].m_iLeft];
5216 	const ExprNode_t & tRight = m_dNodes[m_dNodes[iNode].m_iRight];
5217 
5218 	switch ( tRight.m_iToken )
5219 	{
5220 		// create IN(arg,constlist)
5221 		case TOK_CONST_LIST:
5222 			switch ( tLeft.m_iToken )
5223 			{
5224 				case TOK_ATTR_MVA32:
5225 					return new Expr_MVAIn_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL );
5226 				case TOK_ATTR_MVA64:
5227 					return new Expr_MVAIn_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL );
5228 				case TOK_ATTR_STRING:
5229 					return new Expr_StrIn_c ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL, m_eCollation );
5230 				case TOK_ATTR_JSON:
5231 					return new Expr_JsonFieldIn_c ( tRight.m_pConsts, NULL, CreateTree ( m_dNodes [ iNode ].m_iLeft ) );
5232 				default:
5233 				{
5234 					ISphExpr * pArg = CreateTree ( m_dNodes[iNode].m_iLeft );
5235 					switch ( WidestType ( tLeft.m_eRetType, tRight.m_pConsts->m_eRetType ) )
5236 					{
5237 						case SPH_ATTR_INTEGER:	return new Expr_In_c<int> ( pArg, tRight.m_pConsts ); break;
5238 						case SPH_ATTR_BIGINT:	return new Expr_In_c<int64_t> ( pArg, tRight.m_pConsts ); break;
5239 						default:				return new Expr_In_c<float> ( pArg, tRight.m_pConsts ); break;
5240 					}
5241 				}
5242 			}
5243 			break;
5244 
5245 		// create IN(arg,uservar)
5246 		case TOK_USERVAR:
5247 		{
5248 			if ( !g_pUservarsHook )
5249 			{
5250 				m_sCreateError.SetSprintf ( "internal error: no uservars hook" );
5251 				return NULL;
5252 			}
5253 
5254 			UservarIntSet_c * pUservar = g_pUservarsHook ( m_dUservars[(int)tRight.m_iConst] );
5255 			if ( !pUservar )
5256 			{
5257 				m_sCreateError.SetSprintf ( "undefined user variable '%s'", m_dUservars[(int)tRight.m_iConst].cstr() );
5258 				return NULL;
5259 			}
5260 
5261 			switch ( tLeft.m_iToken )
5262 			{
5263 				case TOK_ATTR_MVA32:
5264 					return new Expr_MVAIn_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar );
5265 				case TOK_ATTR_MVA64:
5266 					return new Expr_MVAIn_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar );
5267 				case TOK_ATTR_STRING:
5268 					return new Expr_StrIn_c ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar, m_eCollation );
5269 				case TOK_ATTR_JSON:
5270 					return new Expr_JsonFieldIn_c ( NULL, pUservar, CreateTree ( m_dNodes[iNode].m_iLeft ) );
5271 				default:
5272 					return new Expr_InUservar_c ( CreateTree ( m_dNodes[iNode].m_iLeft ), pUservar );
5273 			}
5274 			break;
5275 		}
5276 
5277 		// oops, unhandled case
5278 		default:
5279 			m_sCreateError = "IN() arguments must be constants (except the 1st one)";
5280 			return NULL;
5281 	}
5282 }
5283 
5284 
CreateLengthNode(const ExprNode_t & tNode,ISphExpr * pLeft)5285 ISphExpr * ExprParser_t::CreateLengthNode ( const ExprNode_t & tNode, ISphExpr * pLeft )
5286 {
5287 	const ExprNode_t & tLeft = m_dNodes [ tNode.m_iLeft ];
5288 	switch ( tLeft.m_iToken )
5289 	{
5290 		case TOK_ATTR_MVA32:
5291 		case TOK_ATTR_MVA64:
5292 			return new Expr_MVALength_c ( tLeft.m_tLocator, tLeft.m_iLocator, tLeft.m_iToken==TOK_ATTR_MVA64 );
5293 		case TOK_ATTR_JSON:
5294 			return new Expr_JsonFieldLength_c ( pLeft );
5295 		default:
5296 			m_sCreateError = "LENGTH() argument must be MVA or JSON field";
5297 			return NULL;
5298 	}
5299 }
5300 
5301 
CreateGeodistNode(int iArgs)5302 ISphExpr * ExprParser_t::CreateGeodistNode ( int iArgs )
5303 {
5304 	CSphVector<int> dArgs;
5305 	GatherArgNodes ( iArgs, dArgs );
5306 	assert ( dArgs.GetLength()==4 || dArgs.GetLength()==5 );
5307 
5308 	float fOut = 1.0f; // result scale, defaults to out=meters
5309 	bool bDeg = false; // arg units, defaults to in=radians
5310 	GeoFunc_e eMethod = GEO_ADAPTIVE; // geodist function to use, defaults to adaptive
5311 
5312 	if ( dArgs.GetLength()==5 )
5313 	{
5314 		assert ( m_dNodes [ dArgs[4] ].m_eRetType==SPH_ATTR_MAPARG );
5315 		CSphVector<CSphNamedVariant> & dOpts = m_dNodes [ dArgs[4] ].m_pMapArg->m_dPairs;
5316 
5317 		// FIXME! handle errors in options somehow?
5318 		ARRAY_FOREACH ( i, dOpts )
5319 		{
5320 			const CSphNamedVariant & t = dOpts[i];
5321 			if ( t.m_sKey=="in" )
5322 			{
5323 				if ( t.m_sValue=="deg" || t.m_sValue=="degrees" )
5324 					bDeg = true;
5325 				else if ( t.m_sValue=="rad" || t.m_sValue=="radians" )
5326 					bDeg = false;
5327 
5328 			} else if ( t.m_sKey=="out" )
5329 			{
5330 				if ( t.m_sValue=="km" || t.m_sValue=="kilometers" )
5331 					fOut = 1.0f / 1000.0f;
5332 				else if ( t.m_sValue=="mi" || t.m_sValue=="miles" )
5333 					fOut = 1.0f / 1609.34f;
5334 				else if ( t.m_sValue=="ft" || t.m_sValue=="feet" )
5335 					fOut = 1.0f / 0.3048f;
5336 				else if ( t.m_sValue=="m" || t.m_sValue=="meters" )
5337 					fOut = 1.0f;
5338 			} else if ( t.m_sKey=="method" )
5339 			{
5340 				if ( t.m_sValue=="haversine" )
5341 					eMethod = GEO_HAVERSINE;
5342 				else if ( t.m_sValue=="adaptive" )
5343 					eMethod = GEO_ADAPTIVE;
5344 			}
5345 		}
5346 	}
5347 
5348 	bool bConst1 = ( IsConst ( &m_dNodes[dArgs[0]] ) && IsConst ( &m_dNodes[dArgs[1]] ) );
5349 	bool bConst2 = ( IsConst ( &m_dNodes[dArgs[2]] ) && IsConst ( &m_dNodes[dArgs[3]] ) );
5350 
5351 	if ( bConst1 && bConst2 )
5352 	{
5353 		float t[4];
5354 		for ( int i=0; i<4; i++ )
5355 			t[i] = FloatVal ( &m_dNodes[dArgs[i]] );
5356 		return new Expr_GetConst_c ( fOut*Geodist ( eMethod, bDeg, t[0], t[1], t[2], t[3] ) );
5357 	}
5358 
5359 	if ( bConst1 )
5360 	{
5361 		Swap ( dArgs[0], dArgs[2] );
5362 		Swap ( dArgs[1], dArgs[3] );
5363 		Swap ( bConst1, bConst2 );
5364 	}
5365 
5366 	if ( bConst2 )
5367 	{
5368 		// constant anchor
5369 		if ( m_dNodes[dArgs[0]].m_iToken==TOK_ATTR_FLOAT && m_dNodes[dArgs[1]].m_iToken==TOK_ATTR_FLOAT )
5370 		{
5371 			// attr point
5372 			return new Expr_GeodistAttrConst_c ( GeodistFn ( eMethod, bDeg ), fOut,
5373 				m_dNodes[dArgs[0]].m_tLocator, m_dNodes[dArgs[1]].m_tLocator,
5374 				FloatVal ( &m_dNodes[dArgs[2]] ), FloatVal ( &m_dNodes[dArgs[3]] ),
5375 				m_dNodes[dArgs[0]].m_iLocator, m_dNodes[dArgs[1]].m_iLocator );
5376 		} else
5377 		{
5378 			// expr point
5379 			return new Expr_GeodistConst_c ( GeodistFn ( eMethod, bDeg ), fOut,
5380 				CreateTree ( dArgs[0] ), CreateTree ( dArgs[1] ),
5381 				FloatVal ( &m_dNodes[dArgs[2]] ), FloatVal ( &m_dNodes[dArgs[3]] ) );
5382 		}
5383 	}
5384 
5385 	// four expressions
5386 	CSphVector<ISphExpr *> dExpr;
5387 	FoldArglist ( CreateTree ( iArgs ), dExpr );
5388 	assert ( dExpr.GetLength()==4 );
5389 	return new Expr_Geodist_c ( GeodistFn ( eMethod, bDeg ), fOut, dExpr[0], dExpr[1], dExpr[2], dExpr[3] );
5390 }
5391 
5392 
CreatePFNode(int iArg)5393 ISphExpr * ExprParser_t::CreatePFNode ( int iArg )
5394 {
5395 	m_eEvalStage = SPH_EVAL_FINAL;
5396 
5397 	DWORD uNodeFactorFlags = SPH_FACTOR_ENABLE | SPH_FACTOR_CALC_ATC;
5398 
5399 	CSphVector<int> dArgs;
5400 	GatherArgNodes ( iArg, dArgs );
5401 	assert ( dArgs.GetLength()==0 || dArgs.GetLength()==1 );
5402 
5403 	bool bNoATC = false;
5404 	bool bJsonOut = false;
5405 
5406 	if ( dArgs.GetLength()==1 )
5407 	{
5408 		assert ( m_dNodes[dArgs[0]].m_eRetType==SPH_ATTR_MAPARG );
5409 		CSphVector<CSphNamedVariant> & dOpts = m_dNodes[dArgs[0]].m_pMapArg->m_dPairs;
5410 
5411 		ARRAY_FOREACH ( i, dOpts )
5412 		{
5413 			if ( dOpts[i].m_sKey=="no_atc" && dOpts[i].m_iValue>0)
5414 				bNoATC = true;
5415 			else if ( dOpts[i].m_sKey=="json" && dOpts[i].m_iValue>0 )
5416 				bJsonOut = true;
5417 		}
5418 	}
5419 
5420 	if ( bNoATC )
5421 		uNodeFactorFlags &= ~SPH_FACTOR_CALC_ATC;
5422 	if ( bJsonOut )
5423 		uNodeFactorFlags |= SPH_FACTOR_JSON_OUT;
5424 
5425 	m_uPackedFactorFlags |= uNodeFactorFlags;
5426 
5427 	return new Expr_GetPackedFactors_c();
5428 }
5429 
5430 
5431 
CreateBitdotNode(int iArgsNode,CSphVector<ISphExpr * > & dArgs)5432 ISphExpr * ExprParser_t::CreateBitdotNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs )
5433 {
5434 	assert ( dArgs.GetLength()>=1 );
5435 
5436 	ESphAttr eAttrType = m_dNodes[iArgsNode].m_eRetType;
5437 	switch ( eAttrType )
5438 	{
5439 		case SPH_ATTR_INTEGER:	return new Expr_Bitdot_c<int> ( dArgs ); break;
5440 		case SPH_ATTR_BIGINT:	return new Expr_Bitdot_c<int64_t> ( dArgs ); break;
5441 		default:				return new Expr_Bitdot_c<float> ( dArgs ); break;
5442 	}
5443 }
5444 
5445 
CreateAggregateNode(const ExprNode_t & tNode,ESphAggrFunc eFunc,ISphExpr * pLeft)5446 ISphExpr * ExprParser_t::CreateAggregateNode ( const ExprNode_t & tNode, ESphAggrFunc eFunc, ISphExpr * pLeft )
5447 {
5448 	const ExprNode_t & tLeft = m_dNodes [ tNode.m_iLeft ];
5449 	switch ( tLeft.m_iToken )
5450 	{
5451 		case TOK_ATTR_JSON:		return new Expr_JsonFieldAggr_c ( pLeft, eFunc );
5452 		case TOK_ATTR_MVA32:	return new Expr_MVAAggr_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
5453 		case TOK_ATTR_MVA64:	return new Expr_MVAAggr_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
5454 		default:				return NULL;
5455 	}
5456 }
5457 
5458 
FixupIterators(int iNode,const char * sKey,SphAttr_t * pAttr)5459 void ExprParser_t::FixupIterators ( int iNode, const char * sKey, SphAttr_t * pAttr )
5460 {
5461 	if ( iNode==-1 )
5462 		return;
5463 
5464 	ExprNode_t & tNode = m_dNodes[iNode];
5465 
5466 	if ( tNode.m_iToken==TOK_IDENT && !strcmp ( sKey, tNode.m_sIdent ) )
5467 	{
5468 		tNode.m_iToken = TOK_ITERATOR;
5469 		tNode.m_pAttr = pAttr;
5470 	}
5471 
5472 	FixupIterators ( tNode.m_iLeft, sKey, pAttr );
5473 	FixupIterators ( tNode.m_iRight, sKey, pAttr );
5474 }
5475 
5476 
CreateForInNode(int iNode)5477 ISphExpr * ExprParser_t::CreateForInNode ( int iNode )
5478 {
5479 	ExprNode_t & tNode = m_dNodes[iNode];
5480 
5481 	int iFunc = tNode.m_iFunc;
5482 	int iExprNode = tNode.m_iLeft;
5483 	int iNameNode = tNode.m_iRight;
5484 	int iDataNode = m_dNodes[iNameNode].m_iLeft;
5485 
5486 	Expr_ForIn_c * pFunc = new Expr_ForIn_c ( CreateTree ( iDataNode ), iFunc==FUNC_ALL, iFunc==FUNC_INDEXOF );
5487 
5488 	FixupIterators ( iExprNode, m_dNodes[iNameNode].m_sIdent, pFunc->GetRef() );
5489 	pFunc->SetExpr ( CreateTree ( iExprNode ) );
5490 
5491 	return pFunc;
5492 }
5493 
5494 //////////////////////////////////////////////////////////////////////////
5495 
yylex(YYSTYPE * lvalp,ExprParser_t * pParser)5496 int yylex ( YYSTYPE * lvalp, ExprParser_t * pParser )
5497 {
5498 	return pParser->GetToken ( lvalp );
5499 }
5500 
yyerror(ExprParser_t * pParser,const char * sMessage)5501 void yyerror ( ExprParser_t * pParser, const char * sMessage )
5502 {
5503 	pParser->m_sParserError.SetSprintf ( "Sphinx expr: %s near '%s'", sMessage, pParser->m_pLastTokenStart );
5504 }
5505 
5506 #if USE_WINDOWS
5507 #pragma warning(push,1)
5508 #endif
5509 
5510 #ifdef CMAKE_GENERATED_GRAMMAR
5511 	#include "bissphinxexpr.c"
5512 #else
5513 	#include "yysphinxexpr.c"
5514 #endif
5515 
5516 #if USE_WINDOWS
5517 #pragma warning(pop)
5518 #endif
5519 
5520 //////////////////////////////////////////////////////////////////////////
5521 
~ExprParser_t()5522 ExprParser_t::~ExprParser_t ()
5523 {
5524 	// i kinda own those things
5525 	ARRAY_FOREACH ( i, m_dNodes )
5526 	{
5527 		if ( m_dNodes[i].m_iToken==TOK_CONST_LIST )
5528 			SafeDelete ( m_dNodes[i].m_pConsts );
5529 		if ( m_dNodes[i].m_iToken==TOK_MAP_ARG )
5530 			SafeDelete ( m_dNodes[i].m_pMapArg );
5531 	}
5532 
5533 	// free any UDF calls that weren't taken over
5534 	ARRAY_FOREACH ( i, m_dUdfCalls )
5535 		SafeDelete ( m_dUdfCalls[i] );
5536 
5537 	// free temp map arguments storage
5538 	ARRAY_FOREACH ( i, m_dIdents )
5539 		SafeDeleteArray ( m_dIdents[i] );
5540 }
5541 
GetWidestRet(int iLeft,int iRight)5542 ESphAttr ExprParser_t::GetWidestRet ( int iLeft, int iRight )
5543 {
5544 	ESphAttr uLeftType = ( iLeft<0 ) ? SPH_ATTR_INTEGER : m_dNodes[iLeft].m_eRetType;
5545 	ESphAttr uRightType = ( iRight<0 ) ? SPH_ATTR_INTEGER : m_dNodes[iRight].m_eRetType;
5546 
5547 	ESphAttr uRes = SPH_ATTR_FLOAT; // default is float
5548 	if ( ( uLeftType==SPH_ATTR_INTEGER || uLeftType==SPH_ATTR_BIGINT ) &&
5549 		( uRightType==SPH_ATTR_INTEGER || uRightType==SPH_ATTR_BIGINT ) )
5550 	{
5551 		// both types are integer (int32 or int64), compute in integers
5552 		uRes = ( uLeftType==SPH_ATTR_INTEGER && uRightType==SPH_ATTR_INTEGER )
5553 			? SPH_ATTR_INTEGER
5554 			: SPH_ATTR_BIGINT;
5555 	}
5556 
5557 	// if json vs numeric then return numeric type (for the autoconversion)
5558 	if ( uLeftType==SPH_ATTR_JSON_FIELD && IsNumeric ( uRightType ) )
5559 		uRes = uRightType;
5560 	else if ( uRightType==SPH_ATTR_JSON_FIELD && IsNumeric ( uLeftType ) )
5561 		uRes = uLeftType;
5562 
5563 	return uRes;
5564 }
5565 
AddNodeInt(int64_t iValue)5566 int ExprParser_t::AddNodeInt ( int64_t iValue )
5567 {
5568 	ExprNode_t & tNode = m_dNodes.Add ();
5569 	tNode.m_iToken = TOK_CONST_INT;
5570 	tNode.m_eRetType = GetIntType ( iValue );
5571 	tNode.m_iConst = iValue;
5572 	return m_dNodes.GetLength()-1;
5573 }
5574 
AddNodeFloat(float fValue)5575 int ExprParser_t::AddNodeFloat ( float fValue )
5576 {
5577 	ExprNode_t & tNode = m_dNodes.Add ();
5578 	tNode.m_iToken = TOK_CONST_FLOAT;
5579 	tNode.m_eRetType = SPH_ATTR_FLOAT;
5580 	tNode.m_fConst = fValue;
5581 	return m_dNodes.GetLength()-1;
5582 }
5583 
AddNodeString(int64_t iValue)5584 int ExprParser_t::AddNodeString ( int64_t iValue )
5585 {
5586 	ExprNode_t & tNode = m_dNodes.Add ();
5587 	tNode.m_iToken = TOK_CONST_STRING;
5588 	tNode.m_eRetType = SPH_ATTR_STRING;
5589 	tNode.m_iConst = iValue;
5590 	return m_dNodes.GetLength()-1;
5591 }
5592 
AddNodeAttr(int iTokenType,uint64_t uAttrLocator)5593 int ExprParser_t::AddNodeAttr ( int iTokenType, uint64_t uAttrLocator )
5594 {
5595 	assert ( iTokenType==TOK_ATTR_INT || iTokenType==TOK_ATTR_BITS || iTokenType==TOK_ATTR_FLOAT
5596 		|| iTokenType==TOK_ATTR_MVA32 || iTokenType==TOK_ATTR_MVA64 || iTokenType==TOK_ATTR_STRING
5597 		|| iTokenType==TOK_ATTR_FACTORS || iTokenType==TOK_ATTR_JSON );
5598 	ExprNode_t & tNode = m_dNodes.Add ();
5599 	tNode.m_iToken = iTokenType;
5600 	sphUnpackAttrLocator ( uAttrLocator, &tNode );
5601 
5602 	if ( iTokenType==TOK_ATTR_FLOAT )			tNode.m_eRetType = SPH_ATTR_FLOAT;
5603 	else if ( iTokenType==TOK_ATTR_MVA32 )		tNode.m_eRetType = SPH_ATTR_UINT32SET;
5604 	else if ( iTokenType==TOK_ATTR_MVA64 )		tNode.m_eRetType = SPH_ATTR_INT64SET;
5605 	else if ( iTokenType==TOK_ATTR_STRING )		tNode.m_eRetType = SPH_ATTR_STRING;
5606 	else if ( iTokenType==TOK_ATTR_FACTORS )	tNode.m_eRetType = SPH_ATTR_FACTORS;
5607 	else if ( iTokenType==TOK_ATTR_JSON )		tNode.m_eRetType = SPH_ATTR_JSON_FIELD;
5608 	else if ( tNode.m_tLocator.m_iBitCount>32 )	tNode.m_eRetType = SPH_ATTR_BIGINT;
5609 	else										tNode.m_eRetType = SPH_ATTR_INTEGER;
5610 	return m_dNodes.GetLength()-1;
5611 }
5612 
AddNodeID()5613 int ExprParser_t::AddNodeID ()
5614 {
5615 	ExprNode_t & tNode = m_dNodes.Add ();
5616 	tNode.m_iToken = TOK_ID;
5617 	tNode.m_eRetType = USE_64BIT ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER;
5618 	return m_dNodes.GetLength()-1;
5619 }
5620 
AddNodeWeight()5621 int ExprParser_t::AddNodeWeight ()
5622 {
5623 	ExprNode_t & tNode = m_dNodes.Add ();
5624 	tNode.m_iToken = TOK_WEIGHT;
5625 	tNode.m_eRetType = SPH_ATTR_INTEGER;
5626 	return m_dNodes.GetLength()-1;
5627 }
5628 
AddNodeOp(int iOp,int iLeft,int iRight)5629 int ExprParser_t::AddNodeOp ( int iOp, int iLeft, int iRight )
5630 {
5631 	ExprNode_t & tNode = m_dNodes.Add ();
5632 	tNode.m_iToken = iOp;
5633 
5634 	// deduce type
5635 	tNode.m_eRetType = SPH_ATTR_FLOAT; // default to float
5636 	if ( iOp==TOK_NEG )
5637 	{
5638 		// NEG just inherits the type
5639 		tNode.m_eArgType = m_dNodes[iLeft].m_eRetType;
5640 		tNode.m_eRetType = tNode.m_eArgType;
5641 
5642 	} else if ( iOp==TOK_NOT )
5643 	{
5644 		// NOT result is integer, and its argument must be integer
5645 		tNode.m_eArgType = m_dNodes[iLeft].m_eRetType;
5646 		tNode.m_eRetType = SPH_ATTR_INTEGER;
5647 		if (!( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
5648 		{
5649 			m_sParserError.SetSprintf ( "NOT argument must be integer" );
5650 			return -1;
5651 		}
5652 
5653 	} else if ( iOp==TOK_LTE || iOp==TOK_GTE || iOp==TOK_EQ || iOp==TOK_NE
5654 		|| iOp=='<' || iOp=='>' || iOp==TOK_AND || iOp==TOK_OR
5655 		|| iOp=='+' || iOp=='-' || iOp=='*' || iOp==','
5656 		|| iOp=='&' || iOp=='|' || iOp=='%'
5657 		|| iOp==TOK_IS_NULL || iOp==TOK_IS_NOT_NULL )
5658 	{
5659 		tNode.m_eArgType = GetWidestRet ( iLeft, iRight );
5660 
5661 		// arithmetical operations return arg type, logical return int
5662 		tNode.m_eRetType = ( iOp=='+' || iOp=='-' || iOp=='*' || iOp==',' || iOp=='&' || iOp=='|' || iOp=='%' )
5663 			? tNode.m_eArgType
5664 			: SPH_ATTR_INTEGER;
5665 
5666 		// both logical and bitwise AND/OR can only be over ints
5667 		if ( ( iOp==TOK_AND || iOp==TOK_OR || iOp=='&' || iOp=='|' )
5668 			&& !( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
5669 		{
5670 			m_sParserError.SetSprintf ( "%s arguments must be integer", ( iOp==TOK_AND || iOp=='&' ) ? "AND" : "OR" );
5671 			return -1;
5672 		}
5673 
5674 		// MOD can only be over ints
5675 		if ( iOp=='%'
5676 			&& !( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
5677 		{
5678 			m_sParserError.SetSprintf ( "MOD arguments must be integer" );
5679 			return -1;
5680 		}
5681 
5682 	} else
5683 	{
5684 		// check for unknown op
5685 		assert ( iOp=='/' && "unknown op in AddNodeOp() type deducer" );
5686 	}
5687 
5688 	tNode.m_iArgs = 0;
5689 	if ( iOp==',' )
5690 	{
5691 		if ( iLeft>=0 )		tNode.m_iArgs += ( m_dNodes[iLeft].m_iToken==',' ) ? m_dNodes[iLeft].m_iArgs : 1;
5692 		if ( iRight>=0 )	tNode.m_iArgs += ( m_dNodes[iRight].m_iToken==',' ) ? m_dNodes[iRight].m_iArgs : 1;
5693 	}
5694 
5695 	// argument type conversion for functions like INDEXOF(), ALL() and ANY()
5696 	// we need no conversion for operands of comma!
5697 	if ( iOp!=',' && iLeft>=0 && iRight>=0 )
5698 	{
5699 		if ( m_dNodes[iRight].m_eRetType==SPH_ATTR_STRING && m_dNodes[iLeft].m_iToken==TOK_IDENT )
5700 			m_dNodes[iLeft].m_eRetType = SPH_ATTR_STRING;
5701 		else if ( m_dNodes[iLeft].m_eRetType==SPH_ATTR_STRING && m_dNodes[iRight].m_iToken==TOK_IDENT )
5702 			m_dNodes[iRight].m_eRetType = SPH_ATTR_STRING;
5703 	}
5704 
5705 	tNode.m_iLeft = iLeft;
5706 	tNode.m_iRight = iRight;
5707 	return m_dNodes.GetLength()-1;
5708 }
5709 
5710 
AddNodeFunc(int iFunc,int iFirst,int iSecond,int iThird,int iFourth)5711 int ExprParser_t::AddNodeFunc ( int iFunc, int iFirst, int iSecond, int iThird, int iFourth )
5712 {
5713 	// regular case, iFirst is entire arglist, iSecond is -1
5714 	// special case for IN(), iFirst is arg, iSecond is constlist
5715 	// special case for REMAP(), iFirst and iSecond are expressions, iThird and iFourth are constlists
5716 	assert ( iFunc>=0 && iFunc< int ( sizeof ( g_dFuncs )/sizeof ( g_dFuncs[0]) ) );
5717 	Func_e eFunc = (Func_e)iFunc;
5718 	assert ( g_dFuncs [ iFunc ].m_eFunc==eFunc );
5719 	const char * sFuncName = g_dFuncs [ iFunc ].m_sName;
5720 
5721 	// check args count
5722 	if ( iSecond<0 || eFunc==FUNC_IN )
5723 	{
5724 		int iExpectedArgc = g_dFuncs [ iFunc ].m_iArgs;
5725 		int iArgc = 0;
5726 		if ( iFirst>=0 )
5727 			iArgc = ( m_dNodes [ iFirst ].m_iToken==',' ) ? m_dNodes [ iFirst ].m_iArgs : 1;
5728 		if ( iExpectedArgc<0 )
5729 		{
5730 			if ( iArgc<-iExpectedArgc )
5731 			{
5732 				m_sParserError.SetSprintf ( "%s() called with %d args, at least %d args expected", sFuncName, iArgc, -iExpectedArgc );
5733 				return -1;
5734 			}
5735 		} else if ( iArgc!=iExpectedArgc )
5736 		{
5737 			m_sParserError.SetSprintf ( "%s() called with %d args, %d args expected", sFuncName, iArgc, iExpectedArgc );
5738 			return -1;
5739 		}
5740 	}
5741 
5742 	// check arg types
5743 	//
5744 	// check for string args
5745 	// most builtin functions take numeric args only
5746 	bool bGotString = false, bGotMva = false;
5747 	CSphVector<ESphAttr> dRetTypes;
5748 	if ( iSecond<0 )
5749 	{
5750 		GatherArgRetTypes ( iFirst, dRetTypes );
5751 		ARRAY_FOREACH ( i, dRetTypes )
5752 		{
5753 			bGotString |= ( dRetTypes[i]==SPH_ATTR_STRING );
5754 			bGotMva |= ( dRetTypes[i]==SPH_ATTR_UINT32SET || dRetTypes[i]==SPH_ATTR_INT64SET );
5755 		}
5756 	}
5757 	if ( bGotString && !( eFunc==FUNC_CRC32 || eFunc==FUNC_EXIST || eFunc==FUNC_POLY2D || eFunc==FUNC_GEOPOLY2D ) )
5758 	{
5759 		m_sParserError.SetSprintf ( "%s() arguments can not be string", sFuncName );
5760 		return -1;
5761 	}
5762 	if ( bGotMva && !( eFunc==FUNC_IN || eFunc==FUNC_TO_STRING || eFunc==FUNC_LENGTH || eFunc==FUNC_LEAST || eFunc==FUNC_GREATEST ) )
5763 	{
5764 		m_sParserError.SetSprintf ( "%s() arguments can not be MVA", sFuncName );
5765 		return -1;
5766 	}
5767 
5768 	// check that first BITDOT arg is integer or bigint
5769 	if ( eFunc==FUNC_BITDOT )
5770 	{
5771 		int iLeftmost = iFirst;
5772 		while ( m_dNodes [ iLeftmost ].m_iToken==',' )
5773 			iLeftmost = m_dNodes [ iLeftmost ].m_iLeft;
5774 
5775 		ESphAttr eArg = m_dNodes [ iLeftmost ].m_eRetType;
5776 		if ( eArg!=SPH_ATTR_INTEGER && eArg!=SPH_ATTR_BIGINT )
5777 		{
5778 			m_sParserError.SetSprintf ( "first BITDOT() argument must be integer" );
5779 			return -1;
5780 		}
5781 	}
5782 
5783 	if ( eFunc==FUNC_EXIST )
5784 	{
5785 		int iExistLeft = m_dNodes [ iFirst ].m_iLeft;
5786 		int iExistRight = m_dNodes [ iFirst ].m_iRight;
5787 		bool bIsLeftGood = ( m_dNodes [ iExistLeft ].m_eRetType==SPH_ATTR_STRING );
5788 		ESphAttr eRight = m_dNodes [ iExistRight ].m_eRetType;
5789 		bool bIsRightGood = ( eRight==SPH_ATTR_INTEGER || eRight==SPH_ATTR_TIMESTAMP || eRight==SPH_ATTR_BOOL
5790 			|| eRight==SPH_ATTR_FLOAT || eRight==SPH_ATTR_BIGINT );
5791 
5792 		if ( !bIsLeftGood || !bIsRightGood )
5793 		{
5794 			if ( bIsRightGood )
5795 				m_sParserError.SetSprintf ( "first EXIST() argument must be string" );
5796 			else
5797 				m_sParserError.SetSprintf ( "ill-formed EXIST" );
5798 			return -1;
5799 		}
5800 	}
5801 
5802 
5803 	// check that first SINT or timestamp family arg is integer
5804 	if ( eFunc==FUNC_SINT || eFunc==FUNC_DAY || eFunc==FUNC_MONTH || eFunc==FUNC_YEAR || eFunc==FUNC_YEARMONTH || eFunc==FUNC_YEARMONTHDAY
5805 		|| eFunc==FUNC_FIBONACCI )
5806 	{
5807 		assert ( iFirst>=0 );
5808 		if ( m_dNodes [ iFirst ].m_eRetType!=SPH_ATTR_INTEGER )
5809 		{
5810 			m_sParserError.SetSprintf ( "%s() argument must be integer", sFuncName );
5811 			return -1;
5812 		}
5813 	}
5814 
5815 	// check that CONTAINS args are poly, float, float
5816 	if ( eFunc==FUNC_CONTAINS )
5817 	{
5818 		assert ( dRetTypes.GetLength()==3 );
5819 		if ( dRetTypes[0]!=SPH_ATTR_POLY2D )
5820 		{
5821 			m_sParserError.SetSprintf ( "1st CONTAINS() argument must be a 2D polygon (see POLY2D)" );
5822 			return -1;
5823 		}
5824 		if ( !IsNumeric ( dRetTypes[1] ) || !IsNumeric ( dRetTypes[2] ) )
5825 		{
5826 			m_sParserError.SetSprintf ( "2nd and 3rd CONTAINS() arguments must be numeric" );
5827 			return -1;
5828 		}
5829 	}
5830 
5831 	// check POLY2D args
5832 	if ( eFunc==FUNC_POLY2D || eFunc==FUNC_GEOPOLY2D )
5833 	{
5834 		if ( dRetTypes.GetLength()==1 )
5835 		{
5836 			// handle 1 arg version, POLY2D(string-attr)
5837 			if ( dRetTypes[0]!=SPH_ATTR_STRING )
5838 			{
5839 				m_sParserError.SetSprintf ( "%s() argument must be a string attribute", sFuncName );
5840 				return -1;
5841 			}
5842 		} else if ( dRetTypes.GetLength()<6 )
5843 		{
5844 			// handle 2..5 arg versions, invalid
5845 			m_sParserError.SetSprintf ( "bad %s() argument count, must be either 1 (string) or 6+ (x/y pairs list)", sFuncName );
5846 			return -1;
5847 
5848 		} else
5849 		{
5850 			// handle 6+ arg version, POLY2D(xy-list)
5851 			if ( dRetTypes.GetLength() & 1 )
5852 			{
5853 				m_sParserError.SetSprintf ( "bad %s() argument count, must be even", sFuncName );
5854 				return -1;
5855 			}
5856 			ARRAY_FOREACH ( i, dRetTypes )
5857 				if ( !IsNumeric ( dRetTypes[i] ) )
5858 			{
5859 				m_sParserError.SetSprintf ( "%s() argument %d must be numeric", sFuncName, 1+i );
5860 				return -1;
5861 			}
5862 		}
5863 	}
5864 
5865 	// check that BM25F args are float, float [, {file_name=weight}]
5866 	if ( eFunc==FUNC_BM25F )
5867 	{
5868 		if ( dRetTypes.GetLength()>3 )
5869 		{
5870 			m_sParserError.SetSprintf ( "%s() called with %d args, at most 3 args expected", sFuncName, dRetTypes.GetLength() );
5871 			return -1;
5872 		}
5873 
5874 		if ( dRetTypes[0]!=SPH_ATTR_FLOAT || dRetTypes[1]!=SPH_ATTR_FLOAT )
5875 		{
5876 			m_sParserError.SetSprintf ( "%s() arguments 1,2 must be numeric", sFuncName );
5877 			return -1;
5878 		}
5879 
5880 		if ( dRetTypes.GetLength()==3 && dRetTypes[2]!=SPH_ATTR_MAPARG )
5881 		{
5882 			m_sParserError.SetSprintf ( "%s() argument 3 must be map", sFuncName );
5883 			return -1;
5884 		}
5885 	}
5886 
5887 	// check GEODIST args count, and that optional arg 5 is a map argument
5888 	if ( eFunc==FUNC_GEODIST )
5889 	{
5890 		if ( dRetTypes.GetLength()>5 )
5891 		{
5892 			m_sParserError.SetSprintf ( "%s() called with %d args, at most 5 args expected", sFuncName, dRetTypes.GetLength() );
5893 			return -1;
5894 		}
5895 
5896 		if ( dRetTypes.GetLength()==5 && dRetTypes[4]!=SPH_ATTR_MAPARG )
5897 		{
5898 			m_sParserError.SetSprintf ( "%s() argument 5 must be map", sFuncName );
5899 			return -1;
5900 		}
5901 	}
5902 
5903 	// check REMAP(expr, expr, (constlist), (constlist)) args
5904 	if ( eFunc==FUNC_REMAP )
5905 	{
5906 		if ( m_dNodes [ iFirst ].m_iToken==TOK_IDENT )
5907 		{
5908 			m_sParserError.SetSprintf ( "%s() incorrect first argument (not integer?)", sFuncName );
5909 			return 1;
5910 		}
5911 		if ( m_dNodes [ iSecond ].m_iToken==TOK_IDENT )
5912 		{
5913 			m_sParserError.SetSprintf ( "%s() incorrect second argument (not integer/float?)", sFuncName );
5914 			return 1;
5915 		}
5916 
5917 		ESphAttr eFirstRet = m_dNodes [ iFirst ].m_eRetType;
5918 		ESphAttr eSecondRet = m_dNodes [ iSecond ].m_eRetType;
5919 		if ( eFirstRet!=SPH_ATTR_INTEGER && eFirstRet!=SPH_ATTR_BIGINT )
5920 		{
5921 			m_sParserError.SetSprintf ( "%s() first argument should result in integer value", sFuncName );
5922 			return -1;
5923 		}
5924 		if ( eSecondRet!=SPH_ATTR_INTEGER && eSecondRet!=SPH_ATTR_BIGINT && eSecondRet!=SPH_ATTR_FLOAT )
5925 		{
5926 			m_sParserError.SetSprintf ( "%s() second argument should result in integer or float value", sFuncName );
5927 			return -1;
5928 		}
5929 
5930 		ConstList_c & tThirdList = *m_dNodes [ iThird ].m_pConsts;
5931 		ConstList_c & tFourthList = *m_dNodes [ iFourth ].m_pConsts;
5932 		if ( tThirdList.m_dInts.GetLength()==0 )
5933 		{
5934 			m_sParserError.SetSprintf ( "%s() first constlist should consist of integer values", sFuncName );
5935 			return -1;
5936 		}
5937 		if ( tThirdList.m_dInts.GetLength()!=tFourthList.m_dInts.GetLength() &&
5938 			tThirdList.m_dInts.GetLength()!=tFourthList.m_dFloats.GetLength() )
5939 		{
5940 			m_sParserError.SetSprintf ( "%s() both constlists should have the same length", sFuncName );
5941 			return -1;
5942 		}
5943 
5944 		if ( eSecondRet==SPH_ATTR_FLOAT && tFourthList.m_dFloats.GetLength()==0 )
5945 		{
5946 			m_sParserError.SetSprintf ( "%s() second argument results in float value and thus fourth argument should be a list of floats", sFuncName );
5947 			return -1;
5948 		}
5949 		if ( eSecondRet!=SPH_ATTR_FLOAT && tFourthList.m_dInts.GetLength()==0 )
5950 		{
5951 			m_sParserError.SetSprintf ( "%s() second argument results in integer value and thus fourth argument should be a list of integers", sFuncName );
5952 			return -1;
5953 		}
5954 	}
5955 
5956 	// do add
5957 	ExprNode_t & tNode = m_dNodes.Add ();
5958 	tNode.m_iToken = TOK_FUNC;
5959 	tNode.m_iFunc = iFunc;
5960 	tNode.m_iLeft = iFirst;
5961 	tNode.m_iRight = iSecond;
5962 	tNode.m_eArgType = ( iFirst>=0 ) ? m_dNodes [ iFirst ].m_eRetType : SPH_ATTR_INTEGER;
5963 	tNode.m_eRetType = g_dFuncs [ iFunc ].m_eRet;
5964 
5965 	// fixup return type in a few special cases
5966 	if ( eFunc==FUNC_MIN || eFunc==FUNC_MAX || eFunc==FUNC_MADD || eFunc==FUNC_MUL3 || eFunc==FUNC_ABS || eFunc==FUNC_IDIV )
5967 		tNode.m_eRetType = tNode.m_eArgType;
5968 
5969 	if ( eFunc==FUNC_EXIST )
5970 	{
5971 		int iExistRight = m_dNodes [ iFirst ].m_iRight;
5972 		ESphAttr eType = m_dNodes [ iExistRight ].m_eRetType;
5973 		tNode.m_eArgType = eType;
5974 		tNode.m_eRetType = eType;
5975 	}
5976 
5977 	if ( eFunc==FUNC_BIGINT && tNode.m_eRetType==SPH_ATTR_FLOAT )
5978 		tNode.m_eRetType = SPH_ATTR_FLOAT; // enforce if we can; FIXME! silently ignores BIGINT() on floats; should warn or raise an error
5979 
5980 	if ( eFunc==FUNC_IF || eFunc==FUNC_BITDOT )
5981 		tNode.m_eRetType = GetWidestRet ( iFirst, iSecond );
5982 
5983 	// fixup MVA return type according to the leftmost argument
5984 	if ( eFunc==FUNC_GREATEST || eFunc==FUNC_LEAST )
5985 	{
5986 		int iLeftmost = iFirst;
5987 		while ( m_dNodes [ iLeftmost ].m_iToken==',' )
5988 			iLeftmost = m_dNodes [ iLeftmost ].m_iLeft;
5989 		ESphAttr eArg = m_dNodes [ iLeftmost ].m_eRetType;
5990 		if ( eArg==SPH_ATTR_INT64SET )
5991 			tNode.m_eRetType = SPH_ATTR_BIGINT;
5992 		if ( eArg==SPH_ATTR_UINT32SET )
5993 			tNode.m_eRetType = SPH_ATTR_INTEGER;
5994 	}
5995 
5996 	if ( eFunc==FUNC_REMAP )
5997 	{
5998 		// function return type depends on second expression
5999 		tNode.m_eRetType = m_dNodes [ iSecond ].m_eRetType;
6000 	}
6001 
6002 	// all ok
6003 	assert ( tNode.m_eRetType!=SPH_ATTR_NONE );
6004 	return m_dNodes.GetLength()-1;
6005 }
6006 
AddNodeUdf(int iCall,int iArg)6007 int ExprParser_t::AddNodeUdf ( int iCall, int iArg )
6008 {
6009 	UdfCall_t * pCall = m_dUdfCalls[iCall];
6010 	SPH_UDF_INIT & tInit = pCall->m_tInit;
6011 	SPH_UDF_ARGS & tArgs = pCall->m_tArgs;
6012 
6013 	// initialize UDF right here, at AST creation stage
6014 	// just because it's easy to gather arg types here
6015 	if ( iArg>=0 )
6016 	{
6017 		// gather arg types
6018 		CSphVector<DWORD> dArgTypes;
6019 
6020 		int iCur = iArg;
6021 		while ( iCur>=0 )
6022 		{
6023 			if ( m_dNodes[iCur].m_iToken!=',' )
6024 			{
6025 				const ExprNode_t & tNode = m_dNodes[iCur];
6026 				if ( tNode.m_iToken==TOK_FUNC && ( tNode.m_iFunc==FUNC_PACKEDFACTORS || tNode.m_iFunc==FUNC_RANKFACTORS || tNode.m_iFunc==FUNC_FACTORS ) )
6027 					pCall->m_dArgs2Free.Add ( dArgTypes.GetLength() );
6028 				dArgTypes.Add ( tNode.m_eRetType );
6029 				break;
6030 			}
6031 
6032 			int iRight = m_dNodes[iCur].m_iRight;
6033 			if ( iRight>=0 )
6034 			{
6035 				const ExprNode_t & tNode = m_dNodes[iRight];
6036 				assert ( tNode.m_iToken!=',' );
6037 				if ( tNode.m_iToken==TOK_FUNC && ( tNode.m_iFunc==FUNC_PACKEDFACTORS || tNode.m_iFunc==FUNC_RANKFACTORS || tNode.m_iFunc==FUNC_FACTORS) )
6038 					pCall->m_dArgs2Free.Add ( dArgTypes.GetLength() );
6039 				dArgTypes.Add ( tNode.m_eRetType );
6040 			}
6041 
6042 			iCur = m_dNodes[iCur].m_iLeft;
6043 		}
6044 
6045 		assert ( dArgTypes.GetLength() );
6046 		tArgs.arg_count = dArgTypes.GetLength();
6047 		tArgs.arg_types = new sphinx_udf_argtype [ tArgs.arg_count ];
6048 
6049 		// we gathered internal type ids in right-to-left order
6050 		// reverse and remap
6051 		// FIXME! eliminate remap, maybe?
6052 		ARRAY_FOREACH ( i, dArgTypes )
6053 		{
6054 			sphinx_udf_argtype & eRes = tArgs.arg_types [ tArgs.arg_count-1-i ];
6055 			switch ( dArgTypes[i] )
6056 			{
6057 				case SPH_ATTR_INTEGER:
6058 				case SPH_ATTR_TIMESTAMP:
6059 				case SPH_ATTR_BOOL:
6060 					eRes = SPH_UDF_TYPE_UINT32;
6061 					break;
6062 				case SPH_ATTR_FLOAT:
6063 					eRes = SPH_UDF_TYPE_FLOAT;
6064 					break;
6065 				case SPH_ATTR_BIGINT:
6066 					eRes = SPH_UDF_TYPE_INT64;
6067 					break;
6068 				case SPH_ATTR_STRING:
6069 					eRes = SPH_UDF_TYPE_STRING;
6070 					break;
6071 				case SPH_ATTR_UINT32SET:
6072 					eRes = SPH_UDF_TYPE_UINT32SET;
6073 					break;
6074 				case SPH_ATTR_INT64SET:
6075 					eRes = SPH_UDF_TYPE_UINT64SET;
6076 					break;
6077 				case SPH_ATTR_FACTORS:
6078 					eRes = SPH_UDF_TYPE_FACTORS;
6079 					break;
6080 				default:
6081 					m_sParserError.SetSprintf ( "internal error: unmapped UDF argument type (arg=%d, type=%d)", i, dArgTypes[i] );
6082 					return -1;
6083 			}
6084 		}
6085 
6086 		ARRAY_FOREACH ( i, pCall->m_dArgs2Free )
6087 			pCall->m_dArgs2Free[i] = tArgs.arg_count - 1 - pCall->m_dArgs2Free[i];
6088 	}
6089 
6090 	// init
6091 	if ( pCall->m_pUdf->m_fnInit )
6092 	{
6093 		char sError [ SPH_UDF_ERROR_LEN ];
6094 		if ( pCall->m_pUdf->m_fnInit ( &tInit, &tArgs, sError ) )
6095 		{
6096 			m_sParserError = sError;
6097 			return -1;
6098 		}
6099 	}
6100 
6101 	// do add
6102 	ExprNode_t & tNode = m_dNodes.Add ();
6103 	tNode.m_iToken = TOK_UDF;
6104 	tNode.m_iFunc = iCall;
6105 	tNode.m_iLeft = iArg;
6106 	tNode.m_iRight = -1;
6107 
6108 	// deduce type
6109 	tNode.m_eArgType = ( iArg>=0 ) ? m_dNodes[iArg].m_eRetType : SPH_ATTR_INTEGER;
6110 	tNode.m_eRetType = pCall->m_pUdf->m_eRetType;
6111 	return m_dNodes.GetLength()-1;
6112 }
6113 
AddNodePF(int iFunc,int iArg)6114 int	ExprParser_t::AddNodePF ( int iFunc, int iArg )
6115 {
6116 	assert ( iFunc>=0 && iFunc< int ( sizeof ( g_dFuncs )/sizeof ( g_dFuncs[0]) ) );
6117 	const char * sFuncName = g_dFuncs [ iFunc ].m_sName;
6118 
6119 	CSphVector<ESphAttr> dRetTypes;
6120 	GatherArgRetTypes ( iArg, dRetTypes );
6121 
6122 	assert ( dRetTypes.GetLength()==0 || dRetTypes.GetLength()==1 );
6123 
6124 	if ( dRetTypes.GetLength()==1 && dRetTypes[0]!=SPH_ATTR_MAPARG )
6125 	{
6126 		m_sParserError.SetSprintf ( "%s() argument must be a map", sFuncName );
6127 		return -1;
6128 	}
6129 
6130 	ExprNode_t & tNode = m_dNodes.Add ();
6131 	tNode.m_iToken = TOK_FUNC;
6132 	tNode.m_iFunc = iFunc;
6133 	tNode.m_iLeft = iArg;
6134 	tNode.m_iRight = -1;
6135 	tNode.m_eArgType = SPH_ATTR_MAPARG;
6136 	tNode.m_eRetType = g_dFuncs[iFunc].m_eRet;
6137 
6138 	return m_dNodes.GetLength()-1;
6139 }
6140 
AddNodeConstlist(int64_t iValue)6141 int ExprParser_t::AddNodeConstlist ( int64_t iValue )
6142 {
6143 	ExprNode_t & tNode = m_dNodes.Add();
6144 	tNode.m_iToken = TOK_CONST_LIST;
6145 	tNode.m_pConsts = new ConstList_c();
6146 	tNode.m_pConsts->Add ( iValue );
6147 	tNode.m_pConsts->m_sExpr = m_sExpr;
6148 	return m_dNodes.GetLength()-1;
6149 }
6150 
AddNodeConstlist(float iValue)6151 int ExprParser_t::AddNodeConstlist ( float iValue )
6152 {
6153 	ExprNode_t & tNode = m_dNodes.Add();
6154 	tNode.m_iToken = TOK_CONST_LIST;
6155 	tNode.m_pConsts = new ConstList_c();
6156 	tNode.m_pConsts->Add ( iValue );
6157 	return m_dNodes.GetLength()-1;
6158 }
6159 
AppendToConstlist(int iNode,int64_t iValue)6160 void ExprParser_t::AppendToConstlist ( int iNode, int64_t iValue )
6161 {
6162 	m_dNodes[iNode].m_pConsts->Add ( iValue );
6163 }
6164 
AppendToConstlist(int iNode,float iValue)6165 void ExprParser_t::AppendToConstlist ( int iNode, float iValue )
6166 {
6167 	m_dNodes[iNode].m_pConsts->Add ( iValue );
6168 }
6169 
AddNodeUservar(int iUservar)6170 int ExprParser_t::AddNodeUservar ( int iUservar )
6171 {
6172 	ExprNode_t & tNode = m_dNodes.Add();
6173 	tNode.m_iToken = TOK_USERVAR;
6174 	tNode.m_iConst = iUservar;
6175 	return m_dNodes.GetLength()-1;
6176 }
6177 
AddNodeHookIdent(int iID)6178 int ExprParser_t::AddNodeHookIdent ( int iID )
6179 {
6180 	ExprNode_t & tNode = m_dNodes.Add();
6181 	tNode.m_iToken = TOK_HOOK_IDENT;
6182 	tNode.m_iFunc = iID;
6183 	tNode.m_eRetType = m_pHook->GetIdentType ( iID );
6184 	return m_dNodes.GetLength()-1;
6185 }
6186 
AddNodeHookFunc(int iID,int iLeft)6187 int ExprParser_t::AddNodeHookFunc ( int iID, int iLeft )
6188 {
6189 	CSphVector<ESphAttr> dArgTypes;
6190 	GatherArgRetTypes ( iLeft, dArgTypes );
6191 
6192 	ESphAttr eRet = m_pHook->GetReturnType ( iID, dArgTypes, CheckForConstSet ( iLeft, 0 ), m_sParserError );
6193 	if ( eRet==SPH_ATTR_NONE )
6194 		return -1;
6195 
6196 	ExprNode_t & tNode = m_dNodes.Add();
6197 	tNode.m_iToken = TOK_HOOK_FUNC;
6198 	tNode.m_iFunc = iID;
6199 	tNode.m_iLeft = iLeft;
6200 	tNode.m_iRight = -1;
6201 
6202 	// deduce type
6203 	tNode.m_eArgType = ( iLeft>=0 ) ? m_dNodes[iLeft].m_eRetType : SPH_ATTR_INTEGER;
6204 	tNode.m_eRetType = eRet;
6205 
6206 	return m_dNodes.GetLength()-1;
6207 }
6208 
AddNodeMapArg(const char * sKey,const char * sValue,int64_t iValue)6209 int ExprParser_t::AddNodeMapArg ( const char * sKey, const char * sValue, int64_t iValue )
6210 {
6211 	ExprNode_t & tNode = m_dNodes.Add();
6212 	tNode.m_iToken = TOK_MAP_ARG;
6213 	tNode.m_pMapArg = new MapArg_c();
6214 	tNode.m_pMapArg->Add ( sKey, sValue, iValue );
6215 	tNode.m_eRetType = SPH_ATTR_MAPARG;
6216 	return m_dNodes.GetLength()-1;
6217 }
6218 
AppendToMapArg(int iNode,const char * sKey,const char * sValue,int64_t iValue)6219 void ExprParser_t::AppendToMapArg ( int iNode, const char * sKey, const char * sValue, int64_t iValue )
6220 {
6221 	m_dNodes[iNode].m_pMapArg->Add ( sKey, sValue, iValue );
6222 }
6223 
Attr2Ident(uint64_t uAttrLoc)6224 const char * ExprParser_t::Attr2Ident ( uint64_t uAttrLoc )
6225 {
6226 	ExprNode_t tAttr;
6227 	sphUnpackAttrLocator ( uAttrLoc, &tAttr );
6228 
6229 	CSphString sIdent;
6230 	sIdent = m_pSchema->GetAttr ( tAttr.m_iLocator ).m_sName;
6231 	m_dIdents.Add ( sIdent.Leak() );
6232 	return m_dIdents.Last();
6233 }
6234 
6235 
AddNodeJsonField(uint64_t uAttrLocator,int iLeft)6236 int ExprParser_t::AddNodeJsonField ( uint64_t uAttrLocator, int iLeft )
6237 {
6238 	int iNode = AddNodeAttr ( TOK_ATTR_JSON, uAttrLocator );
6239 	m_dNodes[iNode].m_iLeft = iLeft;
6240 	return m_dNodes.GetLength()-1;
6241 }
6242 
6243 
AddNodeJsonSubkey(int64_t iValue)6244 int ExprParser_t::AddNodeJsonSubkey ( int64_t iValue )
6245 {
6246 	ExprNode_t & tNode = m_dNodes.Add ();
6247 	tNode.m_iToken = TOK_SUBKEY;
6248 	tNode.m_eRetType = SPH_ATTR_STRING;
6249 	tNode.m_iConst = iValue;
6250 	return m_dNodes.GetLength()-1;
6251 }
6252 
6253 
AddNodeDotNumber(int64_t iValue)6254 int ExprParser_t::AddNodeDotNumber ( int64_t iValue )
6255 {
6256 	ExprNode_t & tNode = m_dNodes.Add ();
6257 	tNode.m_iToken = TOK_CONST_FLOAT;
6258 	tNode.m_eRetType = SPH_ATTR_FLOAT;
6259 	const char * pCur = m_sExpr + (int)( iValue>>32 );
6260 	tNode.m_fConst = (float) strtod ( pCur-1, NULL );
6261 	return m_dNodes.GetLength()-1;
6262 }
6263 
6264 
AddNodeIdent(const char * sKey,int iLeft)6265 int ExprParser_t::AddNodeIdent ( const char * sKey, int iLeft )
6266 {
6267 	ExprNode_t & tNode = m_dNodes.Add ();
6268 	tNode.m_sIdent = sKey;
6269 	tNode.m_iLeft = iLeft;
6270 	tNode.m_iToken = TOK_IDENT;
6271 	tNode.m_eRetType = SPH_ATTR_JSON_FIELD;
6272 	return m_dNodes.GetLength()-1;
6273 }
6274 
6275 //////////////////////////////////////////////////////////////////////////
6276 
6277 // performs simple semantic analysis
6278 // checks operand types for some arithmetic operators
6279 struct TypeCheck_fn
6280 {
6281 	CSphString m_sError;
6282 
EnterTypeCheck_fn6283 	void Enter ( const ExprNode_t & tNode, const CSphVector<ExprNode_t> & dNodes )
6284 	{
6285 		if ( !m_sError.IsEmpty() )
6286 			return;
6287 
6288 		bool bNumberOp = tNode.m_iToken=='+' || tNode.m_iToken=='-' || tNode.m_iToken=='*' || tNode.m_iToken=='/';
6289 		if ( bNumberOp )
6290 		{
6291 			bool bLeftNumeric =	tNode.m_iLeft==-1 ? false : IsNumericNode ( dNodes[tNode.m_iLeft] );
6292 			bool bRightNumeric = tNode.m_iRight==-1 ? false : IsNumericNode ( dNodes[tNode.m_iRight] );
6293 
6294 			// if json vs numeric then let it pass (for the autoconversion)
6295 			if ( ( bLeftNumeric && dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_JSON_FIELD )
6296 				|| ( bRightNumeric && dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD ) )
6297 					return;
6298 
6299 			if ( !bLeftNumeric || !bRightNumeric )
6300 			{
6301 				m_sError = "numeric operation applied to non-numeric operands";
6302 				return;
6303 			}
6304 		}
6305 
6306 		if ( tNode.m_iToken==TOK_EQ )
6307 		{
6308 			// string equal must work with string columns only
6309 			ESphAttr eLeftRet = tNode.m_iLeft==-1 ? SPH_ATTR_NONE : dNodes[tNode.m_iLeft].m_eRetType;
6310 			ESphAttr eRightRet = tNode.m_iRight==-1 ? SPH_ATTR_NONE : dNodes[tNode.m_iRight].m_eRetType;
6311 			bool bLeftStr = ( eLeftRet==SPH_ATTR_STRING || eLeftRet==SPH_ATTR_STRINGPTR || eLeftRet==SPH_ATTR_JSON_FIELD );
6312 			bool bRightStr = ( eRightRet==SPH_ATTR_STRING || eRightRet==SPH_ATTR_STRINGPTR || eRightRet==SPH_ATTR_JSON_FIELD );
6313 			if ( bLeftStr!=bRightStr && eLeftRet!=SPH_ATTR_JSON_FIELD && eRightRet!=SPH_ATTR_JSON_FIELD )
6314 			{
6315 				m_sError = "equal operation applied to part string operands";
6316 				return;
6317 			}
6318 		}
6319 	}
6320 
ExitTypeCheck_fn6321 	void Exit ( const ExprNode_t & )
6322 	{}
6323 
IsNumericNodeTypeCheck_fn6324 	bool IsNumericNode ( const ExprNode_t & tNode )
6325 	{
6326 		return tNode.m_eRetType==SPH_ATTR_INTEGER || tNode.m_eRetType==SPH_ATTR_BOOL || tNode.m_eRetType==SPH_ATTR_FLOAT ||
6327 			tNode.m_eRetType==SPH_ATTR_BIGINT || tNode.m_eRetType==SPH_ATTR_TOKENCOUNT || tNode.m_eRetType==SPH_ATTR_TIMESTAMP;
6328 	}
6329 };
6330 
6331 
6332 // checks whether we have a WEIGHT() in expression
6333 struct WeightCheck_fn
6334 {
6335 	bool * m_pRes;
6336 
WeightCheck_fnWeightCheck_fn6337 	explicit WeightCheck_fn ( bool * pRes )
6338 		: m_pRes ( pRes )
6339 	{
6340 		assert ( m_pRes );
6341 		*m_pRes = false;
6342 	}
6343 
EnterWeightCheck_fn6344 	void Enter ( const ExprNode_t & tNode, const CSphVector<ExprNode_t> & )
6345 	{
6346 		if ( tNode.m_iToken==TOK_WEIGHT )
6347 			*m_pRes = true;
6348 	}
6349 
ExitWeightCheck_fn6350 	void Exit ( const ExprNode_t & )
6351 	{}
6352 };
6353 
6354 // checks whether expression has functions defined not in this file like
6355 // searchd-level function or ranker-level functions
6356 struct HookCheck_fn
6357 {
6358 	ISphExprHook * m_pHook;
6359 
HookCheck_fnHookCheck_fn6360 	explicit HookCheck_fn ( ISphExprHook * pHook )
6361 		: m_pHook ( pHook )
6362 	{}
6363 
EnterHookCheck_fn6364 	void Enter ( const ExprNode_t & tNode, const CSphVector<ExprNode_t> & )
6365 	{
6366 		if ( tNode.m_iToken==TOK_HOOK_IDENT || tNode.m_iToken==TOK_HOOK_FUNC )
6367 			m_pHook->CheckEnter ( tNode.m_iFunc );
6368 	}
6369 
ExitHookCheck_fn6370 	void Exit ( const ExprNode_t & tNode )
6371 	{
6372 		if ( tNode.m_iToken==TOK_HOOK_IDENT || tNode.m_iToken==TOK_HOOK_FUNC )
6373 			m_pHook->CheckExit ( tNode.m_iFunc );
6374 	}
6375 };
6376 
6377 
Parse(const char * sExpr,const ISphSchema & tSchema,ESphAttr * pAttrType,bool * pUsesWeight,CSphString & sError)6378 ISphExpr * ExprParser_t::Parse ( const char * sExpr, const ISphSchema & tSchema,
6379 	ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError )
6380 {
6381 	m_sLexerError = "";
6382 	m_sParserError = "";
6383 	m_sCreateError = "";
6384 
6385 	// setup lexer
6386 	m_sExpr = sExpr;
6387 	m_pCur = sExpr;
6388 	m_pSchema = &tSchema;
6389 
6390 	// setup constant functions
6391 	m_iConstNow = (int) time ( NULL );
6392 
6393 	// build abstract syntax tree
6394 	m_iParsed = -1;
6395 	yyparse ( this );
6396 
6397 	// handle errors
6398 	if ( m_iParsed<0 || !m_sLexerError.IsEmpty() || !m_sParserError.IsEmpty() )
6399 	{
6400 		sError = !m_sLexerError.IsEmpty() ? m_sLexerError : m_sParserError;
6401 		if ( sError.IsEmpty() ) sError = "general parsing error";
6402 		return NULL;
6403 	}
6404 
6405 	// deduce return type
6406 	ESphAttr eAttrType = m_dNodes[m_iParsed].m_eRetType;
6407 
6408 	// Check expression stack to fit for mutual recursive function calls.
6409 	// This check is an approximation, because different compilers with
6410 	// different settings produce code which requires different stack size.
6411 	if ( m_dNodes.GetLength()>100 )
6412 	{
6413 		CSphVector<int> dNodes;
6414 		dNodes.Reserve ( m_dNodes.GetLength()/2 );
6415 		int iMaxHeight = 1;
6416 		int iHeight = 1;
6417 		dNodes.Add ( m_iParsed );
6418 		while ( dNodes.GetLength() )
6419 		{
6420 			const ExprNode_t & tExpr = m_dNodes[dNodes.Pop()];
6421 			iHeight += ( tExpr.m_iLeft>=0 || tExpr.m_iRight>=0 ? 1 : -1 );
6422 			iMaxHeight = Max ( iMaxHeight, iHeight );
6423 			if ( tExpr.m_iRight>=0 )
6424 				dNodes.Add ( tExpr.m_iRight );
6425 			if ( tExpr.m_iLeft>=0 )
6426 				dNodes.Add ( tExpr.m_iLeft );
6427 		}
6428 
6429 #define SPH_EXPRNODE_STACK_SIZE 160
6430 		int64_t iExprStack = sphGetStackUsed() + iMaxHeight*SPH_EXPRNODE_STACK_SIZE;
6431 		if ( g_iThreadStackSize<=iExprStack )
6432 		{
6433 			sError.SetSprintf ( "query too complex, not enough stack (thread_stack=%dK or higher required)",
6434 				(int)( ( iExprStack + 1024 - ( iExprStack%1024 ) ) / 1024 ) );
6435 			return NULL;
6436 		}
6437 	}
6438 
6439 	// perform optimizations (tree transformations)
6440 	Optimize ( m_iParsed );
6441 #if 0
6442 	Dump ( m_iParsed );
6443 	fflush ( stdout );
6444 #endif
6445 
6446 	// simple semantic analysis
6447 	TypeCheck_fn tTypeChecker;
6448 	WalkTree ( m_iParsed, tTypeChecker );
6449 	if ( !tTypeChecker.m_sError.IsEmpty() )
6450 	{
6451 		sError.Swap ( tTypeChecker.m_sError );
6452 		return NULL;
6453 	}
6454 
6455 	// create evaluator
6456 	ISphExpr * pRes = CreateTree ( m_iParsed );
6457 	if ( !m_sCreateError.IsEmpty() )
6458 	{
6459 		sError = m_sCreateError;
6460 		SafeRelease ( pRes );
6461 	} else if ( !pRes )
6462 	{
6463 		sError.SetSprintf ( "empty expression" );
6464 	}
6465 
6466 	if ( pAttrType )
6467 		*pAttrType = eAttrType;
6468 
6469 	if ( pUsesWeight )
6470 	{
6471 		WeightCheck_fn tWeightFunctor ( pUsesWeight );
6472 		WalkTree ( m_iParsed, tWeightFunctor );
6473 	}
6474 
6475 	if ( m_pHook )
6476 	{
6477 		HookCheck_fn tHookFunctor ( m_pHook );
6478 		WalkTree ( m_iParsed, tHookFunctor );
6479 	}
6480 
6481 	return pRes;
6482 }
6483 
6484 //////////////////////////////////////////////////////////////////////////
6485 // PUBLIC STUFF
6486 //////////////////////////////////////////////////////////////////////////
6487 
6488 /// parser entry point
sphExprParse(const char * sExpr,const ISphSchema & tSchema,ESphAttr * pAttrType,bool * pUsesWeight,CSphString & sError,CSphQueryProfile * pProfiler,ESphCollation eCollation,ISphExprHook * pHook,bool * pZonespanlist,DWORD * pPackedFactorsFlags,ESphEvalStage * pEvalStage)6489 ISphExpr * sphExprParse ( const char * sExpr, const ISphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight,
6490 	CSphString & sError, CSphQueryProfile * pProfiler, ESphCollation eCollation, ISphExprHook * pHook, bool * pZonespanlist, DWORD * pPackedFactorsFlags, ESphEvalStage * pEvalStage )
6491 {
6492 	// parse into opcodes
6493 	ExprParser_t tParser ( pHook, pProfiler, eCollation );
6494 	ISphExpr * pRes = tParser.Parse ( sExpr, tSchema, pAttrType, pUsesWeight, sError );
6495 	if ( pZonespanlist )
6496 		*pZonespanlist = tParser.m_bHasZonespanlist;
6497 	if ( pEvalStage )
6498 		*pEvalStage = tParser.m_eEvalStage;
6499 	if ( pPackedFactorsFlags )
6500 		*pPackedFactorsFlags = tParser.m_uPackedFactorFlags;
6501 	return pRes;
6502 }
6503 
6504 //
6505 // $Id$
6506 //
6507