1 //
2 // $Id: sphinxexpr.cpp 4113 2013-08-26 07:43:28Z deogar $
3 //
4 
5 //
6 // Copyright (c) 2001-2013, Andrew Aksyonoff
7 // Copyright (c) 2008-2013, Sphinx Technologies Inc
8 // All rights reserved
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15 
16 #include "sphinx.h"
17 #include "sphinxexpr.h"
18 #include "sphinxudf.h"
19 #include "sphinxutils.h"
20 #include "sphinxint.h"
21 #include <time.h>
22 #include <math.h>
23 
24 #if !USE_WINDOWS
25 #include <unistd.h>
26 #include <sys/time.h>
27 #ifdef HAVE_DLOPEN
28 #include <dlfcn.h>
29 #endif // HAVE_DLOPEN
30 #endif // !USE_WINDOWS
31 
32 //////////////////////////////////////////////////////////////////////////
33 
// fallback definitions, for when <math.h> did not define these constants

#if !defined(M_LOG2E)
#define M_LOG2E		1.44269504088896340736		// log2(e)
#endif

#if !defined(M_LOG10E)
#define M_LOG10E	0.434294481903251827651		// log10(e)
#endif
41 
// non-Windows build without HAVE_DLERROR? substitute an empty error message
#if !USE_WINDOWS && !defined(HAVE_DLERROR)
#define dlerror() ""
#endif
47 
48 
49 typedef int ( *UdfInit_fn ) ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, char * error );
50 typedef void ( *UdfDeinit_fn ) ( SPH_UDF_INIT * init );
51 
52 
/// descriptor of a loaded UDF library
struct UdfLib_t
{
	/// handle from dlopen()
	void *	m_pHandle;

	/// number of registered functions from this library
	int		m_iFuncs;
};
59 
60 
61 /// registered UDF function
62 struct UdfFunc_t
63 {
64 	UdfLib_t *			m_pLib;			///< library descriptor (pointer to library hash value)
65 	const CSphString *	m_pLibName;		///< library name (pointer to library hash key)
66 	ESphAttr			m_eRetType;		///< function type, currently FLOAT or INT
67 	UdfInit_fn			m_fnInit;		///< per-query init function, mandatory
68 	UdfDeinit_fn		m_fnDeinit;		///< per-query deinit function, optional
69 	void *				m_fnFunc;		///< per-row worker function, mandatory
70 	int					m_iUserCount;	///< number of active users currently working this function
71 	bool				m_bToDrop;		///< scheduled for DROP; do not use
72 };
73 
74 
75 /// UDF call site
76 struct UdfCall_t
77 {
78 	UdfFunc_t *			m_pUdf;
79 	SPH_UDF_INIT		m_tInit;
80 	SPH_UDF_ARGS		m_tArgs;
81 
82 	UdfCall_t();
83 	~UdfCall_t();
84 };
85 
86 //////////////////////////////////////////////////////////////////////////
87 // GLOBALS
88 //////////////////////////////////////////////////////////////////////////
89 
90 // hack hack hack
91 UservarIntSet_c * ( *g_pUservarsHook )( const CSphString & sUservar );
92 
93 static bool								g_bUdfEnabled = false;
94 static CSphString						g_sUdfDir;
95 static CSphStaticMutex					g_tUdfMutex;
96 static SmallStringHash_T<UdfLib_t>		g_hUdfLibs;
97 static SmallStringHash_T<UdfFunc_t>		g_hUdfFuncs;
98 
99 //////////////////////////////////////////////////////////////////////////
100 // UDF CALL SITE
101 //////////////////////////////////////////////////////////////////////////
102 
UdfCall_t()103 UdfCall_t::UdfCall_t ()
104 {
105 	m_pUdf = NULL;
106 	m_tInit.func_data = NULL;
107 	m_tInit.is_const = false;
108 	m_tArgs.arg_count = 0;
109 	m_tArgs.arg_types = NULL;
110 	m_tArgs.arg_values = NULL;
111 	m_tArgs.arg_names = NULL;
112 	m_tArgs.str_lengths = NULL;
113 }
114 
~UdfCall_t()115 UdfCall_t::~UdfCall_t ()
116 {
117 	if ( m_pUdf )
118 	{
119 		g_tUdfMutex.Lock ();
120 		m_pUdf->m_iUserCount--;
121 		g_tUdfMutex.Unlock ();
122 	}
123 	SafeDeleteArray ( m_tArgs.arg_types );
124 	SafeDeleteArray ( m_tArgs.arg_values );
125 	SafeDeleteArray ( m_tArgs.arg_names );
126 	SafeDeleteArray ( m_tArgs.str_lengths );
127 }
128 
129 //////////////////////////////////////////////////////////////////////////
130 // EVALUATION ENGINE
131 //////////////////////////////////////////////////////////////////////////
132 
133 struct ExprLocatorTraits_t : public ISphExpr
134 {
135 	CSphAttrLocator m_tLocator;
136 	int m_iLocator;
137 
ExprLocatorTraits_tExprLocatorTraits_t138 	ExprLocatorTraits_t ( const CSphAttrLocator & tLocator, int iLocator ) : m_tLocator ( tLocator ), m_iLocator ( iLocator ) {}
GetDependencyColumnsExprLocatorTraits_t139 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const
140 	{
141 		dColumns.Add ( m_iLocator );
142 	}
143 };
144 
145 
146 struct Expr_GetInt_c : public ExprLocatorTraits_t
147 {
Expr_GetInt_cExpr_GetInt_c148 	Expr_GetInt_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetInt_c149 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float) tMatch.GetAttr ( m_tLocator ); } // FIXME! OPTIMIZE!!! we can go the short route here
IntEvalExpr_GetInt_c150 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
Int64EvalExpr_GetInt_c151 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.GetAttr ( m_tLocator ); }
152 };
153 
154 
155 struct Expr_GetBits_c : public ExprLocatorTraits_t
156 {
Expr_GetBits_cExpr_GetBits_c157 	Expr_GetBits_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetBits_c158 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float) tMatch.GetAttr ( m_tLocator ); }
IntEvalExpr_GetBits_c159 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
Int64EvalExpr_GetBits_c160 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.GetAttr ( m_tLocator ); }
161 };
162 
163 
164 struct Expr_GetSint_c : public ExprLocatorTraits_t
165 {
Expr_GetSint_cExpr_GetSint_c166 	Expr_GetSint_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetSint_c167 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)(int)tMatch.GetAttr ( m_tLocator ); }
IntEvalExpr_GetSint_c168 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
Int64EvalExpr_GetSint_c169 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
170 };
171 
172 
173 struct Expr_GetFloat_c : public ExprLocatorTraits_t
174 {
Expr_GetFloat_cExpr_GetFloat_c175 	Expr_GetFloat_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetFloat_c176 	virtual float Eval ( const CSphMatch & tMatch ) const { return tMatch.GetAttrFloat ( m_tLocator ); }
177 };
178 
179 
180 struct Expr_GetString_c : public ExprLocatorTraits_t
181 {
182 	const BYTE * m_pStrings;
183 
Expr_GetString_cExpr_GetString_c184 	Expr_GetString_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetString_c185 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
SetStringPoolExpr_GetString_c186 	virtual void SetStringPool ( const BYTE * pStrings ) { m_pStrings = pStrings; }
187 
StringEvalExpr_GetString_c188 	virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
189 	{
190 		SphAttr_t iOff = tMatch.GetAttr ( m_tLocator );
191 		if ( iOff>0 )
192 			return sphUnpackStr ( m_pStrings + iOff, ppStr );
193 
194 		*ppStr = NULL;
195 		return 0;
196 	}
197 };
198 
199 
200 struct Expr_GetMva_c : public ExprLocatorTraits_t
201 {
202 	const DWORD * m_pMva;
203 
Expr_GetMva_cExpr_GetMva_c204 	Expr_GetMva_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetMva_c205 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
SetMVAPoolExpr_GetMva_c206 	virtual void SetMVAPool ( const DWORD * pMva ) { m_pMva = pMva; }
MvaEvalExpr_GetMva_c207 	virtual const DWORD * MvaEval ( const CSphMatch & tMatch ) const { return tMatch.GetAttrMVA ( m_tLocator, m_pMva ); }
208 };
209 
210 
211 struct Expr_GetConst_c : public ISphExpr
212 {
213 	float m_fValue;
Expr_GetConst_cExpr_GetConst_c214 	explicit Expr_GetConst_c ( float fValue ) : m_fValue ( fValue ) {}
EvalExpr_GetConst_c215 	virtual float Eval ( const CSphMatch & ) const { return m_fValue; }
IntEvalExpr_GetConst_c216 	virtual int IntEval ( const CSphMatch & ) const { return (int)m_fValue; }
Int64EvalExpr_GetConst_c217 	virtual int64_t Int64Eval ( const CSphMatch & ) const { return (int64_t)m_fValue; }
218 };
219 
220 
221 struct Expr_GetIntConst_c : public ISphExpr
222 {
223 	int m_iValue;
Expr_GetIntConst_cExpr_GetIntConst_c224 	explicit Expr_GetIntConst_c ( int iValue ) : m_iValue ( iValue ) {}
EvalExpr_GetIntConst_c225 	virtual float Eval ( const CSphMatch & ) const { return (float) m_iValue; } // no assert() here cause generic float Eval() needs to work even on int-evaluator tree
IntEvalExpr_GetIntConst_c226 	virtual int IntEval ( const CSphMatch & ) const { return m_iValue; }
Int64EvalExpr_GetIntConst_c227 	virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_iValue; }
228 };
229 
230 
231 struct Expr_GetInt64Const_c : public ISphExpr
232 {
233 	int64_t m_iValue;
Expr_GetInt64Const_cExpr_GetInt64Const_c234 	explicit Expr_GetInt64Const_c ( int64_t iValue ) : m_iValue ( iValue ) {}
EvalExpr_GetInt64Const_c235 	virtual float Eval ( const CSphMatch & ) const { return (float) m_iValue; } // no assert() here cause generic float Eval() needs to work even on int-evaluator tree
IntEvalExpr_GetInt64Const_c236 	virtual int IntEval ( const CSphMatch & ) const { assert ( 0 ); return (int)m_iValue; }
Int64EvalExpr_GetInt64Const_c237 	virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_iValue; }
238 };
239 
240 
241 struct Expr_GetStrConst_c : public ISphExpr
242 {
243 	CSphString m_sVal;
244 	int m_iLen;
245 
Expr_GetStrConst_cExpr_GetStrConst_c246 	explicit Expr_GetStrConst_c ( const char * sVal, int iLen )
247 	{
248 		if ( iLen>0 )
249 			SqlUnescape ( m_sVal, sVal, iLen );
250 		m_iLen = m_sVal.Length();
251 	}
252 
StringEvalExpr_GetStrConst_c253 	virtual int StringEval ( const CSphMatch &, const BYTE ** ppStr ) const
254 	{
255 		*ppStr = (const BYTE*) m_sVal.cstr();
256 		return m_iLen;
257 	}
258 
EvalExpr_GetStrConst_c259 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
IntEvalExpr_GetStrConst_c260 	virtual int IntEval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
Int64EvalExpr_GetStrConst_c261 	virtual int64_t Int64Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
262 };
263 
264 
265 struct Expr_GetId_c : public ISphExpr
266 {
EvalExpr_GetId_c267 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)tMatch.m_iDocID; }
IntEvalExpr_GetId_c268 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.m_iDocID; }
Int64EvalExpr_GetId_c269 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.m_iDocID; }
270 };
271 
272 
273 struct Expr_GetWeight_c : public ISphExpr
274 {
EvalExpr_GetWeight_c275 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)tMatch.m_iWeight; }
IntEvalExpr_GetWeight_c276 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.m_iWeight; }
Int64EvalExpr_GetWeight_c277 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.m_iWeight; }
278 };
279 
280 //////////////////////////////////////////////////////////////////////////
281 
282 struct Expr_Arglist_c : public ISphExpr
283 {
284 	CSphVector<ISphExpr *> m_dArgs;
285 
Expr_Arglist_cExpr_Arglist_c286 	Expr_Arglist_c ( ISphExpr * pLeft, ISphExpr * pRight )
287 	{
288 		AddArgs ( pLeft );
289 		AddArgs ( pRight );
290 	}
291 
~Expr_Arglist_cExpr_Arglist_c292 	~Expr_Arglist_c ()
293 	{
294 		ARRAY_FOREACH ( i, m_dArgs )
295 			SafeRelease ( m_dArgs[i] );
296 	}
297 
AddArgsExpr_Arglist_c298 	void AddArgs ( ISphExpr * pExpr )
299 	{
300 		// not an arglist? just add it
301 		if ( !pExpr->IsArglist() )
302 		{
303 			m_dArgs.Add ( pExpr );
304 			return;
305 		}
306 
307 		// arglist? take ownership of its args, and dismiss it
308 		Expr_Arglist_c * pArgs = (Expr_Arglist_c *) pExpr;
309 		ARRAY_FOREACH ( i, pArgs->m_dArgs )
310 		{
311 			m_dArgs.Add ( pArgs->m_dArgs[i] );
312 			pArgs->m_dArgs[i] = NULL;
313 		}
314 		SafeRelease ( pExpr );
315 	}
316 
IsArglistExpr_Arglist_c317 	virtual bool IsArglist () const
318 	{
319 		return true;
320 	}
321 
EvalExpr_Arglist_c322 	virtual float Eval ( const CSphMatch & ) const
323 	{
324 		assert ( 0 && "internal error: Eval() must not be explicitly called on arglist" );
325 		return 0.0f;
326 	}
327 
GetDependencyColumnsExpr_Arglist_c328 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const
329 	{
330 		ARRAY_FOREACH ( i, m_dArgs )
331 			m_dArgs[i]->GetDependencyColumns ( dColumns );
332 	}
333 };
334 
335 //////////////////////////////////////////////////////////////////////////
336 
337 struct Expr_Unary_c : public ISphExpr
338 {
339 	ISphExpr * m_pFirst;
340 
~Expr_Unary_cExpr_Unary_c341 	~Expr_Unary_c() { SafeRelease ( m_pFirst ); }
342 
SetMVAPoolExpr_Unary_c343 	virtual void SetMVAPool ( const DWORD * pMvaPool ) { m_pFirst->SetMVAPool ( pMvaPool ); }
SetStringPoolExpr_Unary_c344 	virtual void SetStringPool ( const BYTE * pStrings ) { m_pFirst->SetStringPool ( pStrings ); }
GetDependencyColumnsExpr_Unary_c345 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const { m_pFirst->GetDependencyColumns ( dColumns ); }
346 };
347 
348 struct Expr_Crc32_c : public Expr_Unary_c
349 {
Expr_Crc32_cExpr_Crc32_c350 	explicit Expr_Crc32_c ( ISphExpr * pFirst ) { m_pFirst = pFirst; }
EvalExpr_Crc32_c351 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
IntEvalExpr_Crc32_c352 	virtual int IntEval ( const CSphMatch & tMatch ) const
353 	{
354 		const BYTE * pStr;
355 		int iLen = m_pFirst->StringEval ( tMatch, &pStr );
356 		return sphCRC32 ( pStr, iLen );
357 	}
Int64EvalExpr_Crc32_c358 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval ( tMatch ); }
359 };
360 
/// compute the i-th Fibonacci number, with F(0)=0 and F(1)=1; negative indexes yield 0
///
/// the sequence is advanced two terms per iteration, so one term past the requested
/// one may get computed too. all the arithmetic is therefore done on unsigned ints:
/// terms that do not fit wrap around instead of triggering signed overflow
/// (which used to happen from i==46 on, where F(47) gets computed even though it is unused)
static inline int Fibonacci ( int i )
{
	if ( i<0 )
		return 0;

	unsigned int f0 = 0;
	unsigned int f1 = 1;
	for ( int j=0; j+1<i; j+=2 )
	{
		f0 += f1; // f_j
		f1 += f0; // f_{j+1}
	}

	// f0 holds the even-indexed term, f1 holds the odd-indexed one
	return (int)( ( i & 1 ) ? f1 : f0 );
}
375 
376 struct Expr_Fibonacci_c : public Expr_Unary_c
377 {
Expr_Fibonacci_cExpr_Fibonacci_c378 	explicit Expr_Fibonacci_c ( ISphExpr * pFirst ) { m_pFirst = pFirst; }
379 
EvalExpr_Fibonacci_c380 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
IntEvalExpr_Fibonacci_c381 	virtual int IntEval ( const CSphMatch & tMatch ) const { return Fibonacci ( m_pFirst->IntEval ( tMatch ) ); }
Int64EvalExpr_Fibonacci_c382 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval ( tMatch ); }
383 };
384 
385 //////////////////////////////////////////////////////////////////////////
386 
// argument evaluation shortcuts for the node declaration macros below
// they expect m_pFirst, m_pSecond, m_pThird, and tMatch to be in scope at the point of use

// float evaluation
#define FIRST		( m_pFirst->Eval ( tMatch ) )
#define SECOND		( m_pSecond->Eval ( tMatch ) )
#define THIRD		( m_pThird->Eval ( tMatch ) )

// 32-bit integer evaluation
#define INTFIRST	( m_pFirst->IntEval ( tMatch ) )
#define INTSECOND	( m_pSecond->IntEval ( tMatch ) )
#define INTTHIRD	( m_pThird->IntEval ( tMatch ) )

// 64-bit integer evaluation
#define INT64FIRST	( m_pFirst->Int64Eval ( tMatch ) )
#define INT64SECOND	( m_pSecond->Int64Eval ( tMatch ) )
#define INT64THIRD	( m_pThird->Int64Eval ( tMatch ) )
398 
/// opens the declaration of a unary expression node
/// the node owns its only argument (released in the destructor), passes MVA and string pools
/// down to it, reports its dependency columns, and evaluates FLT_EXPR_ as the float result
/// NOTE, the struct is left open on purpose; the wrappers below add methods and close it
#define DECLARE_UNARY_TRAITS(CLASS_,FLT_EXPR_) \
	struct CLASS_ : public ISphExpr \
	{ \
		ISphExpr * m_pFirst; \
		\
		explicit CLASS_ ( ISphExpr * pFirst ) \
			: m_pFirst ( pFirst ) \
		{} \
		\
		~CLASS_ () \
		{ \
			SafeRelease ( m_pFirst ); \
		} \
		\
		virtual void SetMVAPool ( const DWORD * pMvaPool ) \
		{ \
			m_pFirst->SetMVAPool ( pMvaPool ); \
		} \
		\
		virtual void SetStringPool ( const BYTE * pStrings ) \
		{ \
			m_pFirst->SetStringPool ( pStrings ); \
		} \
		\
		virtual float Eval ( const CSphMatch & tMatch ) const \
		{ \
			return FLT_EXPR_; \
		} \
		\
		virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const \
		{ \
			m_pFirst->GetDependencyColumns ( dColumns ); \
		}

/// unary node that only has a float evaluator
#define DECLARE_UNARY_FLT(CLASS_,FLT_EXPR_) \
		DECLARE_UNARY_TRAITS ( CLASS_, FLT_EXPR_ ) \
	};

/// unary node with float, 32-bit integer, and 64-bit integer evaluators
#define DECLARE_UNARY_INT(CLASS_,FLT_EXPR_,INT_EXPR_,INT64_EXPR_) \
		DECLARE_UNARY_TRAITS ( CLASS_, FLT_EXPR_ ) \
		\
		virtual int IntEval ( const CSphMatch & tMatch ) const \
		{ \
			return INT_EXPR_; \
		} \
		\
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const \
		{ \
			return INT64_EXPR_; \
		} \
	};
419 
/// integer absolute value helper behind IABS()
/// being a function, it evaluates its argument exactly once (arguments here are
/// whole expression subtree evaluations), and negates the entire argument
template < typename T >
static inline T ExprIAbs ( T tValue )
{
	return tValue>0 ? tValue : -tValue;
}

#define IABS(_arg) ExprIAbs ( _arg )
421 
422 DECLARE_UNARY_INT ( Expr_Neg_c,		-FIRST,			-INTFIRST,		-INT64FIRST )
423 DECLARE_UNARY_INT ( Expr_Abs_c,		fabs(FIRST),	IABS(INTFIRST),	IABS(INT64FIRST) )
424 DECLARE_UNARY_FLT ( Expr_Ceil_c,	float(ceil(FIRST)) )
425 DECLARE_UNARY_FLT ( Expr_Floor_c,	float(floor(FIRST)) )
426 DECLARE_UNARY_FLT ( Expr_Sin_c,		float(sin(FIRST)) )
427 DECLARE_UNARY_FLT ( Expr_Cos_c,		float(cos(FIRST)) )
428 DECLARE_UNARY_FLT ( Expr_Ln_c,		float(log(FIRST)) )
429 DECLARE_UNARY_FLT ( Expr_Log2_c,	float(log(FIRST)*M_LOG2E) )
430 DECLARE_UNARY_FLT ( Expr_Log10_c,	float(log(FIRST)*M_LOG10E) )
431 DECLARE_UNARY_FLT ( Expr_Exp_c,		float(exp(FIRST)) )
432 DECLARE_UNARY_FLT ( Expr_Sqrt_c,	float(sqrt(FIRST)) )
433 
434 DECLARE_UNARY_INT ( Expr_NotInt_c,		(float)(INTFIRST?0:1),		INTFIRST?0:1,	INTFIRST?0:1 );
435 DECLARE_UNARY_INT ( Expr_NotInt64_c,	(float)(INT64FIRST?0:1),	INT64FIRST?0:1,	INT64FIRST?0:1 );
436 DECLARE_UNARY_INT ( Expr_Sint_c,		(float)(INTFIRST),			INTFIRST,		INTFIRST )
437 
438 //////////////////////////////////////////////////////////////////////////
439 
/// opens the declaration of a binary expression node
/// the node owns both arguments (released in the destructor), passes MVA and string pools
/// down to both, and reports the dependency columns of both
/// NOTE, the struct is left open on purpose; evaluators get added after this, and DECLARE_END() closes it
#define DECLARE_BINARY_TRAITS(CLASS_) \
	struct CLASS_ : public ISphExpr \
	{ \
		ISphExpr * m_pFirst; \
		ISphExpr * m_pSecond; \
		\
		CLASS_ ( ISphExpr * pFirst, ISphExpr * pSecond ) \
			: m_pFirst ( pFirst ) \
			, m_pSecond ( pSecond ) \
		{} \
		\
		~CLASS_ () \
		{ \
			SafeRelease ( m_pFirst ); \
			SafeRelease ( m_pSecond ); \
		} \
		\
		virtual void SetMVAPool ( const DWORD * pMvaPool ) \
		{ \
			m_pFirst->SetMVAPool ( pMvaPool ); \
			m_pSecond->SetMVAPool ( pMvaPool ); \
		} \
		\
		virtual void SetStringPool ( const BYTE * pStrings ) \
		{ \
			m_pFirst->SetStringPool ( pStrings ); \
			m_pSecond->SetStringPool ( pStrings ); \
		} \
		\
		virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const \
		{ \
			m_pFirst->GetDependencyColumns ( dColumns ); \
			m_pSecond->GetDependencyColumns ( dColumns ); \
		}

/// closes a struct that was opened by one of the xxx_TRAITS macros
#define DECLARE_END() };
456 
/// binary node that only has a float evaluator
#define DECLARE_BINARY_FLT(CLASS_,FLT_EXPR_) \
		DECLARE_BINARY_TRAITS ( CLASS_ ) \
		\
		virtual float Eval ( const CSphMatch & tMatch ) const \
		{ \
			return FLT_EXPR_; \
		} \
	};

/// binary node with float, 32-bit integer, and 64-bit integer evaluators
#define DECLARE_BINARY_INT(CLASS_,FLT_EXPR_,INT_EXPR_,INT64_EXPR_) \
		DECLARE_BINARY_TRAITS ( CLASS_ ) \
		\
		virtual float Eval ( const CSphMatch & tMatch ) const \
		{ \
			return FLT_EXPR_; \
		} \
		\
		virtual int IntEval ( const CSphMatch & tMatch ) const \
		{ \
			return INT_EXPR_; \
		} \
		\
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const \
		{ \
			return INT64_EXPR_; \
		} \
	};

/// declares three nodes at once, named PREFIX_Float_c, PREFIX_Int_c, and PREFIX_Int64_c
/// each one computes in its own native type, and converts that result for the other two evaluators
#define DECLARE_BINARY_POLY(PREFIX_,FLT_EXPR_,INT_EXPR_,INT64_EXPR_) \
	DECLARE_BINARY_INT ( PREFIX_##Float_c, \
		FLT_EXPR_, \
		(int)Eval(tMatch), \
		(int64_t)Eval(tMatch) ) \
	DECLARE_BINARY_INT ( PREFIX_##Int_c, \
		(float)IntEval(tMatch), \
		INT_EXPR_, \
		(int64_t)IntEval(tMatch) ) \
	DECLARE_BINARY_INT ( PREFIX_##Int64_c, \
		(float)Int64Eval(tMatch), \
		(int)Int64Eval(tMatch), \
		INT64_EXPR_ )
473 
// turn a condition into 1 or 0, as a float and as an int respectively
#define IFFLT(COND_)	( (COND_) ? 1.0f : 0.0f )
#define IFINT(COND_)	( (COND_) ? 1 : 0 )
476 
477 DECLARE_BINARY_INT ( Expr_Add_c,	FIRST + SECOND,						INTFIRST + INTSECOND,				INT64FIRST + INT64SECOND )
478 DECLARE_BINARY_INT ( Expr_Sub_c,	FIRST - SECOND,						INTFIRST - INTSECOND,				INT64FIRST - INT64SECOND )
DECLARE_BINARY_INT(Expr_Mul_c,FIRST * SECOND,INTFIRST * INTSECOND,INT64FIRST * INT64SECOND)479 DECLARE_BINARY_INT ( Expr_Mul_c,	FIRST * SECOND,						INTFIRST * INTSECOND,				INT64FIRST * INT64SECOND )
480 DECLARE_BINARY_FLT ( Expr_Div_c,	FIRST / SECOND )
481 DECLARE_BINARY_INT ( Expr_BitAnd_c,	(float)(int(FIRST)&int(SECOND)),	INTFIRST & INTSECOND,				INT64FIRST & INT64SECOND )
482 DECLARE_BINARY_INT ( Expr_BitOr_c,	(float)(int(FIRST)|int(SECOND)),	INTFIRST | INTSECOND,				INT64FIRST | INT64SECOND )
483 DECLARE_BINARY_INT ( Expr_Mod_c,	(float)(int(FIRST)%int(SECOND)),	INTFIRST % INTSECOND,				INT64FIRST % INT64SECOND )
484 
485 DECLARE_BINARY_TRAITS ( Expr_Idiv_c )
486 	virtual float Eval ( const CSphMatch & tMatch ) const
487 	{
488 		int iSecond = int(SECOND);
489 		return iSecond ? float(int(FIRST)/iSecond) : 0.0f;
490 	}
491 
IntEval(const CSphMatch & tMatch) const492 	virtual int IntEval ( const CSphMatch & tMatch ) const
493 	{
494 		int iSecond = INTSECOND;
495 		return iSecond ? ( INTFIRST / iSecond ) : 0;
496 	}
497 
Int64Eval(const CSphMatch & tMatch) const498 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
499 	{
500 		int64_t iSecond = INT64SECOND;
501 		return iSecond ? ( INT64FIRST / iSecond ) : 0;
502 	}
503 DECLARE_END()
504 
505 DECLARE_BINARY_POLY ( Expr_Lt,		IFFLT ( FIRST<SECOND ),					IFINT ( INTFIRST<INTSECOND ),		IFINT ( INT64FIRST<INT64SECOND ) )
506 DECLARE_BINARY_POLY ( Expr_Gt,		IFFLT ( FIRST>SECOND ),					IFINT ( INTFIRST>INTSECOND ),		IFINT ( INT64FIRST>INT64SECOND ) )
507 DECLARE_BINARY_POLY ( Expr_Lte,		IFFLT ( FIRST<=SECOND ),				IFINT ( INTFIRST<=INTSECOND ),		IFINT ( INT64FIRST<=INT64SECOND ) )
508 DECLARE_BINARY_POLY ( Expr_Gte,		IFFLT ( FIRST>=SECOND ),				IFINT ( INTFIRST>=INTSECOND ),		IFINT ( INT64FIRST>=INT64SECOND ) )
509 DECLARE_BINARY_POLY ( Expr_Eq,		IFFLT ( fabs ( FIRST-SECOND )<=1e-6 ),	IFINT ( INTFIRST==INTSECOND ),		IFINT ( INT64FIRST==INT64SECOND ) )
510 DECLARE_BINARY_POLY ( Expr_Ne,		IFFLT ( fabs ( FIRST-SECOND )>1e-6 ),	IFINT ( INTFIRST!=INTSECOND ),		IFINT ( INT64FIRST!=INT64SECOND ) )
511 
512 DECLARE_BINARY_INT ( Expr_Min_c,	Min ( FIRST, SECOND ),					Min ( INTFIRST, INTSECOND ),		Min ( INT64FIRST, INT64SECOND ) )
513 DECLARE_BINARY_INT ( Expr_Max_c,	Max ( FIRST, SECOND ),					Max ( INTFIRST, INTSECOND ),		Max ( INT64FIRST, INT64SECOND ) )
514 DECLARE_BINARY_FLT ( Expr_Pow_c,	float ( pow ( FIRST, SECOND ) ) )
515 
516 DECLARE_BINARY_POLY ( Expr_And,		FIRST!=0.0f && SECOND!=0.0f,		IFINT ( INTFIRST && INTSECOND ),	IFINT ( INT64FIRST && INT64SECOND ) )
517 DECLARE_BINARY_POLY ( Expr_Or,		FIRST!=0.0f || SECOND!=0.0f,		IFINT ( INTFIRST || INTSECOND ),	IFINT ( INT64FIRST || INT64SECOND ) )
518 
519 //////////////////////////////////////////////////////////////////////////
520 
/// declares a complete three-argument expression node
/// the node owns all three arguments (released in the destructor), passes MVA and string pools
/// down to each, reports the dependency columns of each, and evaluates the given
/// expressions as its float, 32-bit integer, and 64-bit integer results
#define DECLARE_TERNARY(CLASS_,FLT_EXPR_,INT_EXPR_,INT64_EXPR_) \
	struct CLASS_ : public ISphExpr \
	{ \
		ISphExpr * m_pFirst; \
		ISphExpr * m_pSecond; \
		ISphExpr * m_pThird; \
		\
		CLASS_ ( ISphExpr * pFirst, ISphExpr * pSecond, ISphExpr * pThird ) \
			: m_pFirst ( pFirst ) \
			, m_pSecond ( pSecond ) \
			, m_pThird ( pThird ) \
		{} \
		\
		~CLASS_ () \
		{ \
			SafeRelease ( m_pFirst ); \
			SafeRelease ( m_pSecond ); \
			SafeRelease ( m_pThird ); \
		} \
		\
		virtual void SetMVAPool ( const DWORD * pMvaPool ) \
		{ \
			m_pFirst->SetMVAPool ( pMvaPool ); \
			m_pSecond->SetMVAPool ( pMvaPool ); \
			m_pThird->SetMVAPool ( pMvaPool ); \
		} \
		\
		virtual void SetStringPool ( const BYTE * pStrings ) \
		{ \
			m_pFirst->SetStringPool ( pStrings ); \
			m_pSecond->SetStringPool ( pStrings ); \
			m_pThird->SetStringPool ( pStrings ); \
		} \
		\
		virtual float Eval ( const CSphMatch & tMatch ) const \
		{ \
			return FLT_EXPR_; \
		} \
		\
		virtual int IntEval ( const CSphMatch & tMatch ) const \
		{ \
			return INT_EXPR_; \
		} \
		\
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const \
		{ \
			return INT64_EXPR_; \
		} \
		\
		virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const \
		{ \
			m_pFirst->GetDependencyColumns ( dColumns ); \
			m_pSecond->GetDependencyColumns ( dColumns ); \
			m_pThird->GetDependencyColumns ( dColumns ); \
		} \
	};
541 
542 DECLARE_TERNARY ( Expr_If_c,	( FIRST!=0.0f ) ? SECOND : THIRD,	INTFIRST ? INTSECOND : INTTHIRD,	INT64FIRST ? INT64SECOND : INT64THIRD )
543 DECLARE_TERNARY ( Expr_Madd_c,	FIRST*SECOND+THIRD,					INTFIRST*INTSECOND + INTTHIRD,		INT64FIRST*INT64SECOND + INT64THIRD )
544 DECLARE_TERNARY ( Expr_Mul3_c,	FIRST*SECOND*THIRD,					INTFIRST*INTSECOND*INTTHIRD,		INT64FIRST*INT64SECOND*INT64THIRD )
545 
546 //////////////////////////////////////////////////////////////////////////
547 
/// declares a unary node that extracts a calendar field from a timestamp
/// the argument is evaluated as a 32-bit integer, and split with localtime_r() into
/// a local "struct tm s"; INT_EXPR_ then computes the result from that "s"
/// the float and 64-bit evaluators convert the integer result
#define DECLARE_TIMESTAMP(CLASS_,INT_EXPR_) \
	DECLARE_UNARY_TRAITS ( CLASS_, (float)IntEval(tMatch) ) \
		\
		virtual int IntEval ( const CSphMatch & tMatch ) const \
		{ \
			struct tm s; \
			time_t tStamp = (time_t)INTFIRST; \
			localtime_r ( &tStamp, &s ); \
			return INT_EXPR_; \
		} \
		\
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const \
		{ \
			return IntEval ( tMatch ); \
		} \
	};
559 
560 DECLARE_TIMESTAMP ( Expr_Day_c,				s.tm_mday );
561 DECLARE_TIMESTAMP ( Expr_Month_c,			s.tm_mon+1 );
562 DECLARE_TIMESTAMP ( Expr_Year_c,			s.tm_year+1900 );
563 DECLARE_TIMESTAMP ( Expr_YearMonth_c,		(s.tm_year+1900)*100+s.tm_mon+1 );
564 DECLARE_TIMESTAMP ( Expr_YearMonthDay_c,	(s.tm_year+1900)*10000+(s.tm_mon+1)*100+s.tm_mday );
565 
566 //////////////////////////////////////////////////////////////////////////
567 // PARSER INTERNALS
568 //////////////////////////////////////////////////////////////////////////
569 
570 #include "yysphinxexpr.h"
571 
/// ids of the known built-in functions
/// listed in the same order as the entries of the g_dFuncs table
enum Func_e
{
	// no arguments
	FUNC_NOW,

	// one argument
	FUNC_ABS,
	FUNC_CEIL,
	FUNC_FLOOR,
	FUNC_SIN,
	FUNC_COS,
	FUNC_LN,
	FUNC_LOG2,
	FUNC_LOG10,
	FUNC_EXP,
	FUNC_SQRT,
	FUNC_BIGINT,
	FUNC_SINT,
	FUNC_CRC32,
	FUNC_FIBONACCI,

	// one argument, timestamp parts
	FUNC_DAY,
	FUNC_MONTH,
	FUNC_YEAR,
	FUNC_YEARMONTH,
	FUNC_YEARMONTHDAY,

	// two arguments
	FUNC_MIN,
	FUNC_MAX,
	FUNC_POW,
	FUNC_IDIV,

	// three arguments
	FUNC_IF,
	FUNC_MADD,
	FUNC_MUL3,

	// declared with negative argument counts in g_dFuncs
	FUNC_INTERVAL,
	FUNC_IN,
	FUNC_BITDOT,

	// four arguments
	FUNC_GEODIST
};
613 
614 
615 struct FuncDesc_t
616 {
617 	const char *	m_sName;
618 	int				m_iArgs;
619 	Func_e			m_eFunc;
620 	ESphAttr		m_eRet;
621 };
622 
623 
624 static FuncDesc_t g_dFuncs[] =
625 {
626 	{ "now",			0,	FUNC_NOW,			SPH_ATTR_INTEGER },
627 
628 	{ "abs",			1,	FUNC_ABS,			SPH_ATTR_NONE },
629 	{ "ceil",			1,	FUNC_CEIL,			SPH_ATTR_FLOAT },
630 	{ "floor",			1,	FUNC_FLOOR,			SPH_ATTR_FLOAT },
631 	{ "sin",			1,	FUNC_SIN,			SPH_ATTR_FLOAT },
632 	{ "cos",			1,	FUNC_COS,			SPH_ATTR_FLOAT },
633 	{ "ln",				1,	FUNC_LN,			SPH_ATTR_FLOAT },
634 	{ "log2",			1,	FUNC_LOG2,			SPH_ATTR_FLOAT },
635 	{ "log10",			1,	FUNC_LOG10,			SPH_ATTR_FLOAT },
636 	{ "exp",			1,	FUNC_EXP,			SPH_ATTR_FLOAT },
637 	{ "sqrt",			1,	FUNC_SQRT,			SPH_ATTR_FLOAT },
638 	{ "bigint",			1,	FUNC_BIGINT,		SPH_ATTR_BIGINT },	// type-enforcer special as-if-function
639 	{ "sint",			1,	FUNC_SINT,			SPH_ATTR_BIGINT },	// type-enforcer special as-if-function
640 	{ "crc32",			1,	FUNC_CRC32,			SPH_ATTR_INTEGER },
641 	{ "fibonacci",		1,	FUNC_FIBONACCI,		SPH_ATTR_INTEGER },
642 
643 	{ "day",			1,	FUNC_DAY,			SPH_ATTR_INTEGER },
644 	{ "month",			1,	FUNC_MONTH,			SPH_ATTR_INTEGER },
645 	{ "year",			1,	FUNC_YEAR,			SPH_ATTR_INTEGER },
646 	{ "yearmonth",		1,	FUNC_YEARMONTH,		SPH_ATTR_INTEGER },
647 	{ "yearmonthday",	1,	FUNC_YEARMONTHDAY,	SPH_ATTR_INTEGER },
648 
649 	{ "min",			2,	FUNC_MIN,			SPH_ATTR_NONE },
650 	{ "max",			2,	FUNC_MAX,			SPH_ATTR_NONE },
651 	{ "pow",			2,	FUNC_POW,			SPH_ATTR_FLOAT },
652 	{ "idiv",			2,	FUNC_IDIV,			SPH_ATTR_NONE },
653 
654 	{ "if",				3,	FUNC_IF,			SPH_ATTR_NONE },
655 	{ "madd",			3,	FUNC_MADD,			SPH_ATTR_NONE },
656 	{ "mul3",			3,	FUNC_MUL3,			SPH_ATTR_NONE },
657 
658 	{ "interval",		-2,	FUNC_INTERVAL,		SPH_ATTR_INTEGER },
659 	{ "in",				-1, FUNC_IN,			SPH_ATTR_INTEGER },
660 	{ "bitdot",			-1, FUNC_BITDOT,		SPH_ATTR_NONE },
661 
662 	{ "geodist",		4,	FUNC_GEODIST,		SPH_ATTR_FLOAT }
663 };
664 
665 //////////////////////////////////////////////////////////////////////////
666 
667 /// check for type based on int value
GetIntType(int64_t iValue)668 static inline ESphAttr GetIntType ( int64_t iValue )
669 {
670 	return ( iValue>=(int64_t)INT_MIN && iValue<=(int64_t)INT_MAX ) ? SPH_ATTR_INTEGER : SPH_ATTR_BIGINT;
671 }
672 
673 /// list of constants
674 class ConstList_c
675 {
676 public:
677 	CSphVector<int64_t>		m_dInts;		///< dword/int64 storage
678 	CSphVector<float>		m_dFloats;		///< float storage
679 	ESphAttr				m_eRetType;		///< SPH_ATTR_INTEGER, SPH_ATTR_BIGINT, or SPH_ATTR_FLOAT
680 
681 public:
ConstList_c()682 	ConstList_c ()
683 		: m_eRetType ( SPH_ATTR_INTEGER )
684 	{}
685 
Add(int64_t iValue)686 	void Add ( int64_t iValue )
687 	{
688 		if ( m_eRetType!=SPH_ATTR_FLOAT )
689 		{
690 			m_eRetType = GetIntType ( iValue );
691 			m_dInts.Add ( iValue );
692 		} else
693 		{
694 			m_dFloats.Add ( (float)iValue );
695 		}
696 	}
697 
Add(float fValue)698 	void Add ( float fValue )
699 	{
700 		if ( m_eRetType!=SPH_ATTR_FLOAT )
701 		{
702 			assert ( m_dFloats.GetLength()==0 );
703 			ARRAY_FOREACH ( i, m_dInts )
704 				m_dFloats.Add ( (float)m_dInts[i] );
705 			m_dInts.Reset ();
706 			m_eRetType = SPH_ATTR_FLOAT;
707 		}
708 		m_dFloats.Add ( fValue );
709 	}
710 };
711 
712 /// expression tree node
713 struct ExprNode_t
714 {
715 	int				m_iToken;	///< token type, including operators
716 	ESphAttr		m_eRetType;	///< result type
717 	ESphAttr		m_eArgType;	///< args type
718 	CSphAttrLocator	m_tLocator;	///< attribute locator, for TOK_ATTR type
719 	int				m_iLocator; ///< index of attribute locator in schema
720 	union
721 	{
722 		int64_t			m_iConst;		///< constant value, for TOK_CONST_INT type
723 		float			m_fConst;		///< constant value, for TOK_CONST_FLOAT type
724 		int				m_iFunc;		///< built-in function id, for TOK_FUNC type
725 		int				m_iArgs;		///< args count, for arglist (token==',') type
726 		ConstList_c *	m_pConsts;		///< constants list, for TOK_CONST_LIST type
727 	};
728 	int				m_iLeft;
729 	int				m_iRight;
730 
ExprNode_tExprNode_t731 	ExprNode_t () : m_iToken ( 0 ), m_eRetType ( SPH_ATTR_NONE ), m_eArgType ( SPH_ATTR_NONE ), m_iLocator ( -1 ), m_iLeft ( -1 ), m_iRight ( -1 ) {}
732 
FloatValExprNode_t733 	float FloatVal()
734 	{
735 		assert ( m_iToken==TOK_CONST_INT || m_iToken==TOK_CONST_FLOAT );
736 		return ( m_iToken==TOK_CONST_INT ) ? (float)m_iConst : m_fConst;
737 	}
738 };
739 
740 
741 /// expression parser
742 class ExprParser_t
743 {
744 	friend int				yylex ( YYSTYPE * lvalp, ExprParser_t * pParser );
745 	friend int				yyparse ( ExprParser_t * pParser );
746 	friend void				yyerror ( ExprParser_t * pParser, const char * sMessage );
747 
748 public:
ExprParser_t(CSphSchema * pExtra,ISphExprHook * pHook)749 	ExprParser_t ( CSphSchema * pExtra, ISphExprHook * pHook )
750 		: m_pHook ( pHook )
751 		, m_pExtra ( pExtra )
752 	{}
753 
754 							~ExprParser_t ();
755 	ISphExpr *				Parse ( const char * sExpr, const CSphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError );
756 
757 protected:
758 	int						m_iParsed;	///< filled by yyparse() at the very end
759 	CSphString				m_sLexerError;
760 	CSphString				m_sParserError;
761 	CSphString				m_sCreateError;
762 	ISphExprHook *			m_pHook;
763 
764 protected:
765 	ESphAttr				GetWidestRet ( int iLeft, int iRight );
766 
767 	int						AddNodeInt ( int64_t iValue );
768 	int						AddNodeFloat ( float fValue );
769 	int						AddNodeString ( int64_t iValue );
770 	int						AddNodeAttr ( int iTokenType, uint64_t uAttrLocator );
771 	int						AddNodeID ();
772 	int						AddNodeWeight ();
773 	int						AddNodeOp ( int iOp, int iLeft, int iRight );
774 	int						AddNodeFunc ( int iFunc, int iLeft, int iRight=-1 );
775 	int						AddNodeUdf ( int iCall, int iArg );
776 	int						AddNodeConstlist ( int64_t iValue );
777 	int						AddNodeConstlist ( float iValue );
778 	void					AppendToConstlist ( int iNode, int64_t iValue );
779 	void					AppendToConstlist ( int iNode, float iValue );
780 	int						AddNodeUservar ( int iUservar );
781 	int						AddNodeHookIdent ( int iID );
782 	int						AddNodeHookFunc ( int iID, int iLeft );
783 
784 private:
785 	const char *			m_sExpr;
786 	const char *			m_pCur;
787 	const char *			m_pLastTokenStart;
788 	const CSphSchema *		m_pSchema;
789 	CSphVector<ExprNode_t>	m_dNodes;
790 	CSphVector<CSphString>	m_dUservars;
791 	CSphVector<UdfCall_t*>	m_dUdfCalls;
792 
793 	CSphSchema *			m_pExtra;
794 
795 	int						m_iConstNow;
796 
797 private:
798 	int						GetToken ( YYSTYPE * lvalp );
799 
800 	void					GatherArgTypes ( int iNode, CSphVector<int> & dTypes );
801 	void					GatherArgNodes ( int iNode, CSphVector<int> & dNodes );
802 	void					GatherArgRetTypes ( int iNode, CSphVector<ESphAttr> & dTypes );
803 
804 	bool					CheckForConstSet ( int iArgsNode, int iSkip );
805 	int						ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp );
806 
807 	template < typename T >
808 	void					WalkTree ( int iRoot, T & FUNCTOR );
809 
810 	void					Optimize ( int iNode );
811 	void					Dump ( int iNode );
812 
813 	ISphExpr *				CreateTree ( int iNode );
814 	ISphExpr *				CreateIntervalNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs );
815 	ISphExpr *				CreateInNode ( int iNode );
816 	ISphExpr *				CreateGeodistNode ( int iArgs );
817 	ISphExpr *				CreateBitdotNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs );
818 	ISphExpr *				CreateUdfNode ( int iCall, ISphExpr * pLeft );
819 };
820 
821 //////////////////////////////////////////////////////////////////////////
822 
823 /// parse that numeric constant
ParseNumeric(YYSTYPE * lvalp,const char ** ppStr)824 static int ParseNumeric ( YYSTYPE * lvalp, const char ** ppStr )
825 {
826 	assert ( lvalp && ppStr && *ppStr );
827 
828 	// try float route
829 	char * pEnd = NULL;
830 	float fRes = (float) strtod ( *ppStr, &pEnd );
831 
832 	// try int route
833 	int64_t iRes = 0;
834 	bool bInt = true;
835 	for ( const char * p=(*ppStr); p<pEnd; p++ && bInt )
836 	{
837 		if ( isdigit(*p) )
838 			iRes = iRes*10 + (int)( (*p)-'0' ); // FIXME! missing overflow check, missing octal/hex handling
839 		else
840 			bInt = false;
841 	}
842 
843 	// choose your destiny
844 	*ppStr = pEnd;
845 	if ( bInt )
846 	{
847 		lvalp->iConst = iRes;
848 		return TOK_CONST_INT;
849 	} else
850 	{
851 		lvalp->fConst = fRes;
852 		return TOK_CONST_FLOAT;
853 	}
854 }
855 
sphPackAttrLocator(const CSphAttrLocator & tLoc,int iLocator)856 static uint64_t sphPackAttrLocator ( const CSphAttrLocator & tLoc, int iLocator )
857 {
858 	assert ( iLocator>=0 && iLocator<=0xff );
859 	uint64_t uIndex = 0;
860 	uIndex = ( tLoc.m_iBitOffset<<16 ) + tLoc.m_iBitCount + ( (uint64_t)iLocator<<32 );
861 	if ( tLoc.m_bDynamic )
862 		uIndex |= ( U64C(1)<<63 );
863 
864 	return uIndex;
865 }
866 
sphUnpackAttrLocator(uint64_t uIndex,ExprNode_t * pNode)867 static void sphUnpackAttrLocator ( uint64_t uIndex, ExprNode_t * pNode )
868 {
869 	assert ( pNode );
870 	pNode->m_tLocator.m_iBitOffset = (int)( ( uIndex>>16 ) & 0xffff );
871 	pNode->m_tLocator.m_iBitCount = (int)( uIndex & 0xffff );
872 	pNode->m_tLocator.m_bDynamic = ( ( uIndex & ( U64C(1)<<63 ) )!=0 );
873 
874 	pNode->m_iLocator = (int)( ( uIndex>>32 ) & 0xff );
875 }
876 
ParseAttr(int iAttr,const char * sTok,YYSTYPE * lvalp)877 int ExprParser_t::ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp )
878 {
879 	// check attribute type and width
880 	const CSphColumnInfo & tCol = m_pSchema->GetAttr ( iAttr );
881 
882 	int iRes = -1;
883 	switch ( tCol.m_eAttrType )
884 	{
885 	case SPH_ATTR_FLOAT:		iRes = TOK_ATTR_FLOAT;	break;
886 	case SPH_ATTR_UINT32SET:	iRes = TOK_ATTR_MVA32; break;
887 	case SPH_ATTR_INT64SET:		iRes = TOK_ATTR_MVA64; break;
888 	case SPH_ATTR_STRING:		iRes = TOK_ATTR_STRING; break;
889 	case SPH_ATTR_INTEGER:
890 	case SPH_ATTR_TIMESTAMP:
891 	case SPH_ATTR_BOOL:
892 	case SPH_ATTR_BIGINT:
893 	case SPH_ATTR_WORDCOUNT:	iRes = tCol.m_tLocator.IsBitfield() ? TOK_ATTR_BITS : TOK_ATTR_INT; break;
894 	default:
895 		m_sLexerError.SetSprintf ( "attribute '%s' is of unsupported type (type=%d)", sTok, tCol.m_eAttrType );
896 		return -1;
897 	}
898 
899 	if ( m_pExtra )
900 		m_pExtra->AddAttr ( tCol, true );
901 	lvalp->iAttrLocator = sphPackAttrLocator ( tCol.m_tLocator, iAttr );
902 	return iRes;
903 }
904 
905 /// a lexer of my own
906 /// returns token id and fills lvalp on success
907 /// returns -1 and fills sError on failure
GetToken(YYSTYPE * lvalp)908 int ExprParser_t::GetToken ( YYSTYPE * lvalp )
909 {
910 	// skip whitespace, check eof
911 	while ( isspace ( *m_pCur ) ) m_pCur++;
912 	m_pLastTokenStart = m_pCur;
913 	if ( !*m_pCur ) return 0;
914 
915 	// check for constant
916 	if ( isdigit ( *m_pCur ) )
917 		return ParseNumeric ( lvalp, &m_pCur );
918 
919 	// check for field, function, or magic name
920 	if ( sphIsAttr ( m_pCur[0] )
921 		|| ( m_pCur[0]=='@' && sphIsAttr ( m_pCur[1] ) && !isdigit ( m_pCur[1] ) ) )
922 	{
923 		// get token
924 		const char * pStart = m_pCur++;
925 		while ( sphIsAttr ( *m_pCur ) ) m_pCur++;
926 
927 		CSphString sTok;
928 		sTok.SetBinary ( pStart, m_pCur-pStart );
929 		sTok.ToLower ();
930 
931 		// check for magic name
932 		if ( sTok=="@id" )			return TOK_ATID;
933 		if ( sTok=="@weight" )		return TOK_ATWEIGHT;
934 		if ( sTok=="id" )			return TOK_ID;
935 		if ( sTok=="weight" )		return TOK_WEIGHT;
936 		if ( sTok=="distinct" )		return TOK_DISTINCT;
937 		if ( sTok=="@geodist" )
938 		{
939 			int iGeodist = m_pSchema->GetAttrIndex("@geodist");
940 			if ( iGeodist==-1 )
941 			{
942 				m_sLexerError = "geoanchor is not set, @geodist expression unavailable";
943 				return -1;
944 			}
945 			const CSphAttrLocator & tLoc = m_pSchema->GetAttr ( iGeodist ).m_tLocator;
946 			lvalp->iAttrLocator = sphPackAttrLocator ( tLoc, iGeodist );
947 			return TOK_ATTR_FLOAT;
948 		}
949 
950 		// check for uservar
951 		if ( pStart[0]=='@' )
952 		{
953 			lvalp->iNode = m_dUservars.GetLength();
954 			m_dUservars.Add ( sTok );
955 			return TOK_USERVAR;
956 		}
957 
958 		// check for keyword
959 		if ( sTok=="and" )		{ return TOK_AND; }
960 		if ( sTok=="or" )		{ return TOK_OR; }
961 		if ( sTok=="not" )		{ return TOK_NOT; }
962 		if ( sTok=="div" )		{ return TOK_DIV; }
963 		if ( sTok=="mod" )		{ return TOK_MOD; }
964 
965 		if ( sTok=="count" )
966 		{
967 			int iAttr = m_pSchema->GetAttrIndex ( "count" );
968 			if ( iAttr>=0 )
969 				ParseAttr ( iAttr, sTok.cstr(), lvalp );
970 			return TOK_COUNT;
971 		}
972 
973 		// check for attribute
974 		int iAttr = m_pSchema->GetAttrIndex ( sTok.cstr() );
975 		if ( iAttr>=0 )
976 			return ParseAttr ( iAttr, sTok.cstr(), lvalp );
977 
978 		// check for function
979 		sTok.ToLower();
980 		for ( int i=0; i<int(sizeof(g_dFuncs)/sizeof(g_dFuncs[0])); i++ )
981 			if ( sTok==g_dFuncs[i].m_sName )
982 		{
983 			lvalp->iFunc = i;
984 			return g_dFuncs[i].m_eFunc==FUNC_IN ? TOK_FUNC_IN : TOK_FUNC;
985 		}
986 
987 		// ask hook
988 		if ( m_pHook )
989 		{
990 			int iID = m_pHook->IsKnownIdent ( sTok.cstr() );
991 			if ( iID>=0 )
992 			{
993 				lvalp->iNode = iID;
994 				return TOK_HOOK_IDENT;
995 			}
996 
997 			iID = m_pHook->IsKnownFunc ( sTok.cstr() );
998 			if ( iID>=0 )
999 			{
1000 				lvalp->iNode = iID;
1001 				return TOK_HOOK_FUNC;
1002 			}
1003 		}
1004 
1005 		// check for UDF
1006 		if ( g_bUdfEnabled )
1007 		{
1008 			g_tUdfMutex.Lock();
1009 			UdfFunc_t * pUdf = g_hUdfFuncs ( sTok );
1010 			if ( pUdf )
1011 			{
1012 				if ( pUdf->m_bToDrop )
1013 					pUdf = NULL; // DROP in progress, can not use
1014 				else
1015 					pUdf->m_iUserCount++; // protection against concurrent DROP (decrements in ~UdfCall_t())
1016 				g_tUdfMutex.Unlock();
1017 
1018 				lvalp->iNode = m_dUdfCalls.GetLength();
1019 				m_dUdfCalls.Add ( new UdfCall_t() );
1020 				m_dUdfCalls.Last()->m_pUdf = pUdf;
1021 				return TOK_UDF;
1022 			}
1023 			g_tUdfMutex.Unlock();
1024 		}
1025 
1026 		m_sLexerError.SetSprintf ( "unknown identifier '%s' (not an attribute, not a function)", sTok.cstr() );
1027 		return -1;
1028 	}
1029 
1030 	// check for known operators, then
1031 	switch ( *m_pCur )
1032 	{
1033 		case '+':
1034 		case '-':
1035 		case '*':
1036 		case '/':
1037 		case '(':
1038 		case ')':
1039 		case ',':
1040 		case '&':
1041 		case '|':
1042 		case '%':
1043 			return *m_pCur++;
1044 
1045 		case '<':
1046 			m_pCur++;
1047 			if ( *m_pCur=='>' ) { m_pCur++; return TOK_NE; }
1048 			if ( *m_pCur=='=' ) { m_pCur++; return TOK_LTE; }
1049 			return '<';
1050 
1051 		case '>':
1052 			m_pCur++;
1053 			if ( *m_pCur=='=' ) { m_pCur++; return TOK_GTE; }
1054 			return '>';
1055 
1056 		case '=':
1057 			m_pCur++;
1058 			if ( *m_pCur=='=' ) m_pCur++;
1059 			return TOK_EQ;
1060 
1061 		// special case for float values without leading zero
1062 		case '.':
1063 			{
1064 				char * pEnd = NULL;
1065 				lvalp->fConst = (float) strtod ( m_pCur, &pEnd );
1066 				if ( pEnd )
1067 				{
1068 					m_pCur = pEnd;
1069 					return TOK_CONST_FLOAT;
1070 				}
1071 				break;
1072 			}
1073 
1074 		case '\'':
1075 		case '"':
1076 			{
1077 				const char cEnd = *m_pCur;
1078 				for ( const char * s = m_pCur+1; *s; s++ )
1079 				{
1080 					if ( *s==cEnd )
1081 					{
1082 						int iBeg = (int)( m_pCur-m_sExpr );
1083 						int iLen = (int)( s-m_sExpr ) - iBeg + 1;
1084 						lvalp->iConst = ( int64_t(iBeg)<<32 ) + iLen;
1085 						m_pCur = s+1;
1086 						return TOK_CONST_STRING;
1087 
1088 					} else if ( *s=='\\' )
1089 					{
1090 						s++;
1091 						if ( !*s )
1092 							break;
1093 					}
1094 				}
1095 				m_sLexerError.SetSprintf ( "unterminated string constant near '%s'", m_pCur );
1096 				return -1;
1097 			}
1098 	}
1099 
1100 	m_sLexerError.SetSprintf ( "unknown operator '%c' near '%s'", *m_pCur, m_pCur );
1101 	return -1;
1102 }
1103 
1104 /// is add/sub?
IsAddSub(const ExprNode_t * pNode)1105 static inline bool IsAddSub ( const ExprNode_t * pNode )
1106 {
1107 	return pNode->m_iToken=='+' || pNode->m_iToken=='-';
1108 }
1109 
1110 /// is unary operator?
IsUnary(const ExprNode_t * pNode)1111 static inline bool IsUnary ( const ExprNode_t * pNode )
1112 {
1113 	return pNode->m_iToken==TOK_NEG || pNode->m_iToken==TOK_NOT;
1114 }
1115 
1116 /// is arithmetic?
IsAri(const ExprNode_t * pNode)1117 static inline bool IsAri ( const ExprNode_t * pNode )
1118 {
1119 	int iTok = pNode->m_iToken;
1120 	return iTok=='+' || iTok=='-' || iTok=='*' || iTok=='/';
1121 }
1122 
1123 /// is constant?
IsConst(const ExprNode_t * pNode)1124 static inline bool IsConst ( const ExprNode_t * pNode )
1125 {
1126 	return pNode->m_iToken==TOK_CONST_INT || pNode->m_iToken==TOK_CONST_FLOAT;
1127 }
1128 
1129 /// float value of a constant
FloatVal(const ExprNode_t * pNode)1130 static inline float FloatVal ( const ExprNode_t * pNode )
1131 {
1132 	assert ( IsConst(pNode) );
1133 	return pNode->m_iToken==TOK_CONST_INT
1134 		? (float)pNode->m_iConst
1135 		: pNode->m_fConst;
1136 }
1137 
/// optimize subtree
///
/// Does nothing when iNode is negative. Otherwise optimizes both children first,
/// and then rewrites the node in place. The rewrites, in order of application:
///  - unary NEG/NOT over a constant gets folded into a constant
///  - arithmetic over two constants gets folded into a constant
///  - in arithmetic nodes, constants get moved to the left and merged across nested nodes
///  - (a*b)+c and (a*b)*c get replaced with MADD and MUL3 function calls
///  - division by a float constant gets replaced with multiplication by its inverse
///  - one-argument math functions over a constant get folded into a constant
///  - NOW() gets replaced with an integer constant
///  - SINT() over an integer attribute gets replaced with a signed attribute read
///
/// Nodes that get detached by the rewrites stay in m_dNodes; they are just not referenced anymore.
/// The MADD/MUL3 rewrite appends one new node to m_dNodes.
void ExprParser_t::Optimize ( int iNode )
{
	if ( iNode<0 )
		return;

	// children first, so that everything below already sees their optimized form
	Optimize ( m_dNodes[iNode].m_iLeft );
	Optimize ( m_dNodes[iNode].m_iRight );

	// pointers must only be taken now; recursive calls can append to m_dNodes
	ExprNode_t * pRoot = &m_dNodes[iNode];
	ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes[pRoot->m_iLeft] : NULL;
	ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes[pRoot->m_iRight] : NULL;

	// unary arithmetic expression with constant
	if ( IsUnary ( pRoot ) )
	{
		assert ( pLeft && !pRight );

		if ( IsConst ( pLeft ) )
		{
			if ( pLeft->m_iToken==TOK_CONST_INT )
			{
				switch ( pRoot->m_iToken )
				{
					case TOK_NEG:	pRoot->m_iConst = -pLeft->m_iConst; break;
					case TOK_NOT:	pRoot->m_iConst = !pLeft->m_iConst; break;
					default:		assert ( 0 && "internal error: unhandled arithmetic token during const-int optimization" );
				}

			} else
			{
				switch ( pRoot->m_iToken )
				{
					case TOK_NEG:	pRoot->m_fConst = -pLeft->m_fConst; break;
					case TOK_NOT:	pRoot->m_fConst = !pLeft->m_fConst; break;
					default:		assert ( 0 && "internal error: unhandled arithmetic token during const-float optimization" );
				}
			}

			// the root now is a constant of the very same type as its former argument
			pRoot->m_iToken = pLeft->m_iToken;
			pRoot->m_iLeft = -1;
		}
	}

	// arithmetic expression with constants
	if ( IsAri(pRoot) )
	{
		assert ( pLeft && pRight );

		// optimize fully-constant expressions
		if ( IsConst(pLeft) && IsConst(pRight) )
		{
			// int op int stays int, except for division, that always gets computed in floats
			if ( pLeft->m_iToken==TOK_CONST_INT && pRight->m_iToken==TOK_CONST_INT && pRoot->m_iToken!='/' )
			{
				switch ( pRoot->m_iToken )
				{
					case '+':	pRoot->m_iConst = pLeft->m_iConst + pRight->m_iConst; break;
					case '-':	pRoot->m_iConst = pLeft->m_iConst - pRight->m_iConst; break;
					case '*':	pRoot->m_iConst = pLeft->m_iConst * pRight->m_iConst; break;
					default:	assert ( 0 && "internal error: unhandled arithmetic token during const-int optimization" );
				}
				pRoot->m_iToken = TOK_CONST_INT;

			} else
			{
				float fLeft = FloatVal(pLeft);
				float fRight = FloatVal(pRight);
				switch ( pRoot->m_iToken )
				{
					case '+':	pRoot->m_fConst = fLeft + fRight; break;
					case '-':	pRoot->m_fConst = fLeft - fRight; break;
					case '*':	pRoot->m_fConst = fLeft * fRight; break;
					case '/':	pRoot->m_fConst = fLeft / fRight; break;
					default:	assert ( 0 && "internal error: unhandled arithmetic token during const-float optimization" );
				}
				pRoot->m_iToken = TOK_CONST_FLOAT;
			}
			pRoot->m_iLeft = -1;
			pRoot->m_iRight = -1;
			return;
		}

		// canonize (expr op const), move const to the left
		if ( IsConst(pRight) )
		{
			assert ( !IsConst(pLeft) );
			Swap ( pRoot->m_iLeft, pRoot->m_iRight );
			Swap ( pLeft, pRight );

			// fixup (expr-const) to ((-const)+expr)
			if ( pRoot->m_iToken=='-' )
			{
				pRoot->m_iToken = '+';
				if ( pLeft->m_iToken==TOK_CONST_INT )
					pLeft->m_iConst *= -1;
				else
					pLeft->m_fConst *= -1;
			}

			// fixup (expr/const) to ((1/const)*expr)
			if ( pRoot->m_iToken=='/' )
			{
				pRoot->m_iToken = '*';
				pLeft->m_fConst = 1.0f / FloatVal(pLeft);
				pLeft->m_iToken = TOK_CONST_FLOAT;
			}
		}

		// optimize compatible operations with constants
		// compatible means that both the root and its right child are additive, or both are multiplicative
		if ( IsConst(pLeft) && IsAri(pRight) && IsAddSub(pRoot)==IsAddSub(pRight) && IsConst ( &m_dNodes[pRight->m_iLeft] ) )
		{
			ExprNode_t * pConst = &m_dNodes[pRight->m_iLeft];
			ExprNode_t * pExpr = &m_dNodes[pRight->m_iRight];
			assert ( !IsConst(pExpr) ); // must had been optimized

			// optimize (left op (const op2 expr)) to ((left op const) op*op2 expr)
			if ( IsAddSub(pRoot) )
			{
				// fold consts
				int iSign = ( ( pRoot->m_iToken=='+' ) ? 1 : -1 );
				if ( pLeft->m_iToken==TOK_CONST_INT && pConst->m_iToken==TOK_CONST_INT )
				{
					pLeft->m_iConst += iSign*pConst->m_iConst;
				} else
				{
					pLeft->m_fConst = FloatVal(pLeft) + iSign*FloatVal(pConst);
					pLeft->m_iToken = TOK_CONST_FLOAT;
				}

				// fold ops
				// equal ops give plus, different ops give minus
				pRoot->m_iToken = ( pRoot->m_iToken==pRight->m_iToken ) ? '+' : '-';

			} else
			{
				// fold consts
				if ( pRoot->m_iToken=='*' && pLeft->m_iToken==TOK_CONST_INT && pConst->m_iToken==TOK_CONST_INT )
				{
					pLeft->m_iConst *= pConst->m_iConst;
				} else
				{
					if ( pRoot->m_iToken=='*' )
						pLeft->m_fConst = FloatVal(pLeft) * FloatVal(pConst);
					else
						pLeft->m_fConst = FloatVal(pLeft) / FloatVal(pConst);
					pLeft->m_iToken = TOK_CONST_FLOAT;
				}

				// fold ops
				// equal ops give multiplication, different ops give division
				pRoot->m_iToken = ( pRoot->m_iToken==pRight->m_iToken ) ? '*' : '/';
			}

			// promote expr arg
			// the former right child node gets detached from the tree
			pRoot->m_iRight = pRight->m_iRight;
			pRight = pExpr;
		}

		// promote children constants
		if ( IsAri(pLeft) && IsAddSub(pLeft)==IsAddSub(pRoot) && IsConst ( &m_dNodes[pLeft->m_iLeft] ) )
		{
			// ((const op lr) op2 right) gets replaced with (const op (lr op2/op right))
			// constant gets promoted one level up
			int iConst = pLeft->m_iLeft;
			pLeft->m_iLeft = pLeft->m_iRight;
			pLeft->m_iRight = pRoot->m_iRight; // (c op lr) -> (lr ... r)

			switch ( pLeft->m_iToken )
			{
				case '+':
				case '*':
					// (c + lr) op r -> c + (lr op r)
					// (c * lr) op r -> c * (lr op r)
					Swap ( pLeft->m_iToken, pRoot->m_iToken );
					break;

				case '-':
					// (c - lr) + r -> c - (lr - r)
					// (c - lr) - r -> c - (lr + r)
					pLeft->m_iToken = ( pRoot->m_iToken=='+' ? '-' : '+' );
					pRoot->m_iToken = '-';
					break;

				case '/':
					// (c / lr) * r -> c * (r / lr)
					// (c / lr) / r -> c / (r * lr)
					Swap ( pLeft->m_iLeft, pLeft->m_iRight );
					pLeft->m_iToken = ( pRoot->m_iToken=='*' ) ? '/' : '*';
					break;

				default:
					assert ( 0 && "internal error: unhandled op in left-const promotion" );
			}

			// the rebuilt child goes to the right, the constant goes to the left
			pRoot->m_iRight = pRoot->m_iLeft;
			pRoot->m_iLeft = iConst;

			pLeft = &m_dNodes[pRoot->m_iLeft];
			pRight = &m_dNodes[pRoot->m_iRight];
		}
	}

	// madd, mul3
	// FIXME! separate pass for these? otherwise (2+(a*b))+3 won't get const folding
	// NOTE(review): pLeft and pRight get dereferenced unchecked here; presumably '+' and '*' nodes always have both children
	if ( ( pRoot->m_iToken=='+' || pRoot->m_iToken=='*' ) && ( pLeft->m_iToken=='*' || pRight->m_iToken=='*' ) )
	{
		// make sure that the multiplication is on the left
		if ( pLeft->m_iToken!='*' )
		{
			Swap ( pRoot->m_iLeft, pRoot->m_iRight );
			Swap ( pLeft, pRight );
		}

		// the multiplication node becomes an arglist node that holds its two factors
		pLeft->m_iToken = ',';

		int iLeft = pRoot->m_iLeft;
		int iRight = pRoot->m_iRight;

		// the root becomes a function call, with a new arglist node (to be added below) as its only child
		pRoot->m_iFunc = ( pRoot->m_iToken=='+' ) ? FUNC_MADD : FUNC_MUL3;
		pRoot->m_iToken = TOK_FUNC;
		pRoot->m_iLeft = m_dNodes.GetLength();
		pRoot->m_iRight = -1;
		assert ( g_dFuncs[pRoot->m_iFunc].m_eFunc==pRoot->m_iFunc );

		ExprNode_t & tArgs = m_dNodes.Add(); // invalidates all pointers!
		tArgs.m_iToken = ',';
		tArgs.m_iLeft = iLeft;
		tArgs.m_iRight = iRight;
		return;
	}

	// division by a constant (replace with multiplication by inverse)
	if ( pRoot->m_iToken=='/' && pRight->m_iToken==TOK_CONST_FLOAT )
	{
		pRight->m_fConst = 1.0f / pRight->m_fConst;
		pRoot->m_iToken = '*';
		return;
	}

	// unary function from a constant
	if ( pRoot->m_iToken==TOK_FUNC && g_dFuncs[pRoot->m_iFunc].m_iArgs==1 )
	{
		assert ( pLeft );

		if ( IsConst ( pLeft ) )
		{
			// the result always is a float constant, even when the argument was an integer one
			// functions that are not listed below are left as they are
			float fArg = pLeft->m_iToken==TOK_CONST_FLOAT ? pLeft->m_fConst : float(pLeft->m_iConst);
			switch ( g_dFuncs[pRoot->m_iFunc].m_eFunc )
			{
				case FUNC_ABS:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fabs(fArg); break;
				case FUNC_CEIL:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float(ceil(fArg)); break;
				case FUNC_FLOOR:	pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float(floor(fArg)); break;
				case FUNC_SIN:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float(sin(fArg)); break;
				case FUNC_COS:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float(cos(fArg)); break;
				case FUNC_LN:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float(log(fArg)); break;
				case FUNC_LOG2:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float(log(fArg)*M_LOG2E); break;
				case FUNC_LOG10:	pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float(log(fArg)*M_LOG10E); break;
				case FUNC_EXP:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float(exp(fArg)); break;
				case FUNC_SQRT:		pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float(sqrt(fArg)); break;
				default:			break;
			}
			return;
		}
	}

	// constant function (such as NOW())
	if ( pRoot->m_iToken==TOK_FUNC && pRoot->m_iFunc==FUNC_NOW )
	{
		pRoot->m_iToken = TOK_CONST_INT;
		pRoot->m_iConst = m_iConstNow;
		return;
	}

	// SINT(int-attr)
	// the function call gets replaced with a signed attribute read that uses the locator of the argument
	if ( pRoot->m_iToken==TOK_FUNC && pRoot->m_iFunc==FUNC_SINT )
	{
		assert ( pLeft );

		if ( pLeft->m_iToken==TOK_ATTR_INT || pLeft->m_iToken==TOK_ATTR_BITS )
		{
			pRoot->m_iToken = TOK_ATTR_SINT;
			pRoot->m_tLocator = pLeft->m_tLocator;
			pRoot->m_iLeft = -1;
		}
	}
}
1421 
1422 
1423 // debug dump
Dump(int iNode)1424 void ExprParser_t::Dump ( int iNode )
1425 {
1426 	if ( iNode<0 )
1427 		return;
1428 
1429 	ExprNode_t & tNode = m_dNodes[iNode];
1430 	switch ( tNode.m_iToken )
1431 	{
1432 		case TOK_CONST_INT:
1433 			printf ( INT64_FMT, tNode.m_iConst );
1434 			break;
1435 
1436 		case TOK_CONST_FLOAT:
1437 			printf ( "%f", tNode.m_fConst );
1438 			break;
1439 
1440 		case TOK_ATTR_INT:
1441 		case TOK_ATTR_SINT:
1442 			printf ( "row[%d]", tNode.m_tLocator.m_iBitOffset/32 );
1443 			break;
1444 
1445 		default:
1446 			printf ( "(" );
1447 			Dump ( tNode.m_iLeft );
1448 			printf ( ( tNode.m_iToken<256 ) ? " %c " : " op-%d ", tNode.m_iToken );
1449 			Dump ( tNode.m_iRight );
1450 			printf ( ")" );
1451 			break;
1452 	}
1453 }
1454 
1455 
1456 /// fold arglist into array
FoldArglist(ISphExpr * pLeft,CSphVector<ISphExpr * > & dArgs)1457 static void FoldArglist ( ISphExpr * pLeft, CSphVector<ISphExpr *> & dArgs )
1458 {
1459 	if ( !pLeft || !pLeft->IsArglist() )
1460 	{
1461 		dArgs.Add ( pLeft );
1462 		return;
1463 	}
1464 
1465 	Expr_Arglist_c * pArgs = dynamic_cast<Expr_Arglist_c *> ( pLeft );
1466 	assert ( pLeft );
1467 
1468 	Swap ( dArgs, pArgs->m_dArgs );
1469 	SafeRelease ( pLeft );
1470 }
1471 
1472 
/// per-row UDF worker that returns an integer
/// gets called with the init data, the argument values, and a pointer to the error flag
typedef sphinx_int64_t ( *UdfInt_fn ) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
/// per-row UDF worker that returns a double; arguments are the same
typedef double ( *UdfDouble_fn ) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
1475 
1476 
1477 class Expr_Udf_c : public ISphExpr
1478 {
1479 public:
1480 	CSphVector<ISphExpr*>			m_dArgs;
1481 
1482 protected:
1483 	UdfCall_t *						m_pCall;
1484 	mutable CSphVector<int64_t>		m_dArgvals;
1485 	mutable char					m_bError;
1486 
1487 public:
Expr_Udf_c(UdfCall_t * pCall)1488 	explicit Expr_Udf_c ( UdfCall_t * pCall )
1489 		: m_pCall ( pCall )
1490 		, m_bError ( 0 )
1491 	{
1492 		SPH_UDF_ARGS & tArgs = m_pCall->m_tArgs;
1493 
1494 		assert ( tArgs.arg_values==NULL );
1495 		tArgs.arg_values = new char * [ tArgs.arg_count ];
1496 		tArgs.str_lengths = new int [ tArgs.arg_count ];
1497 
1498 		m_dArgvals.Resize ( tArgs.arg_count );
1499 		ARRAY_FOREACH ( i, m_dArgvals )
1500 			tArgs.arg_values[i] = (char*) &m_dArgvals[i];
1501 	}
1502 
~Expr_Udf_c()1503 	~Expr_Udf_c ()
1504 	{
1505 		if ( m_pCall->m_pUdf->m_fnDeinit )
1506 			m_pCall->m_pUdf->m_fnDeinit ( &m_pCall->m_tInit );
1507 		SafeDeleteArray ( m_pCall->m_tArgs.arg_names );
1508 		SafeDeleteArray ( m_pCall->m_tArgs.arg_types );
1509 		SafeDeleteArray ( m_pCall->m_tArgs.arg_values );
1510 		SafeDeleteArray ( m_pCall->m_tArgs.str_lengths );
1511 		SafeDelete ( m_pCall );
1512 
1513 		ARRAY_FOREACH ( i, m_dArgs )
1514 			SafeRelease ( m_dArgs[i] );
1515 	}
1516 
FillArgs(const CSphMatch & tMatch) const1517 	void FillArgs ( const CSphMatch & tMatch ) const
1518 	{
1519 		// FIXME? a cleaner way to reinterpret?
1520 		SPH_UDF_ARGS & tArgs = m_pCall->m_tArgs;
1521 		ARRAY_FOREACH ( i, m_dArgs )
1522 		{
1523 			switch ( tArgs.arg_types[i] )
1524 			{
1525 				case SPH_UDF_TYPE_UINT32:		*(DWORD*)&m_dArgvals[i] = m_dArgs[i]->IntEval ( tMatch ); break;
1526 				case SPH_UDF_TYPE_INT64:		m_dArgvals[i] = m_dArgs[i]->Int64Eval ( tMatch ); break;
1527 				case SPH_UDF_TYPE_FLOAT:		*(float*)&m_dArgvals[i] = m_dArgs[i]->Eval ( tMatch ); break;
1528 				case SPH_UDF_TYPE_STRING:		tArgs.str_lengths[i] = m_dArgs[i]->StringEval ( tMatch, (const BYTE**)&tArgs.arg_values[i] ); break;
1529 				case SPH_UDF_TYPE_UINT32SET:	tArgs.arg_values[i] = (char*) m_dArgs[i]->MvaEval ( tMatch ); break;
1530 				case SPH_UDF_TYPE_UINT64SET:	tArgs.arg_values[i] = (char*) m_dArgs[i]->MvaEval ( tMatch ); break;
1531 				default:						assert ( 0 ); m_dArgvals[i] = 0; break;
1532 			}
1533 		}
1534 	}
1535 
SetMVAPool(const DWORD * pPool)1536 	virtual void SetMVAPool ( const DWORD * pPool ) { ARRAY_FOREACH ( i, m_dArgs ) m_dArgs[i]->SetMVAPool ( pPool ); }
SetStringPool(const BYTE * pPool)1537 	virtual void SetStringPool ( const BYTE * pPool ) { ARRAY_FOREACH ( i, m_dArgs ) m_dArgs[i]->SetStringPool ( pPool ); }
GetDependencyColumns(CSphVector<int> & dDeps) const1538 	virtual void GetDependencyColumns ( CSphVector<int> & dDeps ) const { ARRAY_FOREACH ( i, m_dArgs ) m_dArgs[i]->GetDependencyColumns ( dDeps ); }
1539 };
1540 
1541 
1542 class Expr_UdfInt_c : public Expr_Udf_c
1543 {
1544 public:
Expr_UdfInt_c(UdfCall_t * pCall)1545 	explicit Expr_UdfInt_c ( UdfCall_t * pCall )
1546 		: Expr_Udf_c ( pCall )
1547 	{
1548 		assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_INTEGER || pCall->m_pUdf->m_eRetType==SPH_ATTR_BIGINT );
1549 	}
1550 
Int64Eval(const CSphMatch & tMatch) const1551 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
1552 	{
1553 		if ( m_bError )
1554 			return 0;
1555 		FillArgs ( tMatch );
1556 		UdfInt_fn pFn = (UdfInt_fn) m_pCall->m_pUdf->m_fnFunc;
1557 		return (int64_t) pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError );
1558 	}
1559 
IntEval(const CSphMatch & tMatch) const1560 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int) Int64Eval ( tMatch ); }
Eval(const CSphMatch & tMatch) const1561 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float) Int64Eval ( tMatch ); }
1562 };
1563 
1564 
1565 class Expr_UdfFloat_c : public Expr_Udf_c
1566 {
1567 public:
Expr_UdfFloat_c(UdfCall_t * pCall)1568 	explicit Expr_UdfFloat_c ( UdfCall_t * pCall )
1569 		: Expr_Udf_c ( pCall )
1570 	{
1571 		assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_FLOAT );
1572 	}
1573 
Eval(const CSphMatch & tMatch) const1574 	virtual float Eval ( const CSphMatch & tMatch ) const
1575 	{
1576 		if ( m_bError )
1577 			return 0;
1578 		FillArgs ( tMatch );
1579 		UdfDouble_fn pFn = (UdfDouble_fn) m_pCall->m_pUdf->m_fnFunc;
1580 		return (float) pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError );
1581 	}
1582 
IntEval(const CSphMatch & tMatch) const1583 	virtual int IntEval ( const CSphMatch & tMatch ) const { return (int) Eval ( tMatch ); }
Int64Eval(const CSphMatch & tMatch) const1584 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t) Eval ( tMatch ); }
1585 };
1586 
1587 
CreateUdfNode(int iCall,ISphExpr * pLeft)1588 ISphExpr * ExprParser_t::CreateUdfNode ( int iCall, ISphExpr * pLeft )
1589 {
1590 	Expr_Udf_c * pRes = NULL;
1591 	switch ( m_dUdfCalls[iCall]->m_pUdf->m_eRetType )
1592 	{
1593 		case SPH_ATTR_INTEGER:
1594 		case SPH_ATTR_BIGINT:
1595 			pRes = new Expr_UdfInt_c ( m_dUdfCalls[iCall] );
1596 			break;
1597 		case SPH_ATTR_FLOAT:
1598 			pRes = new Expr_UdfFloat_c ( m_dUdfCalls[iCall] );
1599 			break;
1600 		default:
1601 			m_sParserError.SetSprintf ( "internal error: unhandled type %d in CreateUdfNode()", m_dUdfCalls[iCall]->m_pUdf->m_eRetType );
1602 			break;
1603 	}
1604 	if ( pRes )
1605 	{
1606 		if ( pLeft )
1607 			FoldArglist ( pLeft, pRes->m_dArgs );
1608 		m_dUdfCalls[iCall] = NULL; // evaluator owns it now
1609 	}
1610 	return pRes;
1611 }
1612 
1613 
1614 /// fold nodes subtree into opcodes
CreateTree(int iNode)1615 ISphExpr * ExprParser_t::CreateTree ( int iNode )
1616 {
1617 	if ( iNode<0 )
1618 		return NULL;
1619 
1620 	const ExprNode_t & tNode = m_dNodes[iNode];
1621 
1622 	// avoid spawning argument node in some cases
1623 	bool bSkipLeft = false;
1624 	bool bSkipRight = false;
1625 	if ( tNode.m_iToken==TOK_FUNC )
1626 	{
1627 		Func_e eFunc = g_dFuncs[tNode.m_iFunc].m_eFunc;
1628 		if ( eFunc==FUNC_GEODIST || eFunc==FUNC_IN )
1629 			bSkipLeft = true;
1630 		if ( eFunc==FUNC_IN )
1631 			bSkipRight = true;
1632 	}
1633 
1634 	ISphExpr * pLeft = bSkipLeft ? NULL : CreateTree ( tNode.m_iLeft );
1635 	ISphExpr * pRight = bSkipRight ? NULL : CreateTree ( tNode.m_iRight );
1636 
1637 #define LOC_SPAWN_POLY(_classname) \
1638 	if ( tNode.m_eArgType==SPH_ATTR_INTEGER )		return new _classname##Int_c ( pLeft, pRight ); \
1639 	else if ( tNode.m_eArgType==SPH_ATTR_BIGINT )	return new _classname##Int64_c ( pLeft, pRight ); \
1640 	else											return new _classname##Float_c ( pLeft, pRight );
1641 
1642 	switch ( tNode.m_iToken )
1643 	{
1644 		case TOK_ATTR_INT:		return new Expr_GetInt_c ( tNode.m_tLocator, tNode.m_iLocator );
1645 		case TOK_ATTR_BITS:		return new Expr_GetBits_c ( tNode.m_tLocator, tNode.m_iLocator );
1646 		case TOK_ATTR_FLOAT:	return new Expr_GetFloat_c ( tNode.m_tLocator, tNode.m_iLocator );
1647 		case TOK_ATTR_SINT:		return new Expr_GetSint_c ( tNode.m_tLocator, tNode.m_iLocator );
1648 		case TOK_ATTR_STRING:	return new Expr_GetString_c ( tNode.m_tLocator, tNode.m_iLocator );
1649 		case TOK_ATTR_MVA64:
1650 		case TOK_ATTR_MVA32:	return new Expr_GetMva_c ( tNode.m_tLocator, tNode.m_iLocator );
1651 
1652 		case TOK_CONST_FLOAT:	return new Expr_GetConst_c ( tNode.m_fConst );
1653 		case TOK_CONST_INT:
1654 			if ( tNode.m_eRetType==SPH_ATTR_INTEGER )
1655 				return new Expr_GetIntConst_c ( (int)tNode.m_iConst );
1656 			else if ( tNode.m_eRetType==SPH_ATTR_BIGINT )
1657 				return new Expr_GetInt64Const_c ( tNode.m_iConst );
1658 			else
1659 				return new Expr_GetConst_c ( float(tNode.m_iConst) );
1660 			break;
1661 		case TOK_CONST_STRING:
1662 			return new Expr_GetStrConst_c ( m_sExpr+(int)( tNode.m_iConst>>32 ), (int)( tNode.m_iConst & 0xffffffffUL ) );
1663 
1664 		case TOK_ID:			return new Expr_GetId_c ();
1665 		case TOK_WEIGHT:		return new Expr_GetWeight_c ();
1666 
1667 		case '+':				return new Expr_Add_c ( pLeft, pRight ); break;
1668 		case '-':				return new Expr_Sub_c ( pLeft, pRight ); break;
1669 		case '*':				return new Expr_Mul_c ( pLeft, pRight ); break;
1670 		case '/':				return new Expr_Div_c ( pLeft, pRight ); break;
1671 		case '&':				return new Expr_BitAnd_c ( pLeft, pRight ); break;
1672 		case '|':				return new Expr_BitOr_c ( pLeft, pRight ); break;
1673 		case '%':				return new Expr_Mod_c ( pLeft, pRight ); break;
1674 
1675 		case '<':				LOC_SPAWN_POLY ( Expr_Lt ); break;
1676 		case '>':				LOC_SPAWN_POLY ( Expr_Gt ); break;
1677 		case TOK_LTE:			LOC_SPAWN_POLY ( Expr_Lte ); break;
1678 		case TOK_GTE:			LOC_SPAWN_POLY ( Expr_Gte ); break;
1679 		case TOK_EQ:			LOC_SPAWN_POLY ( Expr_Eq ); break;
1680 		case TOK_NE:			LOC_SPAWN_POLY ( Expr_Ne ); break;
1681 		case TOK_AND:			LOC_SPAWN_POLY ( Expr_And ); break;
1682 		case TOK_OR:			LOC_SPAWN_POLY ( Expr_Or ); break;
1683 		case TOK_NOT:
1684 			if ( tNode.m_eArgType==SPH_ATTR_BIGINT )
1685 				return new Expr_NotInt64_c ( pLeft );
1686 			else
1687 				return new Expr_NotInt_c ( pLeft );
1688 			break;
1689 
1690 		case ',':				return new Expr_Arglist_c ( pLeft, pRight ); break;
1691 		case TOK_NEG:			assert ( pRight==NULL ); return new Expr_Neg_c ( pLeft ); break;
1692 		case TOK_FUNC:
1693 			{
1694 				// fold arglist to array
1695 				Func_e eFunc = g_dFuncs[tNode.m_iFunc].m_eFunc;
1696 
1697 				CSphVector<ISphExpr *> dArgs;
1698 				if ( !bSkipLeft )
1699 					FoldArglist ( pLeft, dArgs );
1700 
1701 				// spawn proper function
1702 				assert ( tNode.m_iFunc>=0 && tNode.m_iFunc<int(sizeof(g_dFuncs)/sizeof(g_dFuncs[0])) );
1703 				assert (
1704 					( bSkipLeft ) || // function will handle its arglist,
1705 					( g_dFuncs[tNode.m_iFunc].m_iArgs>=0 && g_dFuncs[tNode.m_iFunc].m_iArgs==dArgs.GetLength() ) || // arg count matches,
1706 					( g_dFuncs[tNode.m_iFunc].m_iArgs<0 && -g_dFuncs[tNode.m_iFunc].m_iArgs<=dArgs.GetLength() ) ); // or min vararg count reached
1707 
1708 				switch ( eFunc )
1709 				{
1710 					case FUNC_NOW:		assert ( 0 ); break; // prevent gcc bitching
1711 
1712 					case FUNC_ABS:		return new Expr_Abs_c ( dArgs[0] );
1713 					case FUNC_CEIL:		return new Expr_Ceil_c ( dArgs[0] );
1714 					case FUNC_FLOOR:	return new Expr_Floor_c ( dArgs[0] );
1715 					case FUNC_SIN:		return new Expr_Sin_c ( dArgs[0] );
1716 					case FUNC_COS:		return new Expr_Cos_c ( dArgs[0] );
1717 					case FUNC_LN:		return new Expr_Ln_c ( dArgs[0] );
1718 					case FUNC_LOG2:		return new Expr_Log2_c ( dArgs[0] );
1719 					case FUNC_LOG10:	return new Expr_Log10_c ( dArgs[0] );
1720 					case FUNC_EXP:		return new Expr_Exp_c ( dArgs[0] );
1721 					case FUNC_SQRT:		return new Expr_Sqrt_c ( dArgs[0] );
1722 					case FUNC_BIGINT:	return dArgs[0];
1723 					case FUNC_SINT:		return new Expr_Sint_c ( dArgs[0] );
1724 					case FUNC_CRC32:	return new Expr_Crc32_c ( dArgs[0] );
1725 					case FUNC_FIBONACCI:return new Expr_Fibonacci_c ( dArgs[0] );
1726 
1727 					case FUNC_DAY:			return new Expr_Day_c ( dArgs[0] );
1728 					case FUNC_MONTH:		return new Expr_Month_c ( dArgs[0] );
1729 					case FUNC_YEAR:			return new Expr_Year_c ( dArgs[0] );
1730 					case FUNC_YEARMONTH:	return new Expr_YearMonth_c ( dArgs[0] );
1731 					case FUNC_YEARMONTHDAY:	return new Expr_YearMonthDay_c ( dArgs[0] );
1732 
1733 					case FUNC_MIN:		return new Expr_Min_c ( dArgs[0], dArgs[1] );
1734 					case FUNC_MAX:		return new Expr_Max_c ( dArgs[0], dArgs[1] );
1735 					case FUNC_POW:		return new Expr_Pow_c ( dArgs[0], dArgs[1] );
1736 					case FUNC_IDIV:		return new Expr_Idiv_c ( dArgs[0], dArgs[1] );
1737 
1738 					case FUNC_IF:		return new Expr_If_c ( dArgs[0], dArgs[1], dArgs[2] );
1739 					case FUNC_MADD:		return new Expr_Madd_c ( dArgs[0], dArgs[1], dArgs[2] );
1740 					case FUNC_MUL3:		return new Expr_Mul3_c ( dArgs[0], dArgs[1], dArgs[2] );
1741 
1742 					case FUNC_INTERVAL:	return CreateIntervalNode ( tNode.m_iLeft, dArgs );
1743 					case FUNC_IN:		return CreateInNode ( iNode );
1744 					case FUNC_BITDOT:	return CreateBitdotNode ( tNode.m_iLeft, dArgs );
1745 
1746 					case FUNC_GEODIST:	return CreateGeodistNode ( tNode.m_iLeft );
1747 				}
1748 				assert ( 0 && "unhandled function id" );
1749 				break;
1750 			}
1751 
1752 		case TOK_UDF:			return CreateUdfNode ( tNode.m_iFunc, pLeft ); break;
1753 		case TOK_HOOK_IDENT:	return m_pHook->CreateNode ( tNode.m_iFunc, NULL ); break;
1754 		case TOK_HOOK_FUNC:		return m_pHook->CreateNode ( tNode.m_iFunc, pLeft ); break;
1755 		default:				assert ( 0 && "unhandled token type" ); break;
1756 	}
1757 
1758 #undef LOC_SPAWN_POLY
1759 
1760 	// fire exit
1761 	SafeRelease ( pLeft );
1762 	SafeRelease ( pRight );
1763 	return NULL;
1764 }
1765 
1766 //////////////////////////////////////////////////////////////////////////
1767 
1768 /// arg-vs-set function (currently, IN or INTERVAL) evaluator traits
1769 template < typename T >
1770 class Expr_ArgVsSet_c : public ISphExpr
1771 {
1772 protected:
1773 	ISphExpr *			m_pArg;
1774 
1775 public:
Expr_ArgVsSet_c(ISphExpr * pArg)1776 	explicit Expr_ArgVsSet_c ( ISphExpr * pArg ) : m_pArg ( pArg ) {}
~Expr_ArgVsSet_c()1777 	~Expr_ArgVsSet_c () { SafeRelease ( m_pArg ); }
1778 
1779 	virtual int IntEval ( const CSphMatch & tMatch ) const = 0;
Eval(const CSphMatch & tMatch) const1780 	virtual float Eval ( const CSphMatch & tMatch ) const { return (float) IntEval ( tMatch ); }
Int64Eval(const CSphMatch & tMatch) const1781 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval ( tMatch ); }
GetDependencyColumns(CSphVector<int> & dColumns) const1782 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const
1783 	{
1784 		assert ( m_pArg );
1785 		m_pArg->GetDependencyColumns ( dColumns );
1786 	}
1787 
1788 protected:
1789 	T ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const;
1790 };
1791 
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const1792 template<> int Expr_ArgVsSet_c<int>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const			{ return pArg->IntEval ( tMatch ); }
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const1793 template<> DWORD Expr_ArgVsSet_c<DWORD>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const		{ return (DWORD)pArg->IntEval ( tMatch ); }
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const1794 template<> float Expr_ArgVsSet_c<float>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const		{ return pArg->Eval ( tMatch ); }
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const1795 template<> int64_t Expr_ArgVsSet_c<int64_t>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const	{ return pArg->Int64Eval ( tMatch ); }
1796 
1797 
1798 /// arg-vs-constant-set
1799 template < typename T >
1800 class Expr_ArgVsConstSet_c : public Expr_ArgVsSet_c<T>
1801 {
1802 protected:
1803 	CSphVector<T> m_dValues;
1804 
1805 public:
1806 	/// take ownership of arg, pre-evaluate and dismiss turn points
Expr_ArgVsConstSet_c(ISphExpr * pArg,CSphVector<ISphExpr * > & dArgs,int iSkip)1807 	Expr_ArgVsConstSet_c ( ISphExpr * pArg, CSphVector<ISphExpr *> & dArgs, int iSkip )
1808 		: Expr_ArgVsSet_c<T> ( pArg )
1809 	{
1810 		CSphMatch tDummy;
1811 		for ( int i=iSkip; i<dArgs.GetLength(); i++ )
1812 		{
1813 			m_dValues.Add ( Expr_ArgVsSet_c<T>::ExprEval ( dArgs[i], tDummy ) );
1814 			SafeRelease ( dArgs[i] );
1815 		}
1816 	}
1817 
1818 	/// take ownership of arg, and copy that constlist
Expr_ArgVsConstSet_c(ISphExpr * pArg,ConstList_c * pConsts)1819 	Expr_ArgVsConstSet_c ( ISphExpr * pArg, ConstList_c * pConsts )
1820 		: Expr_ArgVsSet_c<T> ( pArg )
1821 	{
1822 		if ( !pConsts )
1823 			return; // can happen on uservar path
1824 		if ( pConsts->m_eRetType==SPH_ATTR_FLOAT )
1825 		{
1826 			m_dValues.Reserve ( pConsts->m_dFloats.GetLength() );
1827 			ARRAY_FOREACH ( i, pConsts->m_dFloats )
1828 				m_dValues.Add ( (T)pConsts->m_dFloats[i] );
1829 		} else
1830 		{
1831 			m_dValues.Reserve ( pConsts->m_dInts.GetLength() );
1832 			ARRAY_FOREACH ( i, pConsts->m_dInts )
1833 				m_dValues.Add ( (T)pConsts->m_dInts[i] );
1834 		}
1835 	}
1836 };
1837 
1838 //////////////////////////////////////////////////////////////////////////
1839 
1840 /// INTERVAL() evaluator for constant turn point values case
1841 template < typename T >
1842 class Expr_IntervalConst_c : public Expr_ArgVsConstSet_c<T>
1843 {
1844 public:
1845 	/// take ownership of arg, pre-evaluate and dismiss turn points
Expr_IntervalConst_c(CSphVector<ISphExpr * > & dArgs)1846 	explicit Expr_IntervalConst_c ( CSphVector<ISphExpr *> & dArgs )
1847 		: Expr_ArgVsConstSet_c<T> ( dArgs[0], dArgs, 1 )
1848 	{}
1849 
1850 	/// evaluate arg, return interval id
IntEval(const CSphMatch & tMatch) const1851 	virtual int IntEval ( const CSphMatch & tMatch ) const
1852 	{
1853 		T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
1854 		ARRAY_FOREACH ( i, this->m_dValues ) // FIXME! OPTIMIZE! perform binary search here
1855 			if ( val<this->m_dValues[i] )
1856 				return i;
1857 		return this->m_dValues.GetLength();
1858 	}
1859 
SetMVAPool(const DWORD * pMvaPool)1860 	virtual void SetMVAPool ( const DWORD * pMvaPool ) { this->m_pArg->SetMVAPool ( pMvaPool ); }
SetStringPool(const BYTE * pStrings)1861 	virtual void SetStringPool ( const BYTE * pStrings ) { this->m_pArg->SetStringPool ( pStrings ); }
1862 };
1863 
1864 
1865 /// generic INTERVAL() evaluator
1866 template < typename T >
1867 class Expr_Interval_c : public Expr_ArgVsSet_c<T>
1868 {
1869 protected:
1870 	CSphVector<ISphExpr *> m_dTurnPoints;
1871 
1872 public:
1873 	/// take ownership of arg and turn points
Expr_Interval_c(const CSphVector<ISphExpr * > & dArgs)1874 	explicit Expr_Interval_c ( const CSphVector<ISphExpr *> & dArgs )
1875 		: Expr_ArgVsSet_c<T> ( dArgs[0] )
1876 	{
1877 		for ( int i=1; i<dArgs.GetLength(); i++ )
1878 			m_dTurnPoints.Add ( dArgs[i] );
1879 	}
1880 
1881 	/// evaluate arg, return interval id
IntEval(const CSphMatch & tMatch) const1882 	virtual int IntEval ( const CSphMatch & tMatch ) const
1883 	{
1884 		T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
1885 		ARRAY_FOREACH ( i, m_dTurnPoints )
1886 			if ( val < Expr_ArgVsSet_c<T>::ExprEval ( m_dTurnPoints[i], tMatch ) )
1887 				return i;
1888 		return m_dTurnPoints.GetLength();
1889 	}
1890 
SetMVAPool(const DWORD * pMvaPool)1891 	virtual void SetMVAPool ( const DWORD * pMvaPool )
1892 	{
1893 		this->m_pArg->SetMVAPool ( pMvaPool );
1894 		ARRAY_FOREACH ( i, m_dTurnPoints )
1895 			m_dTurnPoints[i]->SetMVAPool ( pMvaPool );
1896 	}
1897 
SetStringPool(const BYTE * pStrings)1898 	virtual void SetStringPool ( const BYTE * pStrings )
1899 	{
1900 		this->m_pArg->SetStringPool ( pStrings );
1901 		ARRAY_FOREACH ( i, m_dTurnPoints )
1902 			m_dTurnPoints[i]->SetStringPool ( pStrings );
1903 	}
1904 
GetDependencyColumns(CSphVector<int> & dColumns) const1905 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const
1906 	{
1907 		Expr_ArgVsSet_c<T>::GetDependencyColumns ( dColumns );
1908 		ARRAY_FOREACH ( i, m_dTurnPoints )
1909 			m_dTurnPoints[i]->GetDependencyColumns ( dColumns );
1910 	}
1911 };
1912 
1913 //////////////////////////////////////////////////////////////////////////
1914 
1915 /// IN() evaluator, arbitrary scalar expression vs. constant values
1916 template < typename T >
1917 class Expr_In_c : public Expr_ArgVsConstSet_c<T>
1918 {
1919 public:
1920 	/// pre-sort values for binary search
Expr_In_c(ISphExpr * pArg,ConstList_c * pConsts)1921 	Expr_In_c ( ISphExpr * pArg, ConstList_c * pConsts ) :
1922 		Expr_ArgVsConstSet_c<T> ( pArg, pConsts )
1923 	{
1924 		this->m_dValues.Sort();
1925 	}
1926 
1927 	/// evaluate arg, check if the value is within set
IntEval(const CSphMatch & tMatch) const1928 	virtual int IntEval ( const CSphMatch & tMatch ) const
1929 	{
1930 		T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
1931 		return this->m_dValues.BinarySearch ( val )!=NULL;
1932 	}
1933 
SetMVAPool(const DWORD * pMvaPool)1934 	virtual void SetMVAPool ( const DWORD * pMvaPool ) { this->m_pArg->SetMVAPool ( pMvaPool ); }
SetStringPool(const BYTE * pStrings)1935 	virtual void SetStringPool ( const BYTE * pStrings ) { this->m_pArg->SetStringPool ( pStrings ); }
1936 };
1937 
1938 
1939 /// IN() evaluator, arbitrary scalar expression vs. uservar
1940 /// (for the sake of evaluator, uservar is a pre-sorted, refcounted external vector)
1941 class Expr_InUservar_c : public Expr_ArgVsSet_c<int64_t>
1942 {
1943 protected:
1944 	UservarIntSet_c * m_pConsts;
1945 
1946 public:
1947 	/// just get hold of args
Expr_InUservar_c(ISphExpr * pArg,UservarIntSet_c * pConsts)1948 	explicit Expr_InUservar_c ( ISphExpr * pArg, UservarIntSet_c * pConsts )
1949 		: Expr_ArgVsSet_c<int64_t> ( pArg )
1950 		, m_pConsts ( pConsts ) // no addref, hook should have addref'd (otherwise there'd be a race)
1951 	{}
1952 
1953 	/// release the uservar value
~Expr_InUservar_c()1954 	~Expr_InUservar_c()
1955 	{
1956 		SafeRelease ( m_pConsts );
1957 	}
1958 
1959 	/// evaluate arg, check if the value is within set
IntEval(const CSphMatch & tMatch) const1960 	virtual int IntEval ( const CSphMatch & tMatch ) const
1961 	{
1962 		int64_t iVal = ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
1963 		return m_pConsts->BinarySearch ( iVal )!=NULL;
1964 	}
1965 
SetMVAPool(const DWORD * pMvaPool)1966 	virtual void SetMVAPool ( const DWORD * pMvaPool ) { this->m_pArg->SetMVAPool ( pMvaPool ); }
SetStringPool(const BYTE * pStrings)1967 	virtual void SetStringPool ( const BYTE * pStrings ) { this->m_pArg->SetStringPool ( pStrings ); }
1968 };
1969 
1970 
1971 /// IN() evaluator, MVA attribute vs. constant values
1972 template < bool MVA64 >
1973 class Expr_MVAIn_c : public Expr_ArgVsConstSet_c<int64_t>
1974 {
1975 public:
1976 	/// pre-sort values for binary search
Expr_MVAIn_c(const CSphAttrLocator & tLoc,int iLocator,ConstList_c * pConsts,UservarIntSet_c * pUservar)1977 	Expr_MVAIn_c ( const CSphAttrLocator & tLoc, int iLocator, ConstList_c * pConsts, UservarIntSet_c * pUservar )
1978 		: Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts )
1979 		, m_tLocator ( tLoc )
1980 		, m_iLocator ( iLocator )
1981 		, m_pMvaPool ( NULL )
1982 		, m_pUservar ( pUservar )
1983 	{
1984 		assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
1985 		assert ( !pConsts || !pUservar ); // either constlist or uservar, not both
1986 		this->m_dValues.Sort();
1987 	}
1988 
~Expr_MVAIn_c()1989 	~Expr_MVAIn_c()
1990 	{
1991 		SafeRelease ( m_pUservar );
1992 	}
1993 
1994 	int MvaEval ( const DWORD * pMva ) const;
1995 
MvaEval(const CSphMatch &) const1996 	virtual const DWORD * MvaEval ( const CSphMatch & ) const { assert ( 0 && "not implemented" ); return NULL; }
1997 
1998 	/// evaluate arg, check if any values are within set
IntEval(const CSphMatch & tMatch) const1999 	virtual int IntEval ( const CSphMatch & tMatch ) const
2000 	{
2001 		const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool );
2002 		if ( !pMva )
2003 			return 0;
2004 
2005 		return MvaEval ( pMva );
2006 	}
2007 
SetMVAPool(const DWORD * pMvaPool)2008 	virtual void SetMVAPool ( const DWORD * pMvaPool )
2009 	{
2010 		m_pMvaPool = pMvaPool; // finally, some real setup work!!!
2011 	}
2012 
GetDependencyColumns(CSphVector<int> & dColumns) const2013 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const
2014 	{
2015 		dColumns.Add ( m_iLocator );
2016 	}
2017 
2018 protected:
2019 	CSphAttrLocator		m_tLocator;
2020 	int					m_iLocator;
2021 	const DWORD *		m_pMvaPool;
2022 	UservarIntSet_c *	m_pUservar;
2023 };
2024 
2025 
2026 template<>
MvaEval(const DWORD * pMva) const2027 int Expr_MVAIn_c<false>::MvaEval ( const DWORD * pMva ) const
2028 {
2029 	// OPTIMIZE! FIXME! factor out a common function with Filter_MVAValues::Eval()
2030 	DWORD uLen = *pMva++;
2031 	const DWORD * pMvaMax = pMva+uLen;
2032 
2033 	const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
2034 	const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
2035 
2036 	const DWORD * L = pMva;
2037 	const DWORD * R = pMvaMax - 1;
2038 	for ( ; pFilter < pFilterMax; pFilter++ )
2039 	{
2040 		while ( L<=R )
2041 		{
2042 			const DWORD * m = L + (R - L) / 2;
2043 
2044 			if ( *pFilter > *m )
2045 				L = m + 1;
2046 			else if ( *pFilter < *m )
2047 				R = m - 1;
2048 			else
2049 				return 1;
2050 		}
2051 		R = pMvaMax - 1;
2052 	}
2053 	return 0;
2054 }
2055 
2056 
2057 template<>
MvaEval(const DWORD * pMva) const2058 int Expr_MVAIn_c<true>::MvaEval ( const DWORD * pMva ) const
2059 {
2060 	// OPTIMIZE! FIXME! factor out a common function with Filter_MVAValues::Eval()
2061 	DWORD uLen = *pMva++;
2062 	assert ( ( uLen%2 )==0 );
2063 	const DWORD * pMvaMax = pMva+uLen;
2064 
2065 	const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
2066 	const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
2067 
2068 	const int64_t * L = (const int64_t *)pMva;
2069 	const int64_t * R = (const int64_t *)( pMvaMax - 2 );
2070 	for ( ; pFilter < pFilterMax; pFilter++ )
2071 	{
2072 		while ( L<=R )
2073 		{
2074 			const int64_t * pVal = L + (R - L) / 2;
2075 			int64_t iMva = MVA_UPSIZE ( (const DWORD *)pVal );
2076 
2077 			if ( *pFilter > iMva )
2078 				L = pVal + 1;
2079 			else if ( *pFilter < iMva )
2080 				R = pVal - 1;
2081 			else
2082 				return 1;
2083 		}
2084 		R = (const int64_t *) ( pMvaMax - 2 );
2085 	}
2086 	return 0;
2087 }
2088 
2089 
2090 //////////////////////////////////////////////////////////////////////////
2091 
2092 /// generic BITDOT() evaluator
2093 template < typename T >
2094 class Expr_Bitdot_c : public Expr_ArgVsSet_c<T>
2095 {
2096 protected:
2097 	CSphVector<ISphExpr *> m_dBitWeights;
2098 
2099 public:
2100 	/// take ownership of arg and turn points
Expr_Bitdot_c(const CSphVector<ISphExpr * > & dArgs)2101 	explicit Expr_Bitdot_c ( const CSphVector<ISphExpr *> & dArgs )
2102 		: Expr_ArgVsSet_c<T> ( dArgs[0] )
2103 	{
2104 		for ( int i=1; i<dArgs.GetLength(); i++ )
2105 			m_dBitWeights.Add ( dArgs[i] );
2106 	}
2107 
2108 protected:
2109 	/// generic evaluate
DoEval(const CSphMatch & tMatch) const2110 	virtual T DoEval ( const CSphMatch & tMatch ) const
2111 	{
2112 		int64_t uArg = this->m_pArg->Int64Eval ( tMatch ); // 'this' fixes gcc braindamage
2113 		T tRes = 0;
2114 
2115 		int iBit = 0;
2116 		while ( uArg && iBit<m_dBitWeights.GetLength() )
2117 		{
2118 			if ( uArg & 1 )
2119 				tRes += Expr_ArgVsSet_c<T>::ExprEval ( m_dBitWeights[iBit], tMatch );
2120 			uArg >>= 1;
2121 			iBit++;
2122 		}
2123 
2124 		return tRes;
2125 	}
2126 
2127 public:
Eval(const CSphMatch & tMatch) const2128 	virtual float Eval ( const CSphMatch & tMatch ) const
2129 	{
2130 		return (float) DoEval ( tMatch );
2131 	}
2132 
IntEval(const CSphMatch & tMatch) const2133 	virtual int IntEval ( const CSphMatch & tMatch ) const
2134 	{
2135 		return (int) DoEval ( tMatch );
2136 	}
2137 
Int64Eval(const CSphMatch & tMatch) const2138 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
2139 	{
2140 		return (int64_t) DoEval ( tMatch );
2141 	}
2142 
SetMVAPool(const DWORD * pMvaPool)2143 	virtual void SetMVAPool ( const DWORD * pMvaPool )
2144 	{
2145 		this->m_pArg->SetMVAPool ( pMvaPool );
2146 		ARRAY_FOREACH ( i, m_dBitWeights )
2147 			m_dBitWeights[i]->SetMVAPool ( pMvaPool );
2148 	}
2149 
SetStringPool(const BYTE * pStrings)2150 	virtual void SetStringPool ( const BYTE * pStrings )
2151 	{
2152 		this->m_pArg->SetStringPool ( pStrings );
2153 		ARRAY_FOREACH ( i, m_dBitWeights )
2154 			m_dBitWeights[i]->SetStringPool ( pStrings );
2155 	}
2156 
GetDependencyColumns(CSphVector<int> & dColumns) const2157 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const
2158 	{
2159 		Expr_ArgVsSet_c<T>::GetDependencyColumns ( dColumns );
2160 		ARRAY_FOREACH ( i, m_dBitWeights )
2161 			m_dBitWeights[i]->GetDependencyColumns ( dColumns );
2162 	}
2163 };
2164 
2165 //////////////////////////////////////////////////////////////////////////
2166 
/// square of a given value
static inline double sphSqr ( double v )
{
	const double fSquared = v * v;
	return fSquared;
}
2168 
CalcGeodist(float fPointLat,float fPointLon,float fAnchorLat,float fAnchorLon)2169 static inline float CalcGeodist ( float fPointLat, float fPointLon, float fAnchorLat, float fAnchorLon )
2170 {
2171 	const double R = 6384000;
2172 	double dlat = fPointLat - fAnchorLat;
2173 	double dlon = fPointLon - fAnchorLon;
2174 	double a = sphSqr ( sin ( dlat/2 ) ) + cos ( fPointLat ) * cos ( fAnchorLat ) * sphSqr ( sin ( dlon/2 ) );
2175 	double c = 2*asin ( Min ( 1, sqrt(a) ) );
2176 	return (float)(R*c);
2177 }
2178 
2179 /// geodist() - attr point, constant anchor
2180 class Expr_GeodistAttrConst_c: public ISphExpr
2181 {
2182 public:
Expr_GeodistAttrConst_c(CSphAttrLocator tLat,CSphAttrLocator tLon,float fAnchorLat,float fAnchorLon,int iLat,int iLon)2183 	Expr_GeodistAttrConst_c ( CSphAttrLocator tLat, CSphAttrLocator tLon, float fAnchorLat, float fAnchorLon, int iLat, int iLon )
2184 		: m_tLat ( tLat )
2185 		, m_tLon ( tLon )
2186 		, m_fAnchorLat ( fAnchorLat )
2187 		, m_fAnchorLon ( fAnchorLon )
2188 		, m_iLat ( iLat )
2189 		, m_iLon ( iLon )
2190 	{}
2191 
Eval(const CSphMatch & tMatch) const2192 	virtual float Eval ( const CSphMatch & tMatch ) const
2193 	{
2194 		return CalcGeodist ( tMatch.GetAttrFloat ( m_tLat ), tMatch.GetAttrFloat ( m_tLon ), m_fAnchorLat, m_fAnchorLon );
2195 	}
2196 
GetDependencyColumns(CSphVector<int> & dColumns) const2197 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const
2198 	{
2199 		dColumns.Add ( m_iLat );
2200 		dColumns.Add ( m_iLon );
2201 	}
2202 
2203 private:
2204 	CSphAttrLocator	m_tLat;
2205 	CSphAttrLocator	m_tLon;
2206 
2207 	float		m_fAnchorLat;
2208 	float		m_fAnchorLon;
2209 
2210 	int			m_iLat;
2211 	int			m_iLon;
2212 };
2213 
2214 /// geodist() - expr point, constant anchor
2215 class Expr_GeodistConst_c: public ISphExpr
2216 {
2217 public:
Expr_GeodistConst_c(ISphExpr * pLat,ISphExpr * pLon,float fAnchorLat,float fAnchorLon)2218 	Expr_GeodistConst_c ( ISphExpr * pLat, ISphExpr * pLon, float fAnchorLat, float fAnchorLon )
2219 		: m_pLat ( pLat )
2220 		, m_pLon ( pLon )
2221 		, m_fAnchorLat ( fAnchorLat )
2222 		, m_fAnchorLon ( fAnchorLon )
2223 	{}
2224 
~Expr_GeodistConst_c()2225 	~Expr_GeodistConst_c ()
2226 	{
2227 		SafeRelease ( m_pLon );
2228 		SafeRelease ( m_pLat );
2229 	}
2230 
Eval(const CSphMatch & tMatch) const2231 	virtual float Eval ( const CSphMatch & tMatch ) const
2232 	{
2233 		return CalcGeodist ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_fAnchorLat, m_fAnchorLon );
2234 	}
2235 
GetDependencyColumns(CSphVector<int> & dColumns) const2236 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const
2237 	{
2238 		m_pLat->GetDependencyColumns ( dColumns );
2239 		m_pLon->GetDependencyColumns ( dColumns );
2240 	}
2241 
2242 private:
2243 	ISphExpr *	m_pLat;
2244 	ISphExpr *	m_pLon;
2245 
2246 	float		m_fAnchorLat;
2247 	float		m_fAnchorLon;
2248 };
2249 
2250 /// geodist() - expr point, expr anchor
2251 class Expr_Geodist_c: public ISphExpr
2252 {
2253 public:
Expr_Geodist_c(ISphExpr * pLat,ISphExpr * pLon,ISphExpr * pAnchorLat,ISphExpr * pAnchorLon)2254 	Expr_Geodist_c ( ISphExpr * pLat, ISphExpr * pLon, ISphExpr * pAnchorLat, ISphExpr * pAnchorLon )
2255 		: m_pLat ( pLat )
2256 		, m_pLon ( pLon )
2257 		, m_pAnchorLat ( pAnchorLat )
2258 		, m_pAnchorLon ( pAnchorLon )
2259 	{}
2260 
~Expr_Geodist_c()2261 	~Expr_Geodist_c ()
2262 	{
2263 		SafeRelease ( m_pAnchorLon );
2264 		SafeRelease ( m_pAnchorLat );
2265 		SafeRelease ( m_pLon );
2266 		SafeRelease ( m_pLat );
2267 	}
2268 
Eval(const CSphMatch & tMatch) const2269 	virtual float Eval ( const CSphMatch & tMatch ) const
2270 	{
2271 		return CalcGeodist ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_pAnchorLat->Eval(tMatch), m_pAnchorLon->Eval(tMatch) );
2272 	}
2273 
GetDependencyColumns(CSphVector<int> & dColumns) const2274 	virtual void GetDependencyColumns ( CSphVector<int> & dColumns ) const
2275 	{
2276 		m_pLat->GetDependencyColumns ( dColumns );
2277 		m_pLon->GetDependencyColumns ( dColumns );
2278 		m_pAnchorLat->GetDependencyColumns ( dColumns );
2279 		m_pAnchorLon->GetDependencyColumns ( dColumns );
2280 	}
2281 
2282 private:
2283 	ISphExpr *	m_pLat;
2284 	ISphExpr *	m_pLon;
2285 
2286 	ISphExpr *	m_pAnchorLat;
2287 	ISphExpr *	m_pAnchorLon;
2288 };
2289 
2290 //////////////////////////////////////////////////////////////////////////
2291 
GatherArgTypes(int iNode,CSphVector<int> & dTypes)2292 void ExprParser_t::GatherArgTypes ( int iNode, CSphVector<int> & dTypes )
2293 {
2294 	if ( iNode<0 )
2295 		return;
2296 
2297 	const ExprNode_t & tNode = m_dNodes[iNode];
2298 	if ( tNode.m_iToken==',' )
2299 	{
2300 		GatherArgTypes ( tNode.m_iLeft, dTypes );
2301 		GatherArgTypes ( tNode.m_iRight, dTypes );
2302 	} else
2303 	{
2304 		dTypes.Add ( tNode.m_iToken );
2305 	}
2306 }
2307 
GatherArgNodes(int iNode,CSphVector<int> & dNodes)2308 void ExprParser_t::GatherArgNodes ( int iNode, CSphVector<int> & dNodes )
2309 {
2310 	if ( iNode<0 )
2311 		return;
2312 
2313 	const ExprNode_t & tNode = m_dNodes[iNode];
2314 	if ( tNode.m_iToken==',' )
2315 	{
2316 		GatherArgNodes ( tNode.m_iLeft, dNodes );
2317 		GatherArgNodes ( tNode.m_iRight, dNodes );
2318 	} else
2319 		dNodes.Add ( iNode );
2320 }
2321 
GatherArgRetTypes(int iNode,CSphVector<ESphAttr> & dTypes)2322 void ExprParser_t::GatherArgRetTypes ( int iNode, CSphVector<ESphAttr> & dTypes )
2323 {
2324 	if ( iNode<0 )
2325 		return;
2326 
2327 	const ExprNode_t & tNode = m_dNodes[iNode];
2328 	if ( tNode.m_iToken==',' )
2329 	{
2330 		GatherArgRetTypes ( tNode.m_iLeft, dTypes );
2331 		GatherArgRetTypes ( tNode.m_iRight, dTypes );
2332 	} else
2333 	{
2334 		dTypes.Add ( tNode.m_eRetType );
2335 	}
2336 }
2337 
CheckForConstSet(int iArgsNode,int iSkip)2338 bool ExprParser_t::CheckForConstSet ( int iArgsNode, int iSkip )
2339 {
2340 	CSphVector<int> dTypes;
2341 	GatherArgTypes ( iArgsNode, dTypes );
2342 
2343 	for ( int i=iSkip; i<dTypes.GetLength(); i++ )
2344 		if ( dTypes[i]!=TOK_CONST_INT && dTypes[i]!=TOK_CONST_FLOAT )
2345 			return false;
2346 	return true;
2347 }
2348 
2349 
2350 template < typename T >
WalkTree(int iRoot,T & FUNCTOR)2351 void ExprParser_t::WalkTree ( int iRoot, T & FUNCTOR )
2352 {
2353 	if ( iRoot>=0 )
2354 	{
2355 		const ExprNode_t & tNode = m_dNodes[iRoot];
2356 		FUNCTOR.Enter ( tNode );
2357 		WalkTree ( tNode.m_iLeft, FUNCTOR );
2358 		WalkTree ( tNode.m_iRight, FUNCTOR );
2359 		FUNCTOR.Exit ( tNode );
2360 	}
2361 }
2362 
2363 
CreateIntervalNode(int iArgsNode,CSphVector<ISphExpr * > & dArgs)2364 ISphExpr * ExprParser_t::CreateIntervalNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs )
2365 {
2366 	assert ( dArgs.GetLength()>=2 );
2367 
2368 	bool bConst = CheckForConstSet ( iArgsNode, 1 );
2369 	ESphAttr eAttrType = m_dNodes[iArgsNode].m_eArgType;
2370 	if ( bConst )
2371 	{
2372 		switch ( eAttrType )
2373 		{
2374 			case SPH_ATTR_INTEGER:	return new Expr_IntervalConst_c<int> ( dArgs ); break;
2375 			case SPH_ATTR_BIGINT:	return new Expr_IntervalConst_c<int64_t> ( dArgs ); break;
2376 			default:				return new Expr_IntervalConst_c<float> ( dArgs ); break;
2377 		}
2378 	} else
2379 	{
2380 		switch ( eAttrType )
2381 		{
2382 			case SPH_ATTR_INTEGER:	return new Expr_Interval_c<int> ( dArgs ); break;
2383 			case SPH_ATTR_BIGINT:	return new Expr_Interval_c<int64_t> ( dArgs ); break;
2384 			default:				return new Expr_Interval_c<float> ( dArgs ); break;
2385 		}
2386 	}
2387 }
2388 
2389 
CreateInNode(int iNode)2390 ISphExpr * ExprParser_t::CreateInNode ( int iNode )
2391 {
2392 	const ExprNode_t & tLeft = m_dNodes[m_dNodes[iNode].m_iLeft];
2393 	const ExprNode_t & tRight = m_dNodes[m_dNodes[iNode].m_iRight];
2394 
2395 	switch ( tRight.m_iToken )
2396 	{
2397 		// create IN(arg,constlist)
2398 		case TOK_CONST_LIST:
2399 			switch ( tLeft.m_iToken )
2400 			{
2401 				case TOK_ATTR_MVA32:
2402 					return new Expr_MVAIn_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL );
2403 				case TOK_ATTR_MVA64:
2404 					return new Expr_MVAIn_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL );
2405 				default:
2406 				{
2407 					ISphExpr * pArg = CreateTree ( m_dNodes[iNode].m_iLeft );
2408 					switch ( tRight.m_pConsts->m_eRetType )
2409 					{
2410 						case SPH_ATTR_INTEGER:	return new Expr_In_c<int> ( pArg, tRight.m_pConsts ); break;
2411 						case SPH_ATTR_BIGINT:	return new Expr_In_c<int64_t> ( pArg, tRight.m_pConsts ); break;
2412 						default:				return new Expr_In_c<float> ( pArg, tRight.m_pConsts ); break;
2413 					}
2414 				}
2415 			}
2416 			break;
2417 
2418 		// create IN(arg,uservar)
2419 		case TOK_USERVAR:
2420 		{
2421 			if ( !g_pUservarsHook )
2422 			{
2423 				m_sCreateError.SetSprintf ( "internal error: no uservars hook" );
2424 				return NULL;
2425 			}
2426 
2427 			UservarIntSet_c * pUservar = g_pUservarsHook ( m_dUservars[(int)tRight.m_iConst] );
2428 			if ( !pUservar )
2429 			{
2430 				m_sCreateError.SetSprintf ( "undefined user variable '%s'", m_dUservars[(int)tRight.m_iConst].cstr() );
2431 				return NULL;
2432 			}
2433 
2434 			switch ( tLeft.m_iToken )
2435 			{
2436 				case TOK_ATTR_MVA32:
2437 					return new Expr_MVAIn_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar );
2438 				case TOK_ATTR_MVA64:
2439 					return new Expr_MVAIn_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar );
2440 				default:
2441 					return new Expr_InUservar_c ( CreateTree ( m_dNodes[iNode].m_iLeft ), pUservar );
2442 			}
2443 			break;
2444 		}
2445 
2446 		// oops, unhandled case
2447 		default:
2448 			m_sCreateError = "IN() arguments must be constants (except the 1st one)";
2449 			return NULL;
2450 	}
2451 }
2452 
2453 
CreateGeodistNode(int iArgs)2454 ISphExpr * ExprParser_t::CreateGeodistNode ( int iArgs )
2455 {
2456 	CSphVector<int> dArgs;
2457 	GatherArgNodes ( iArgs, dArgs );
2458 	assert ( dArgs.GetLength()==4 );
2459 
2460 	bool bConst1 = ( IsConst ( &m_dNodes[dArgs[0]] ) && IsConst ( &m_dNodes[dArgs[1]] ) );
2461 	bool bConst2 = ( IsConst ( &m_dNodes[dArgs[2]] ) && IsConst ( &m_dNodes[dArgs[3]] ) );
2462 
2463 	if ( bConst1 && bConst2 )
2464 	{
2465 		return new Expr_GetConst_c ( CalcGeodist (
2466 			m_dNodes[dArgs[0]].FloatVal(), m_dNodes[dArgs[1]].FloatVal(),
2467 			m_dNodes[dArgs[2]].FloatVal(), m_dNodes[dArgs[3]].FloatVal() ) );
2468 	}
2469 
2470 	if ( bConst1 )
2471 	{
2472 		Swap ( dArgs[0], dArgs[2] );
2473 		Swap ( dArgs[1], dArgs[3] );
2474 		Swap ( bConst1, bConst2 );
2475 	}
2476 
2477 	if ( bConst2 )
2478 	{
2479 		// constant anchor
2480 		if ( m_dNodes[dArgs[0]].m_iToken==TOK_ATTR_FLOAT && m_dNodes[dArgs[1]].m_iToken==TOK_ATTR_FLOAT )
2481 		{
2482 			// attr point
2483 			return new Expr_GeodistAttrConst_c (
2484 				m_dNodes[dArgs[0]].m_tLocator, m_dNodes[dArgs[1]].m_tLocator,
2485 				m_dNodes[dArgs[2]].FloatVal(), m_dNodes[dArgs[3]].FloatVal(),
2486 				m_dNodes[dArgs[0]].m_iLocator, m_dNodes[dArgs[1]].m_iLocator );
2487 		} else
2488 		{
2489 			// expr point
2490 			return new Expr_GeodistConst_c (
2491 				CreateTree ( dArgs[0] ), CreateTree ( dArgs[1] ),
2492 				m_dNodes[dArgs[2]].FloatVal(), m_dNodes[dArgs[3]].FloatVal() );
2493 		}
2494 	}
2495 
2496 	// four expressions
2497 	CSphVector<ISphExpr *> dExpr;
2498 	FoldArglist ( CreateTree ( iArgs ), dExpr );
2499 	assert ( dExpr.GetLength()==4 );
2500 	return new Expr_Geodist_c ( dExpr[0], dExpr[1], dExpr[2], dExpr[3] );
2501 }
2502 
2503 
CreateBitdotNode(int iArgsNode,CSphVector<ISphExpr * > & dArgs)2504 ISphExpr * ExprParser_t::CreateBitdotNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs )
2505 {
2506 	assert ( dArgs.GetLength()>=1 );
2507 
2508 	ESphAttr eAttrType = m_dNodes[iArgsNode].m_eRetType;
2509 	switch ( eAttrType )
2510 	{
2511 		case SPH_ATTR_INTEGER:	return new Expr_Bitdot_c<int> ( dArgs ); break;
2512 		case SPH_ATTR_BIGINT:	return new Expr_Bitdot_c<int64_t> ( dArgs ); break;
2513 		default:				return new Expr_Bitdot_c<float> ( dArgs ); break;
2514 	}
2515 }
2516 
2517 //////////////////////////////////////////////////////////////////////////
2518 
yylex(YYSTYPE * lvalp,ExprParser_t * pParser)2519 int yylex ( YYSTYPE * lvalp, ExprParser_t * pParser )
2520 {
2521 	return pParser->GetToken ( lvalp );
2522 }
2523 
yyerror(ExprParser_t * pParser,const char * sMessage)2524 void yyerror ( ExprParser_t * pParser, const char * sMessage )
2525 {
2526 	pParser->m_sParserError.SetSprintf ( "Sphinx expr: %s near '%s'", sMessage, pParser->m_pLastTokenStart );
2527 }
2528 
2529 #if USE_WINDOWS
2530 #pragma warning(push,1)
2531 #endif
2532 
2533 #include "yysphinxexpr.c"
2534 
2535 #if USE_WINDOWS
2536 #pragma warning(pop)
2537 #endif
2538 
2539 //////////////////////////////////////////////////////////////////////////
2540 
~ExprParser_t()2541 ExprParser_t::~ExprParser_t ()
2542 {
2543 	// i kinda own those constlists
2544 	ARRAY_FOREACH ( i, m_dNodes )
2545 		if ( m_dNodes[i].m_iToken==TOK_CONST_LIST )
2546 			SafeDelete ( m_dNodes[i].m_pConsts );
2547 
2548 	// free any UDF calls that weren't taken over
2549 	ARRAY_FOREACH ( i, m_dUdfCalls )
2550 		SafeDelete ( m_dUdfCalls[i] );
2551 }
2552 
GetWidestRet(int iLeft,int iRight)2553 ESphAttr ExprParser_t::GetWidestRet ( int iLeft, int iRight )
2554 {
2555 	ESphAttr uLeftType = ( iLeft<0 ) ? SPH_ATTR_INTEGER : m_dNodes[iLeft].m_eRetType;
2556 	ESphAttr uRightType = ( iRight<0 ) ? SPH_ATTR_INTEGER : m_dNodes[iRight].m_eRetType;
2557 
2558 	ESphAttr uRes = SPH_ATTR_FLOAT; // default is float
2559 	if ( ( uLeftType==SPH_ATTR_INTEGER || uLeftType==SPH_ATTR_BIGINT ) &&
2560 		( uRightType==SPH_ATTR_INTEGER || uRightType==SPH_ATTR_BIGINT ) )
2561 	{
2562 		// both types are integer (int32 or int64), compute in integers
2563 		uRes = ( uLeftType==SPH_ATTR_INTEGER && uRightType==SPH_ATTR_INTEGER )
2564 			? SPH_ATTR_INTEGER
2565 			: SPH_ATTR_BIGINT;
2566 	}
2567 	return uRes;
2568 }
2569 
AddNodeInt(int64_t iValue)2570 int ExprParser_t::AddNodeInt ( int64_t iValue )
2571 {
2572 	ExprNode_t & tNode = m_dNodes.Add ();
2573 	tNode.m_iToken = TOK_CONST_INT;
2574 	tNode.m_eRetType = GetIntType ( iValue );
2575 	tNode.m_iConst = iValue;
2576 	return m_dNodes.GetLength()-1;
2577 }
2578 
AddNodeFloat(float fValue)2579 int ExprParser_t::AddNodeFloat ( float fValue )
2580 {
2581 	ExprNode_t & tNode = m_dNodes.Add ();
2582 	tNode.m_iToken = TOK_CONST_FLOAT;
2583 	tNode.m_eRetType = SPH_ATTR_FLOAT;
2584 	tNode.m_fConst = fValue;
2585 	return m_dNodes.GetLength()-1;
2586 }
2587 
AddNodeString(int64_t iValue)2588 int ExprParser_t::AddNodeString ( int64_t iValue )
2589 {
2590 	ExprNode_t & tNode = m_dNodes.Add ();
2591 	tNode.m_iToken = TOK_CONST_STRING;
2592 	tNode.m_eRetType = SPH_ATTR_STRING;
2593 	tNode.m_iConst = iValue;
2594 	return m_dNodes.GetLength()-1;
2595 }
2596 
AddNodeAttr(int iTokenType,uint64_t uAttrLocator)2597 int ExprParser_t::AddNodeAttr ( int iTokenType, uint64_t uAttrLocator )
2598 {
2599 	assert ( iTokenType==TOK_ATTR_INT || iTokenType==TOK_ATTR_BITS || iTokenType==TOK_ATTR_FLOAT
2600 		|| iTokenType==TOK_ATTR_MVA32 || iTokenType==TOK_ATTR_MVA64 || iTokenType==TOK_ATTR_STRING );
2601 	ExprNode_t & tNode = m_dNodes.Add ();
2602 	tNode.m_iToken = iTokenType;
2603 	sphUnpackAttrLocator ( uAttrLocator, &tNode );
2604 
2605 	if ( iTokenType==TOK_ATTR_FLOAT )			tNode.m_eRetType = SPH_ATTR_FLOAT;
2606 	else if ( iTokenType==TOK_ATTR_MVA32 )		tNode.m_eRetType = SPH_ATTR_UINT32SET;
2607 	else if ( iTokenType==TOK_ATTR_MVA64 )		tNode.m_eRetType = SPH_ATTR_INT64SET;
2608 	else if ( iTokenType==TOK_ATTR_STRING )		tNode.m_eRetType = SPH_ATTR_STRING;
2609 	else if ( tNode.m_tLocator.m_iBitCount>32 )	tNode.m_eRetType = SPH_ATTR_BIGINT;
2610 	else										tNode.m_eRetType = SPH_ATTR_INTEGER;
2611 	return m_dNodes.GetLength()-1;
2612 }
2613 
AddNodeID()2614 int ExprParser_t::AddNodeID ()
2615 {
2616 	ExprNode_t & tNode = m_dNodes.Add ();
2617 	tNode.m_iToken = TOK_ID;
2618 	tNode.m_eRetType = USE_64BIT ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER;
2619 	return m_dNodes.GetLength()-1;
2620 }
2621 
AddNodeWeight()2622 int ExprParser_t::AddNodeWeight ()
2623 {
2624 	ExprNode_t & tNode = m_dNodes.Add ();
2625 	tNode.m_iToken = TOK_WEIGHT;
2626 	tNode.m_eRetType = SPH_ATTR_INTEGER;
2627 	return m_dNodes.GetLength()-1;
2628 }
2629 
AddNodeOp(int iOp,int iLeft,int iRight)2630 int ExprParser_t::AddNodeOp ( int iOp, int iLeft, int iRight )
2631 {
2632 	ExprNode_t & tNode = m_dNodes.Add ();
2633 	tNode.m_iToken = iOp;
2634 
2635 	// deduce type
2636 	tNode.m_eRetType = SPH_ATTR_FLOAT; // default to float
2637 	if ( iOp==TOK_NEG )
2638 	{
2639 		// NEG just inherits the type
2640 		tNode.m_eArgType = m_dNodes[iLeft].m_eRetType;
2641 		tNode.m_eRetType = tNode.m_eArgType;
2642 
2643 	} else if ( iOp==TOK_NOT )
2644 	{
2645 		// NOT result is integer, and its argument must be integer
2646 		tNode.m_eArgType = m_dNodes[iLeft].m_eRetType;
2647 		tNode.m_eRetType = SPH_ATTR_INTEGER;
2648 		if (!( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
2649 		{
2650 			m_sParserError.SetSprintf ( "NOT argument must be integer" );
2651 			return -1;
2652 		}
2653 
2654 	} else if ( iOp==TOK_LTE || iOp==TOK_GTE || iOp==TOK_EQ || iOp==TOK_NE
2655 		|| iOp=='<' || iOp=='>' || iOp==TOK_AND || iOp==TOK_OR
2656 		|| iOp=='+' || iOp=='-' || iOp=='*' || iOp==','
2657 		|| iOp=='&' || iOp=='|' || iOp=='%' )
2658 	{
2659 		tNode.m_eArgType = GetWidestRet ( iLeft, iRight );
2660 
2661 		// arithmetical operations return arg type, logical return int
2662 		tNode.m_eRetType = ( iOp=='+' || iOp=='-' || iOp=='*' || iOp==',' || iOp=='&' || iOp=='|' || iOp=='%' )
2663 			? tNode.m_eArgType
2664 			: SPH_ATTR_INTEGER;
2665 
2666 		// both logical and bitwise AND/OR can only be over ints
2667 		if ( ( iOp==TOK_AND || iOp==TOK_OR || iOp=='&' || iOp=='|' )
2668 			&& !( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
2669 		{
2670 			m_sParserError.SetSprintf ( "%s arguments must be integer", ( iOp==TOK_AND || iOp=='&' ) ? "AND" : "OR" );
2671 			return -1;
2672 		}
2673 
2674 		// MOD can only be over ints
2675 		if ( iOp=='%'
2676 			&& !( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
2677 		{
2678 			m_sParserError.SetSprintf ( "MOD arguments must be integer" );
2679 			return -1;
2680 		}
2681 
2682 	} else
2683 	{
2684 		// check for unknown op
2685 		assert ( iOp=='/' && "unknown op in AddNodeOp() type deducer" );
2686 	}
2687 
2688 	tNode.m_iArgs = 0;
2689 	if ( iOp==',' )
2690 	{
2691 		if ( iLeft>=0 )		tNode.m_iArgs += ( m_dNodes[iLeft].m_iToken==',' ) ? m_dNodes[iLeft].m_iArgs : 1;
2692 		if ( iRight>=0 )	tNode.m_iArgs += ( m_dNodes[iRight].m_iToken==',' ) ? m_dNodes[iRight].m_iArgs : 1;
2693 	}
2694 	tNode.m_iLeft = iLeft;
2695 	tNode.m_iRight = iRight;
2696 	return m_dNodes.GetLength()-1;
2697 }
2698 
2699 
AddNodeFunc(int iFunc,int iLeft,int iRight)2700 int ExprParser_t::AddNodeFunc ( int iFunc, int iLeft, int iRight )
2701 {
2702 	// regular case, iLeft is entire arglist, iRight is -1
2703 	// special case for IN(), iLeft is arg, iRight is constlist
2704 	assert ( iFunc>=0 && iFunc<int(sizeof(g_dFuncs)/sizeof(g_dFuncs[0])) );
2705 	Func_e eFunc = g_dFuncs[iFunc].m_eFunc;
2706 
2707 	// check args count
2708 	if ( iRight<0 || eFunc==FUNC_IN )
2709 	{
2710 		int iExpectedArgc = g_dFuncs[iFunc].m_iArgs;
2711 		int iArgc = 0;
2712 		if ( iLeft>=0 )
2713 			iArgc = ( m_dNodes[iLeft].m_iToken==',' ) ? m_dNodes[iLeft].m_iArgs : 1;
2714 		if ( iExpectedArgc<0 )
2715 		{
2716 			if ( iArgc<-iExpectedArgc )
2717 			{
2718 				m_sParserError.SetSprintf ( "%s() called with %d args, at least %d args expected", g_dFuncs[iFunc].m_sName, iArgc, -iExpectedArgc );
2719 				return -1;
2720 			}
2721 		} else if ( iArgc!=iExpectedArgc )
2722 		{
2723 			m_sParserError.SetSprintf ( "%s() called with %d args, %d args expected", g_dFuncs[iFunc].m_sName, iArgc, iExpectedArgc );
2724 			return -1;
2725 		}
2726 	}
2727 
2728 	// check arg types
2729 	//
2730 	// check for string args
2731 	// most builtin functions take numeric args only
2732 	bool bGotString = false, bGotMva = false;
2733 	if ( iRight<0 )
2734 	{
2735 		CSphVector<ESphAttr> dRetTypes;
2736 		GatherArgRetTypes ( iLeft, dRetTypes );
2737 		ARRAY_FOREACH ( i, dRetTypes )
2738 		{
2739 			bGotString |= ( dRetTypes[i]==SPH_ATTR_STRING );
2740 			bGotMva |= ( dRetTypes[i]==SPH_ATTR_UINT32SET || dRetTypes[i]==SPH_ATTR_INT64SET );
2741 		}
2742 	}
2743 	if ( bGotString && eFunc!=FUNC_CRC32 )
2744 	{
2745 		m_sParserError.SetSprintf ( "%s() arguments can not be string", g_dFuncs[iFunc].m_sName );
2746 		return -1;
2747 	}
2748 	if ( bGotMva && eFunc!=FUNC_IN )
2749 	{
2750 		m_sParserError.SetSprintf ( "%s() arguments can not be MVA", g_dFuncs[iFunc].m_sName );
2751 		return -1;
2752 	}
2753 
2754 	// check that first BITDOT arg is integer or bigint
2755 	if ( eFunc==FUNC_BITDOT )
2756 	{
2757 		int iLeftmost = iLeft;
2758 		while ( m_dNodes[iLeftmost].m_iToken==',' )
2759 			iLeftmost = m_dNodes[iLeftmost].m_iLeft;
2760 
2761 		ESphAttr eArg = m_dNodes[iLeftmost].m_eRetType;
2762 		if ( eArg!=SPH_ATTR_INTEGER && eArg!=SPH_ATTR_BIGINT )
2763 		{
2764 			m_sParserError.SetSprintf ( "first BITDOT() argument must be integer" );
2765 			return -1;
2766 		}
2767 	}
2768 
2769 	// check that first SINT or timestamp family arg is integer
2770 	if ( eFunc==FUNC_SINT || eFunc==FUNC_DAY || eFunc==FUNC_MONTH || eFunc==FUNC_YEAR || eFunc==FUNC_YEARMONTH || eFunc==FUNC_YEARMONTHDAY
2771 		|| eFunc==FUNC_FIBONACCI )
2772 	{
2773 		assert ( iLeft>=0 );
2774 		if ( m_dNodes[iLeft].m_eRetType!=SPH_ATTR_INTEGER )
2775 		{
2776 			m_sParserError.SetSprintf ( "%s() argument must be integer", g_dFuncs[iFunc].m_sName );
2777 			return -1;
2778 		}
2779 	}
2780 
2781 	// do add
2782 	ExprNode_t & tNode = m_dNodes.Add ();
2783 	tNode.m_iToken = TOK_FUNC;
2784 	tNode.m_iFunc = iFunc;
2785 	tNode.m_iLeft = iLeft;
2786 	tNode.m_iRight = iRight;
2787 	tNode.m_eArgType = ( iLeft>=0 ) ? m_dNodes[iLeft].m_eRetType : SPH_ATTR_INTEGER;
2788 	tNode.m_eRetType = g_dFuncs[iFunc].m_eRet;
2789 
2790 	// fixup return type in a few special cases
2791 	if ( eFunc==FUNC_MIN || eFunc==FUNC_MAX || eFunc==FUNC_MADD || eFunc==FUNC_MUL3 || eFunc==FUNC_ABS || eFunc==FUNC_IDIV )
2792 		tNode.m_eRetType = tNode.m_eArgType;
2793 
2794 	if ( eFunc==FUNC_BIGINT && tNode.m_eRetType==SPH_ATTR_FLOAT )
2795 		tNode.m_eRetType = SPH_ATTR_FLOAT; // enforce if we can; FIXME! silently ignores BIGINT() on floats; should warn or raise an error
2796 
2797 	if ( eFunc==FUNC_IF || eFunc==FUNC_BITDOT )
2798 		tNode.m_eRetType = GetWidestRet ( iLeft, iRight );
2799 
2800 	// all ok
2801 	assert ( tNode.m_eRetType!=SPH_ATTR_NONE );
2802 	return m_dNodes.GetLength()-1;
2803 }
2804 
AddNodeUdf(int iCall,int iArg)2805 int ExprParser_t::AddNodeUdf ( int iCall, int iArg )
2806 {
2807 	UdfCall_t * pCall = m_dUdfCalls[iCall];
2808 	SPH_UDF_INIT & tInit = pCall->m_tInit;
2809 	SPH_UDF_ARGS & tArgs = pCall->m_tArgs;
2810 
2811 	// initialize UDF right here, at AST creation stage
2812 	// just because it's easy to gather arg types here
2813 	if ( iArg>=0 )
2814 	{
2815 		// gather arg types
2816 		CSphVector<DWORD> dArgTypes;
2817 
2818 		int iCur = iArg;
2819 		while ( iCur>=0 )
2820 		{
2821 			if ( m_dNodes[iCur].m_iToken!=',' )
2822 			{
2823 				dArgTypes.Add ( m_dNodes[iCur].m_eRetType );
2824 				break;
2825 			}
2826 
2827 			int iRight = m_dNodes[iCur].m_iRight;
2828 			if ( iRight>=0 )
2829 			{
2830 				assert ( m_dNodes[iRight].m_iToken!=',' );
2831 				dArgTypes.Add ( m_dNodes[iRight].m_eRetType );
2832 			}
2833 
2834 			iCur = m_dNodes[iCur].m_iLeft;
2835 		}
2836 
2837 		assert ( dArgTypes.GetLength() );
2838 		tArgs.arg_count = dArgTypes.GetLength();
2839 		tArgs.arg_types = new sphinx_udf_argtype [ tArgs.arg_count ];
2840 
2841 		// we gathered internal type ids in right-to-left order
2842 		// reverse and remap
2843 		// FIXME! eliminate remap, maybe?
2844 		ARRAY_FOREACH ( i, dArgTypes )
2845 		{
2846 			sphinx_udf_argtype & eRes = tArgs.arg_types [ tArgs.arg_count-1-i ];
2847 			switch ( dArgTypes[i] )
2848 			{
2849 				case SPH_ATTR_INTEGER:
2850 				case SPH_ATTR_TIMESTAMP:
2851 				case SPH_ATTR_ORDINAL:
2852 				case SPH_ATTR_BOOL:
2853 				case SPH_ATTR_WORDCOUNT:
2854 					eRes = SPH_UDF_TYPE_UINT32;
2855 					break;
2856 				case SPH_ATTR_FLOAT:
2857 					eRes = SPH_UDF_TYPE_FLOAT;
2858 					break;
2859 				case SPH_ATTR_BIGINT:
2860 					eRes = SPH_UDF_TYPE_INT64;
2861 					break;
2862 				case SPH_ATTR_STRING:
2863 					eRes = SPH_UDF_TYPE_STRING;
2864 					break;
2865 				case SPH_ATTR_UINT32SET:
2866 					eRes = SPH_UDF_TYPE_UINT32SET;
2867 					break;
2868 				case SPH_ATTR_INT64SET:
2869 					eRes = SPH_UDF_TYPE_UINT64SET;
2870 					break;
2871 				default:
2872 					m_sParserError.SetSprintf ( "internal error: unmapped UDF argument type (arg=%d, type=%d)", i, dArgTypes[i] );
2873 					return -1;
2874 			}
2875 		}
2876 	}
2877 
2878 	// init
2879 	if ( pCall->m_pUdf->m_fnInit )
2880 	{
2881 		char sError [ SPH_UDF_ERROR_LEN ];
2882 		if ( pCall->m_pUdf->m_fnInit ( &tInit, &tArgs, sError ) )
2883 		{
2884 			m_sParserError = sError;
2885 			return -1;
2886 		}
2887 	}
2888 
2889 	// do add
2890 	ExprNode_t & tNode = m_dNodes.Add ();
2891 	tNode.m_iToken = TOK_UDF;
2892 	tNode.m_iFunc = iCall;
2893 	tNode.m_iLeft = iArg;
2894 	tNode.m_iRight = -1;
2895 
2896 	// deduce type
2897 	tNode.m_eArgType = ( iArg>=0 ) ? m_dNodes[iArg].m_eRetType : SPH_ATTR_INTEGER;
2898 	tNode.m_eRetType = pCall->m_pUdf->m_eRetType;
2899 	return m_dNodes.GetLength()-1;
2900 }
2901 
AddNodeConstlist(int64_t iValue)2902 int ExprParser_t::AddNodeConstlist ( int64_t iValue )
2903 {
2904 	ExprNode_t & tNode = m_dNodes.Add();
2905 	tNode.m_iToken = TOK_CONST_LIST;
2906 	tNode.m_pConsts = new ConstList_c();
2907 	tNode.m_pConsts->Add ( iValue );
2908 	return m_dNodes.GetLength()-1;
2909 }
2910 
AddNodeConstlist(float iValue)2911 int ExprParser_t::AddNodeConstlist ( float iValue )
2912 {
2913 	ExprNode_t & tNode = m_dNodes.Add();
2914 	tNode.m_iToken = TOK_CONST_LIST;
2915 	tNode.m_pConsts = new ConstList_c();
2916 	tNode.m_pConsts->Add ( iValue );
2917 	return m_dNodes.GetLength()-1;
2918 }
2919 
AppendToConstlist(int iNode,int64_t iValue)2920 void ExprParser_t::AppendToConstlist ( int iNode, int64_t iValue )
2921 {
2922 	m_dNodes[iNode].m_pConsts->Add ( iValue );
2923 }
2924 
AppendToConstlist(int iNode,float iValue)2925 void ExprParser_t::AppendToConstlist ( int iNode, float iValue )
2926 {
2927 	m_dNodes[iNode].m_pConsts->Add ( iValue );
2928 }
2929 
AddNodeUservar(int iUservar)2930 int ExprParser_t::AddNodeUservar ( int iUservar )
2931 {
2932 	ExprNode_t & tNode = m_dNodes.Add();
2933 	tNode.m_iToken = TOK_USERVAR;
2934 	tNode.m_iConst = iUservar;
2935 	return m_dNodes.GetLength()-1;
2936 }
2937 
AddNodeHookIdent(int iID)2938 int ExprParser_t::AddNodeHookIdent ( int iID )
2939 {
2940 	ExprNode_t & tNode = m_dNodes.Add();
2941 	tNode.m_iToken = TOK_HOOK_IDENT;
2942 	tNode.m_iFunc = iID;
2943 	tNode.m_eRetType = m_pHook->GetIdentType ( iID );
2944 	return m_dNodes.GetLength()-1;
2945 }
2946 
AddNodeHookFunc(int iID,int iLeft)2947 int ExprParser_t::AddNodeHookFunc ( int iID, int iLeft )
2948 {
2949 	CSphVector<ESphAttr> dArgTypes;
2950 	GatherArgRetTypes ( iLeft, dArgTypes );
2951 
2952 	ESphAttr eRet = m_pHook->GetReturnType ( iID, dArgTypes, CheckForConstSet ( iLeft, 0 ), m_sParserError );
2953 	if ( eRet==SPH_ATTR_NONE )
2954 		return -1;
2955 
2956 	ExprNode_t & tNode = m_dNodes.Add();
2957 	tNode.m_iToken = TOK_HOOK_FUNC;
2958 	tNode.m_iFunc = iID;
2959 	tNode.m_iLeft = iLeft;
2960 	tNode.m_iRight = -1;
2961 
2962 	// deduce type
2963 	tNode.m_eArgType = ( iLeft>=0 ) ? m_dNodes[iLeft].m_eRetType : SPH_ATTR_INTEGER;
2964 	tNode.m_eRetType = eRet;
2965 
2966 	return m_dNodes.GetLength()-1;
2967 }
2968 
2969 
2970 struct WeightCheck_fn
2971 {
2972 	bool * m_pRes;
2973 
WeightCheck_fnWeightCheck_fn2974 	explicit WeightCheck_fn ( bool * pRes )
2975 		: m_pRes ( pRes )
2976 	{
2977 		assert ( m_pRes );
2978 		*m_pRes = false;
2979 	}
2980 
EnterWeightCheck_fn2981 	void Enter ( const ExprNode_t & tNode )
2982 	{
2983 		if ( tNode.m_iToken==TOK_WEIGHT )
2984 			*m_pRes = true;
2985 	}
2986 
ExitWeightCheck_fn2987 	void Exit ( const ExprNode_t & )
2988 	{}
2989 };
2990 
2991 
2992 struct HookCheck_fn
2993 {
2994 	ISphExprHook * m_pHook;
2995 
HookCheck_fnHookCheck_fn2996 	explicit HookCheck_fn ( ISphExprHook * pHook )
2997 		: m_pHook ( pHook )
2998 	{}
2999 
EnterHookCheck_fn3000 	void Enter ( const ExprNode_t & tNode )
3001 	{
3002 		if ( tNode.m_iToken==TOK_HOOK_IDENT || tNode.m_iToken==TOK_HOOK_FUNC )
3003 			m_pHook->CheckEnter ( tNode.m_iFunc );
3004 	}
3005 
ExitHookCheck_fn3006 	void Exit ( const ExprNode_t & tNode )
3007 	{
3008 		if ( tNode.m_iToken==TOK_HOOK_IDENT || tNode.m_iToken==TOK_HOOK_FUNC )
3009 			m_pHook->CheckExit ( tNode.m_iFunc );
3010 	}
3011 };
3012 
3013 
Parse(const char * sExpr,const CSphSchema & tSchema,ESphAttr * pAttrType,bool * pUsesWeight,CSphString & sError)3014 ISphExpr * ExprParser_t::Parse ( const char * sExpr, const CSphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError )
3015 {
3016 	m_sLexerError = "";
3017 	m_sParserError = "";
3018 	m_sCreateError = "";
3019 
3020 	// setup lexer
3021 	m_sExpr = sExpr;
3022 	m_pCur = sExpr;
3023 	m_pSchema = &tSchema;
3024 
3025 	// setup constant functions
3026 	m_iConstNow = (int) time ( NULL );
3027 
3028 	// build tree
3029 	m_iParsed = -1;
3030 	yyparse ( this );
3031 
3032 	// handle errors
3033 	if ( m_iParsed<0 || !m_sLexerError.IsEmpty() || !m_sParserError.IsEmpty() )
3034 	{
3035 		sError = !m_sLexerError.IsEmpty() ? m_sLexerError : m_sParserError;
3036 		if ( sError.IsEmpty() ) sError = "general parsing error";
3037 		return NULL;
3038 	}
3039 
3040 	// deduce return type
3041 	ESphAttr eAttrType = m_dNodes[m_iParsed].m_eRetType;
3042 	assert ( eAttrType==SPH_ATTR_INTEGER || eAttrType==SPH_ATTR_BIGINT || eAttrType==SPH_ATTR_FLOAT );
3043 
3044 	// check expression stack
3045 	if ( m_dNodes.GetLength()>100 )
3046 	{
3047 		CSphVector<int> dNodes;
3048 		dNodes.Reserve ( m_dNodes.GetLength()/2 );
3049 		int iMaxHeight = 1;
3050 		int iHeight = 1;
3051 		dNodes.Add ( m_iParsed );
3052 		while ( dNodes.GetLength() )
3053 		{
3054 			const ExprNode_t & tExpr = m_dNodes[dNodes.Pop()];
3055 			iHeight += ( tExpr.m_iLeft>=0 || tExpr.m_iRight>=0 ? 1 : -1 );
3056 			iMaxHeight = Max ( iMaxHeight, iHeight );
3057 			if ( tExpr.m_iRight>=0 )
3058 				dNodes.Add ( tExpr.m_iRight );
3059 			if ( tExpr.m_iLeft>=0 )
3060 				dNodes.Add ( tExpr.m_iLeft );
3061 		}
3062 
3063 #define SPH_EXPRNODE_STACK_SIZE 160
3064 		int64_t iExprStack = sphGetStackUsed() + iMaxHeight*SPH_EXPRNODE_STACK_SIZE;
3065 		if ( g_iThreadStackSize<=iExprStack )
3066 		{
3067 			sError.SetSprintf ( "query too complex, not enough stack (thread_stack_size=%dK or higher required)",
3068 				(int)( ( iExprStack + 1024 - ( iExprStack%1024 ) ) / 1024 ) );
3069 			return NULL;
3070 		}
3071 	}
3072 
3073 	// perform optimizations
3074 	Optimize ( m_iParsed );
3075 #if 0
3076 	Dump ( m_iParsed );
3077 #endif
3078 
3079 	// create evaluator
3080 	ISphExpr * pRes = CreateTree ( m_iParsed );
3081 	if ( !m_sCreateError.IsEmpty() )
3082 	{
3083 		sError = m_sCreateError;
3084 		SafeRelease ( pRes );
3085 	} else if ( !pRes )
3086 	{
3087 		sError.SetSprintf ( "empty expression" );
3088 	}
3089 
3090 	if ( pAttrType )
3091 		*pAttrType = eAttrType;
3092 
3093 	if ( pUsesWeight )
3094 	{
3095 		WeightCheck_fn tFunctor ( pUsesWeight );
3096 		WalkTree ( m_iParsed, tFunctor );
3097 	}
3098 
3099 	if ( m_pHook )
3100 	{
3101 		HookCheck_fn tFunctor ( m_pHook );
3102 		WalkTree ( m_iParsed, tFunctor );
3103 	}
3104 
3105 	return pRes;
3106 }
3107 
3108 //////////////////////////////////////////////////////////////////////////
3109 // UDF MANAGER
3110 //////////////////////////////////////////////////////////////////////////
3111 
3112 #if USE_WINDOWS
3113 #define HAVE_DLOPEN		1
3114 #define RTLD_LAZY		0
3115 #define RTLD_LOCAL		0
3116 
dlsym(void * lib,const char * name)3117 void * dlsym ( void * lib, const char * name )
3118 {
3119 	return GetProcAddress ( (HMODULE)lib, name );
3120 }
3121 
dlopen(const char * libname,int)3122 void * dlopen ( const char * libname, int )
3123 {
3124 	return LoadLibraryEx ( libname, NULL, 0 );
3125 }
3126 
dlclose(void * lib)3127 int dlclose ( void * lib )
3128 {
3129 	return FreeLibrary ( (HMODULE)lib )
3130 		? 0
3131 		: GetLastError();
3132 }
3133 
dlerror()3134 const char * dlerror()
3135 {
3136 	static char sError[256];
3137 	DWORD uError = GetLastError();
3138 	FormatMessage ( FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL,
3139 		uError, LANG_SYSTEM_DEFAULT, (LPTSTR)sError, sizeof(sError), NULL );
3140 	return sError;
3141 }
3142 #endif // USE_WINDOWS
3143 
3144 
3145 #if !HAVE_DLOPEN
3146 
/// UDF subsystem init stub for builds without dlopen(); does nothing
void sphUDFInit ( const char * )
{
	// no dlopen(), nothing to set up
}
3151 
sphUDFCreate(const char *,const char *,ESphAttr,CSphString & sError)3152 bool sphUDFCreate ( const char *, const char *, ESphAttr, CSphString & sError )
3153 {
3154 	sError = "no dlopen(); UDF support disabled";
3155 	return false;
3156 }
3157 
sphUDFDrop(const char *,CSphString & sError)3158 bool sphUDFDrop ( const char *, CSphString & sError )
3159 {
3160 	sError = "no dlopen(); UDF support disabled";
3161 	return false;
3162 }
3163 
3164 #else
3165 
sphUDFInit(const char * sUdfDir)3166 void sphUDFInit ( const char * sUdfDir )
3167 {
3168 	if ( !sUdfDir || !*sUdfDir )
3169 		return;
3170 
3171 	g_sUdfDir = sUdfDir;
3172 	g_bUdfEnabled = true;
3173 }
3174 
3175 
sphUDFCreate(const char * szLib,const char * szFunc,ESphAttr eRetType,CSphString & sError)3176 bool sphUDFCreate ( const char * szLib, const char * szFunc, ESphAttr eRetType, CSphString & sError )
3177 {
3178 	if ( !g_bUdfEnabled )
3179 	{
3180 		sError = "UDF support disabled (requires workers=threads; and a valid plugin_dir)";
3181 		return false;
3182 	}
3183 
3184 	// validate library name
3185 	for ( const char * p = szLib; *p; p++ )
3186 		if ( *p=='/' || *p=='\\' )
3187 	{
3188 		sError = "restricted character (path delimiter) in a library file name";
3189 		return false;
3190 	}
3191 
3192 	// from here, we need a lock (we intend to update UDF hash)
3193 	g_tUdfMutex.Lock();
3194 
3195 	// validate function name
3196 	CSphString sFunc ( szFunc );
3197 	sFunc.ToLower();
3198 
3199 	if ( g_hUdfFuncs ( sFunc ) )
3200 	{
3201 		sError.SetSprintf ( "UDF '%s' already exists", sFunc.cstr() );
3202 		g_tUdfMutex.Unlock();
3203 		return false;
3204 	}
3205 
3206 	// lookup or load library
3207 	CSphString sLib;
3208 	sLib.SetSprintf ( "%s/%s", g_sUdfDir.cstr(), szLib );
3209 
3210 	UdfFunc_t tFunc;
3211 	tFunc.m_eRetType = eRetType;
3212 	tFunc.m_iUserCount = 0;
3213 	tFunc.m_bToDrop = false;
3214 
3215 	bool bLoaded = false;
3216 	void * pHandle = NULL;
3217 	tFunc.m_pLib = g_hUdfLibs ( sLib );
3218 	if ( !tFunc.m_pLib )
3219 	{
3220 		bLoaded = true;
3221 		pHandle = dlopen ( sLib.cstr(), RTLD_LAZY | RTLD_LOCAL );
3222 		if ( !pHandle )
3223 		{
3224 			const char * sDlerror = dlerror();
3225 			sError.SetSprintf ( "dlopen() failed: %s", sDlerror ? sDlerror : "(null)" );
3226 			g_tUdfMutex.Unlock();
3227 			return false;
3228 		}
3229 		sphLogDebug ( "dlopen(%s)=%p", sLib.cstr(), pHandle );
3230 
3231 	} else
3232 	{
3233 		pHandle = tFunc.m_pLib->m_pHandle;
3234 	}
3235 	assert ( pHandle );
3236 
3237 	// lookup and check function symbols
3238 	CSphString sName;
3239 	tFunc.m_fnFunc = dlsym ( pHandle, sFunc.cstr() );
3240 	tFunc.m_fnInit = (UdfInit_fn) dlsym ( pHandle, sName.SetSprintf ( "%s_init", sFunc.cstr() ).cstr() );
3241 	tFunc.m_fnDeinit = (UdfDeinit_fn) dlsym ( pHandle, sName.SetSprintf ( "%s_deinit", sFunc.cstr() ).cstr() );
3242 
3243 	if ( !tFunc.m_fnFunc || !tFunc.m_fnInit )
3244 	{
3245 		sError.SetSprintf ( "symbol '%s%s' not found in '%s'", sFunc.cstr(), tFunc.m_fnFunc ? "_init" : "", szLib );
3246 		if ( bLoaded )
3247 			dlclose ( pHandle );
3248 		g_tUdfMutex.Unlock();
3249 		return false;
3250 	}
3251 
3252 	// add library
3253 	if ( bLoaded )
3254 	{
3255 		UdfLib_t tLib;
3256 		tLib.m_iFuncs = 1;
3257 		tLib.m_pHandle = pHandle;
3258 		Verify ( g_hUdfLibs.Add ( tLib, sLib ) );
3259 		tFunc.m_pLib = g_hUdfLibs ( sLib );
3260 	} else
3261 	{
3262 		tFunc.m_pLib->m_iFuncs++;
3263 	}
3264 	tFunc.m_pLibName = g_hUdfLibs.GetKeyPtr ( sLib );
3265 	assert ( tFunc.m_pLib );
3266 
3267 	// add function
3268 	Verify ( g_hUdfFuncs.Add ( tFunc, sFunc ) );
3269 
3270 	// all ok
3271 	g_tUdfMutex.Unlock();
3272 	return true;
3273 }
3274 
3275 
sphUDFDrop(const char * szFunc,CSphString & sError)3276 bool sphUDFDrop ( const char * szFunc, CSphString & sError )
3277 {
3278 	CSphString sFunc ( szFunc );
3279 	sFunc.ToLower();
3280 
3281 	g_tUdfMutex.Lock();
3282 	UdfFunc_t * pFunc = g_hUdfFuncs ( sFunc );
3283 	if ( !pFunc || pFunc->m_bToDrop ) // handle concurrent drop in progress as "not exists"
3284 	{
3285 		sError.SetSprintf ( "UDF '%s' does not exist", sFunc.cstr() );
3286 		g_tUdfMutex.Unlock();
3287 		return false;
3288 	}
3289 
3290 	const int UDF_DROP_TIMEOUT_SEC = 30; // in seconds
3291 	int64_t tmEnd = sphMicroTimer() + UDF_DROP_TIMEOUT_SEC*1000000;
3292 
3293 	// mark function for deletion, to prevent new users
3294 	pFunc->m_bToDrop = true;
3295 	if ( pFunc->m_iUserCount )
3296 		for ( ;; )
3297 	{
3298 		// release lock and wait
3299 		// so that concurrent users could complete and release the function
3300 		g_tUdfMutex.Unlock();
3301 		sphSleepMsec ( 50 );
3302 
3303 		// re-acquire lock
3304 		g_tUdfMutex.Lock();
3305 
3306 		// everyone out? proceed with dropping
3307 		assert ( pFunc->m_iUserCount>=0 );
3308 		if ( pFunc->m_iUserCount<=0 )
3309 			break;
3310 
3311 		// timed out? clear deletion flag, and bail
3312 		if ( sphMicroTimer() > tmEnd )
3313 		{
3314 			pFunc->m_bToDrop = false;
3315 			g_tUdfMutex.Unlock();
3316 
3317 			sError.SetSprintf ( "DROP timed out in (still got %d users after waiting for %d seconds); please retry", pFunc->m_iUserCount, UDF_DROP_TIMEOUT_SEC );
3318 			return false;
3319 		}
3320 	}
3321 
3322 	UdfLib_t * pLib = pFunc->m_pLib;
3323 	const CSphString * pLibName = pFunc->m_pLibName;
3324 
3325 	Verify ( g_hUdfFuncs.Delete ( sFunc ) );
3326 	if ( --pLib->m_iFuncs<=0 )
3327 	{
3328 		// FIXME! running queries might be using this function
3329 		int iRes = dlclose ( pLib->m_pHandle );
3330 		sphLogDebug ( "dlclose(%s)=%d", pLibName->cstr(), iRes );
3331 		Verify ( g_hUdfLibs.Delete ( *pLibName ) );
3332 	}
3333 
3334 	g_tUdfMutex.Unlock();
3335 	return true;
3336 }
3337 #endif // HAVE_DLOPEN
3338 
3339 //////////////////////////////////////////////////////////////////////////
3340 // PUBLIC STUFF
3341 //////////////////////////////////////////////////////////////////////////
3342 
3343 /// parser entry point
sphExprParse(const char * sExpr,const CSphSchema & tSchema,ESphAttr * pAttrType,bool * pUsesWeight,CSphString & sError,CSphSchema * pExtra,ISphExprHook * pHook)3344 ISphExpr * sphExprParse ( const char * sExpr, const CSphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError, CSphSchema * pExtra, ISphExprHook * pHook )
3345 {
3346 	// parse into opcodes
3347 	ExprParser_t tParser ( pExtra, pHook );
3348 	return tParser.Parse ( sExpr, tSchema, pAttrType, pUsesWeight, sError );
3349 }
3350 
3351 //
3352 // $Id: sphinxexpr.cpp 4113 2013-08-26 07:43:28Z deogar $
3353 //
3354