1 //
2 // $Id$
3 //
4
5 //
6 // Copyright (c) 2001-2016, Andrew Aksyonoff
7 // Copyright (c) 2008-2016, Sphinx Technologies Inc
8 // All rights reserved
9 //
10 // This program is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License. You should have
12 // received a copy of the GPL license along with this program; if you
13 // did not, you can find it at http://www.gnu.org/
14 //
15
16 #include "sphinx.h"
17 #include "sphinxexpr.h"
18 #include "sphinxplugin.h"
19
20 #include "sphinxutils.h"
21 #include "sphinxint.h"
22 #include "sphinxjson.h"
23 #include <time.h>
24 #include <math.h>
25
26 #ifndef M_LOG2E
27 #define M_LOG2E 1.44269504088896340736
28 #endif
29
30 #ifndef M_LOG10E
31 #define M_LOG10E 0.434294481903251827651
32 #endif
33
// hack hack hack
// global hook: a pointer to a function that takes a user variable name
// and returns a UservarIntSet_c pointer
// NOTE(review): declared without an initializer here; presumably assigned elsewhere before use - confirm
UservarIntSet_c * ( *g_pUservarsHook )( const CSphString & sUservar );
36
37 //////////////////////////////////////////////////////////////////////////
38 // EVALUATION ENGINE
39 //////////////////////////////////////////////////////////////////////////
40
41 struct ExprLocatorTraits_t : public ISphExpr
42 {
43 CSphAttrLocator m_tLocator;
44 int m_iLocator; // used by SPH_EXPR_GET_DEPENDENT_COLS
45
ExprLocatorTraits_tExprLocatorTraits_t46 ExprLocatorTraits_t ( const CSphAttrLocator & tLocator, int iLocator ) : m_tLocator ( tLocator ), m_iLocator ( iLocator ) {}
47
CommandExprLocatorTraits_t48 virtual void Command ( ESphExprCommand eCmd, void * pArg )
49 {
50 if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
51 static_cast < CSphVector<int>* >(pArg)->Add ( m_iLocator );
52 }
53 };
54
55
56 struct Expr_GetInt_c : public ExprLocatorTraits_t
57 {
Expr_GetInt_cExpr_GetInt_c58 Expr_GetInt_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetInt_c59 virtual float Eval ( const CSphMatch & tMatch ) const { return (float) tMatch.GetAttr ( m_tLocator ); } // FIXME! OPTIMIZE!!! we can go the short route here
IntEvalExpr_GetInt_c60 virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
Int64EvalExpr_GetInt_c61 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.GetAttr ( m_tLocator ); }
62 };
63
64
65 struct Expr_GetBits_c : public ExprLocatorTraits_t
66 {
Expr_GetBits_cExpr_GetBits_c67 Expr_GetBits_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetBits_c68 virtual float Eval ( const CSphMatch & tMatch ) const { return (float) tMatch.GetAttr ( m_tLocator ); }
IntEvalExpr_GetBits_c69 virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
Int64EvalExpr_GetBits_c70 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.GetAttr ( m_tLocator ); }
71 };
72
73
74 struct Expr_GetSint_c : public ExprLocatorTraits_t
75 {
Expr_GetSint_cExpr_GetSint_c76 Expr_GetSint_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetSint_c77 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)(int)tMatch.GetAttr ( m_tLocator ); }
IntEvalExpr_GetSint_c78 virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
Int64EvalExpr_GetSint_c79 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
80 };
81
82
83 struct Expr_GetFloat_c : public ExprLocatorTraits_t
84 {
Expr_GetFloat_cExpr_GetFloat_c85 Expr_GetFloat_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetFloat_c86 virtual float Eval ( const CSphMatch & tMatch ) const { return tMatch.GetAttrFloat ( m_tLocator ); }
87 };
88
89
90 struct Expr_GetString_c : public ExprLocatorTraits_t
91 {
92 const BYTE * m_pStrings;
93
Expr_GetString_cExpr_GetString_c94 Expr_GetString_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetString_c95 virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
CommandExpr_GetString_c96 virtual void Command ( ESphExprCommand eCmd, void * pArg ) { if ( eCmd==SPH_EXPR_SET_STRING_POOL ) m_pStrings = (const BYTE*)pArg; }
97
StringEvalExpr_GetString_c98 virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
99 {
100 SphAttr_t iOff = tMatch.GetAttr ( m_tLocator );
101 if ( iOff>0 )
102 return sphUnpackStr ( m_pStrings + iOff, ppStr );
103
104 *ppStr = NULL;
105 return 0;
106 }
107 };
108
109
110 struct Expr_GetMva_c : public ExprLocatorTraits_t
111 {
112 const DWORD * m_pMva;
113 bool m_bArenaProhibit;
114
Expr_GetMva_cExpr_GetMva_c115 Expr_GetMva_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ), m_pMva ( NULL ), m_bArenaProhibit ( false ) {}
EvalExpr_GetMva_c116 virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
CommandExpr_GetMva_c117 virtual void Command ( ESphExprCommand eCmd, void * pArg )
118 {
119 if ( eCmd==SPH_EXPR_SET_MVA_POOL )
120 {
121 const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
122 assert ( pPool );
123 m_pMva = pPool->m_pMva;
124 m_bArenaProhibit = pPool->m_bArenaProhibit;
125 }
126 }
MvaEvalExpr_GetMva_c127 virtual const DWORD * MvaEval ( const CSphMatch & tMatch ) const { return tMatch.GetAttrMVA ( m_tLocator, m_pMva, m_bArenaProhibit ); }
128 };
129
130
131 struct Expr_GetFactorsAttr_c : public ExprLocatorTraits_t
132 {
Expr_GetFactorsAttr_cExpr_GetFactorsAttr_c133 Expr_GetFactorsAttr_c ( const CSphAttrLocator & tLocator, int iLocator ) : ExprLocatorTraits_t ( tLocator, iLocator ) {}
EvalExpr_GetFactorsAttr_c134 virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
FactorEvalExpr_GetFactorsAttr_c135 virtual const DWORD * FactorEval ( const CSphMatch & tMatch ) const { return (DWORD *)tMatch.GetAttr ( m_tLocator ); }
136 };
137
138
139 struct Expr_GetConst_c : public ISphExpr
140 {
141 float m_fValue;
Expr_GetConst_cExpr_GetConst_c142 explicit Expr_GetConst_c ( float fValue ) : m_fValue ( fValue ) {}
EvalExpr_GetConst_c143 virtual float Eval ( const CSphMatch & ) const { return m_fValue; }
IntEvalExpr_GetConst_c144 virtual int IntEval ( const CSphMatch & ) const { return (int)m_fValue; }
Int64EvalExpr_GetConst_c145 virtual int64_t Int64Eval ( const CSphMatch & ) const { return (int64_t)m_fValue; }
IsConstExpr_GetConst_c146 virtual bool IsConst () const { return true; }
147 };
148
149
150 struct Expr_GetIntConst_c : public ISphExpr
151 {
152 int m_iValue;
Expr_GetIntConst_cExpr_GetIntConst_c153 explicit Expr_GetIntConst_c ( int iValue ) : m_iValue ( iValue ) {}
EvalExpr_GetIntConst_c154 virtual float Eval ( const CSphMatch & ) const { return (float) m_iValue; } // no assert() here cause generic float Eval() needs to work even on int-evaluator tree
IntEvalExpr_GetIntConst_c155 virtual int IntEval ( const CSphMatch & ) const { return m_iValue; }
Int64EvalExpr_GetIntConst_c156 virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_iValue; }
IsConstExpr_GetIntConst_c157 virtual bool IsConst () const { return true; }
158 };
159
160
161 struct Expr_GetInt64Const_c : public ISphExpr
162 {
163 int64_t m_iValue;
Expr_GetInt64Const_cExpr_GetInt64Const_c164 explicit Expr_GetInt64Const_c ( int64_t iValue ) : m_iValue ( iValue ) {}
EvalExpr_GetInt64Const_c165 virtual float Eval ( const CSphMatch & ) const { return (float) m_iValue; } // no assert() here cause generic float Eval() needs to work even on int-evaluator tree
IntEvalExpr_GetInt64Const_c166 virtual int IntEval ( const CSphMatch & ) const { assert ( 0 ); return (int)m_iValue; }
Int64EvalExpr_GetInt64Const_c167 virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_iValue; }
IsConstExpr_GetInt64Const_c168 virtual bool IsConst () const { return true; }
169 };
170
171
172 struct Expr_GetStrConst_c : public ISphStringExpr
173 {
174 CSphString m_sVal;
175 int m_iLen;
176
Expr_GetStrConst_cExpr_GetStrConst_c177 explicit Expr_GetStrConst_c ( const char * sVal, int iLen, bool bUnescape )
178 {
179 if ( iLen>0 )
180 {
181 if ( bUnescape )
182 SqlUnescape ( m_sVal, sVal, iLen );
183 else
184 m_sVal.SetBinary ( sVal, iLen );
185 }
186 m_iLen = m_sVal.Length();
187 }
188
StringEvalExpr_GetStrConst_c189 virtual int StringEval ( const CSphMatch &, const BYTE ** ppStr ) const
190 {
191 *ppStr = (const BYTE*) m_sVal.cstr();
192 return m_iLen;
193 }
194
EvalExpr_GetStrConst_c195 virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
IntEvalExpr_GetStrConst_c196 virtual int IntEval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
Int64EvalExpr_GetStrConst_c197 virtual int64_t Int64Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
IsConstExpr_GetStrConst_c198 virtual bool IsConst () const { return true; }
199 };
200
201
202 struct Expr_GetZonespanlist_c : public ISphStringExpr
203 {
204 const CSphVector<int> * m_pData;
205 mutable CSphStringBuilder m_sBuilder;
206
Expr_GetZonespanlist_cExpr_GetZonespanlist_c207 explicit Expr_GetZonespanlist_c ()
208 : m_pData ( NULL )
209 {}
210
StringEvalExpr_GetZonespanlist_c211 virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
212 {
213 assert ( ppStr );
214 if ( !m_pData || !m_pData->GetLength() )
215 {
216 *ppStr = NULL;
217 return 0;
218 }
219 m_sBuilder.Clear();
220 const CSphVector<int> & dSpans = *m_pData;
221 int iStart = tMatch.m_iTag + 1; // spans[tag] contains the length, so the 1st data index is tag+1
222 int iEnd = iStart + dSpans [ tMatch.m_iTag ]; // [start,end) now covers all data indexes
223 for ( int i=iStart; i<iEnd; i+=2 )
224 m_sBuilder.Appendf ( " %d:%d", 1+dSpans[i], 1+dSpans[i+1] ); // convert our 0-based span numbers to human 1-based ones
225 *ppStr = (const BYTE *) CSphString ( m_sBuilder.cstr() ).Leak();
226 return m_sBuilder.Length();
227 }
228
CommandExpr_GetZonespanlist_c229 virtual void Command ( ESphExprCommand eCmd, void * pArg )
230 {
231 if ( eCmd==SPH_EXPR_SET_EXTRA_DATA )
232 static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_ZONESPANS, (void**)&m_pData );
233 }
234
IsStringPtrExpr_GetZonespanlist_c235 virtual bool IsStringPtr() const
236 {
237 return true;
238 }
239 };
240
241
242 struct Expr_GetRankFactors_c : public ISphStringExpr
243 {
244 /// hash type MUST BE IN SYNC with RankerState_Export_fn in sphinxsearch.cpp
245 CSphOrderedHash < CSphString, SphDocID_t, IdentityHash_fn, 256 > * m_pFactors;
246
Expr_GetRankFactors_cExpr_GetRankFactors_c247 explicit Expr_GetRankFactors_c ()
248 : m_pFactors ( NULL )
249 {}
250
StringEvalExpr_GetRankFactors_c251 virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
252 {
253 assert ( ppStr );
254 if ( !m_pFactors )
255 {
256 *ppStr = NULL;
257 return 0;
258 }
259
260 CSphString * sVal = (*m_pFactors) ( tMatch.m_uDocID );
261 if ( !sVal )
262 {
263 *ppStr = NULL;
264 return 0;
265 }
266 int iLen = sVal->Length();
267 *ppStr = (const BYTE*)sVal->Leak();
268 m_pFactors->Delete ( tMatch.m_uDocID );
269 return iLen;
270 }
271
CommandExpr_GetRankFactors_c272 virtual void Command ( ESphExprCommand eCmd, void * pArg )
273 {
274 if ( eCmd==SPH_EXPR_SET_EXTRA_DATA )
275 static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_RANKFACTORS, (void**)&m_pFactors );
276 }
277
IsStringPtrExpr_GetRankFactors_c278 virtual bool IsStringPtr() const
279 {
280 return true;
281 }
282 };
283
284
285 struct Expr_GetPackedFactors_c : public ISphStringExpr
286 {
287 SphFactorHash_t * m_pHash;
288
Expr_GetPackedFactors_cExpr_GetPackedFactors_c289 explicit Expr_GetPackedFactors_c ()
290 : m_pHash ( NULL )
291 {}
292
FactorEvalExpr_GetPackedFactors_c293 virtual const DWORD * FactorEval ( const CSphMatch & tMatch ) const
294 {
295 if ( !m_pHash || !m_pHash->GetLength() )
296 return NULL;
297
298 SphFactorHashEntry_t * pEntry = (*m_pHash)[ (int)( tMatch.m_uDocID % m_pHash->GetLength() ) ];
299 assert ( pEntry );
300
301 while ( pEntry && pEntry->m_iId!=tMatch.m_uDocID )
302 pEntry = pEntry->m_pNext;
303
304 if ( !pEntry )
305 return NULL;
306
307 DWORD uDataLen = (BYTE *)pEntry - (BYTE *)pEntry->m_pData;
308
309 BYTE * pData = new BYTE[uDataLen];
310 memcpy ( pData, pEntry->m_pData, uDataLen );
311
312 return (DWORD *)pData;
313 }
314
CommandExpr_GetPackedFactors_c315 virtual void Command ( ESphExprCommand eCmd, void * pArg )
316 {
317 if ( eCmd==SPH_EXPR_SET_EXTRA_DATA )
318 static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_PACKEDFACTORS, (void**)&m_pHash );
319 }
320
IsStringPtrExpr_GetPackedFactors_c321 virtual bool IsStringPtr() const
322 {
323 return true;
324 }
325 };
326
327
328 struct Expr_BM25F_c : public ISphExpr
329 {
330 SphExtraDataRankerState_t m_tRankerState;
331 float m_fK1;
332 float m_fB;
333 float m_fWeightedAvgDocLen;
334 CSphVector<int> m_dWeights; ///< per field weights
335 SphFactorHash_t * m_pHash;
336 CSphVector<CSphNamedVariant> m_dFieldWeights;
337
Expr_BM25F_cExpr_BM25F_c338 Expr_BM25F_c ( float k1, float b, CSphVector<CSphNamedVariant> * pFieldWeights )
339 : m_pHash ( NULL )
340 {
341 // bind k1, b
342 m_fK1 = k1;
343 m_fB = b;
344 if ( pFieldWeights )
345 m_dFieldWeights.SwapData ( *pFieldWeights );
346 }
347
EvalExpr_BM25F_c348 float Eval ( const CSphMatch & tMatch ) const
349 {
350 if ( !m_pHash || !m_pHash->GetLength() )
351 return 0.0f;
352
353 SphFactorHashEntry_t * pEntry = (*m_pHash)[ (int)( tMatch.m_uDocID % m_pHash->GetLength() ) ];
354 assert ( pEntry );
355
356 while ( pEntry && pEntry->m_iId!=tMatch.m_uDocID )
357 pEntry = pEntry->m_pNext;
358
359 if ( !pEntry )
360 return 0.0f;
361
362 SPH_UDF_FACTORS tUnpacked;
363 sphinx_factors_init ( &tUnpacked );
364 #ifndef NDEBUG
365 Verify ( sphinx_factors_unpack ( (const unsigned int*)pEntry->m_pData, &tUnpacked )==0 );
366 #else
367 sphinx_factors_unpack ( (const unsigned int*)pEntry->m_pData, &tUnpacked ); // fix MSVC Release warning
368 #endif
369
370 // compute document length
371 // OPTIMIZE? could precompute and store total dl in attrs, but at a storage cost
372 // OPTIMIZE? could at least share between multiple BM25F instances, if there are many
373 float dl = 0;
374 CSphAttrLocator tLoc = m_tRankerState.m_tFieldLensLoc;
375 if ( tLoc.m_iBitOffset>=0 )
376 {
377 for ( int i=0; i<m_tRankerState.m_iFields; i++ )
378 {
379 dl += tMatch.GetAttr ( tLoc ) * m_dWeights[i];
380 tLoc.m_iBitOffset += 32;
381 }
382 }
383
384 // compute (the current instance of) BM25F
385 float fRes = 0.0f;
386 for ( int iWord=0; iWord<m_tRankerState.m_iMaxQpos; iWord++ )
387 {
388 if ( !tUnpacked.term[iWord].keyword_mask )
389 continue;
390
391 // compute weighted TF
392 float tf = 0.0f;
393 for ( int i=0; i<m_tRankerState.m_iFields; i++ )
394 {
395 tf += tUnpacked.field_tf[ iWord + 1 + i * ( 1 + m_tRankerState.m_iMaxQpos ) ] * m_dWeights[i];
396 }
397 float idf = tUnpacked.term[iWord].idf; // FIXME? zeroed out for dupes!
398 fRes += tf / ( tf + m_fK1 * ( 1.0f - m_fB + m_fB * dl / m_fWeightedAvgDocLen ) ) * idf;
399 }
400
401 sphinx_factors_deinit ( &tUnpacked );
402
403 return fRes + 0.5f; // map to [0..1] range
404 }
405
CommandExpr_BM25F_c406 virtual void Command ( ESphExprCommand eCmd, void * pArg )
407 {
408 if ( eCmd!=SPH_EXPR_SET_EXTRA_DATA )
409 return;
410
411 bool bGotHash = static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_PACKEDFACTORS, (void**)&m_pHash );
412 if ( !bGotHash )
413 return;
414
415 bool bGotState = static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_RANKER_STATE, (void**)&m_tRankerState );
416 if ( !bGotState )
417 return;
418
419 // bind weights
420 m_dWeights.Resize ( m_tRankerState.m_iFields );
421 m_dWeights.Fill ( 1 );
422 if ( m_dFieldWeights.GetLength() )
423 {
424 ARRAY_FOREACH ( i, m_dFieldWeights )
425 {
426 // FIXME? report errors if field was not found?
427 CSphString & sField = m_dFieldWeights[i].m_sKey;
428 int iField = m_tRankerState.m_pSchema->GetFieldIndex ( sField.cstr() );
429 if ( iField>=0 )
430 m_dWeights[iField] = m_dFieldWeights[i].m_iValue;
431 }
432 }
433
434 // compute weighted avgdl
435 m_fWeightedAvgDocLen = 1.0f;
436 if ( m_tRankerState.m_pFieldLens )
437 {
438 m_fWeightedAvgDocLen = 0.0f;
439 ARRAY_FOREACH ( i, m_dWeights )
440 m_fWeightedAvgDocLen += m_tRankerState.m_pFieldLens[i] * m_dWeights[i];
441 }
442 m_fWeightedAvgDocLen /= m_tRankerState.m_iTotalDocuments;
443 }
444 };
445
446
447 struct Expr_GetId_c : public ISphExpr
448 {
EvalExpr_GetId_c449 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)tMatch.m_uDocID; }
IntEvalExpr_GetId_c450 virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.m_uDocID; }
Int64EvalExpr_GetId_c451 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.m_uDocID; }
452 };
453
454
455 struct Expr_GetWeight_c : public ISphExpr
456 {
EvalExpr_GetWeight_c457 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)tMatch.m_iWeight; }
IntEvalExpr_GetWeight_c458 virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.m_iWeight; }
Int64EvalExpr_GetWeight_c459 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.m_iWeight; }
460 };
461
462 //////////////////////////////////////////////////////////////////////////
463
464 struct Expr_Arglist_c : public ISphExpr
465 {
466 CSphVector<ISphExpr *> m_dArgs;
467
Expr_Arglist_cExpr_Arglist_c468 Expr_Arglist_c ( ISphExpr * pLeft, ISphExpr * pRight )
469 {
470 AddArgs ( pLeft );
471 AddArgs ( pRight );
472 }
473
~Expr_Arglist_cExpr_Arglist_c474 ~Expr_Arglist_c ()
475 {
476 ARRAY_FOREACH ( i, m_dArgs )
477 SafeRelease ( m_dArgs[i] );
478 }
479
AddArgsExpr_Arglist_c480 void AddArgs ( ISphExpr * pExpr )
481 {
482 // not an arglist? just add it
483 if ( !pExpr->IsArglist() )
484 {
485 m_dArgs.Add ( pExpr );
486 return;
487 }
488
489 // arglist? take ownership of its args, and dismiss it
490 Expr_Arglist_c * pArgs = (Expr_Arglist_c *) pExpr;
491 ARRAY_FOREACH ( i, pArgs->m_dArgs )
492 {
493 m_dArgs.Add ( pArgs->m_dArgs[i] );
494 pArgs->m_dArgs[i] = NULL;
495 }
496 SafeRelease ( pExpr );
497 }
498
IsArglistExpr_Arglist_c499 virtual bool IsArglist () const
500 {
501 return true;
502 }
503
GetArgExpr_Arglist_c504 virtual ISphExpr * GetArg ( int i ) const
505 {
506 if ( i>=m_dArgs.GetLength() )
507 return NULL;
508 return m_dArgs[i];
509 }
510
GetNumArgsExpr_Arglist_c511 virtual int GetNumArgs() const
512 {
513 return m_dArgs.GetLength();
514 }
515
EvalExpr_Arglist_c516 virtual float Eval ( const CSphMatch & ) const
517 {
518 assert ( 0 && "internal error: Eval() must not be explicitly called on arglist" );
519 return 0.0f;
520 }
521
CommandExpr_Arglist_c522 virtual void Command ( ESphExprCommand eCmd, void * pArg )
523 {
524 ARRAY_FOREACH ( i, m_dArgs )
525 m_dArgs[i]->Command ( eCmd, pArg );
526 }
527 };
528
529 //////////////////////////////////////////////////////////////////////////
530
531 struct Expr_Unary_c : public ISphExpr
532 {
533 ISphExpr * m_pFirst;
534
Expr_Unary_cExpr_Unary_c535 explicit Expr_Unary_c ( ISphExpr * p ) : m_pFirst(p) {}
~Expr_Unary_cExpr_Unary_c536 ~Expr_Unary_c() { SafeRelease ( m_pFirst ); }
537
CommandExpr_Unary_c538 virtual void Command ( ESphExprCommand eCmd, void * pArg ) { m_pFirst->Command ( eCmd, pArg ); }
539 };
540
541
542 struct Expr_Crc32_c : public Expr_Unary_c
543 {
Expr_Crc32_cExpr_Crc32_c544 explicit Expr_Crc32_c ( ISphExpr * pFirst ) : Expr_Unary_c ( pFirst ) {}
EvalExpr_Crc32_c545 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
IntEvalExpr_Crc32_c546 virtual int IntEval ( const CSphMatch & tMatch ) const
547 {
548 const BYTE * pStr;
549 int iLen = m_pFirst->StringEval ( tMatch, &pStr );
550 DWORD uCrc = sphCRC32 ( pStr, iLen );
551 if ( m_pFirst->IsStringPtr() )
552 SafeDeleteArray ( pStr );
553 return uCrc;
554 }
Int64EvalExpr_Crc32_c555 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)(DWORD)IntEval ( tMatch ); }
556 };
557
558
/// returns the i-th Fibonacci number (F0=0, F1=1); negative indexes yield 0
/// sums are accumulated in unsigned arithmetic, so results that do not fit
/// wrap around modulo 2^(bits in unsigned int) instead of overflowing a signed int
static inline int Fibonacci ( int i )
{
	if ( i<0 )
		return 0;

	// unsigned on purpose: the loop always computes one term past the one returned,
	// so with signed ints even i==46 (whose result fits) ran into overflow
	unsigned int f0 = 0;
	unsigned int f1 = 1;
	for ( int j=0; j+1<i; j+=2 )
	{
		f0 += f1; // f_{j+2}
		f1 += f0; // f_{j+3}
	}

	// f0 holds the even-indexed term, f1 the odd-indexed one
	return (int)( ( i & 1 ) ? f1 : f0 );
}
573
574
575 struct Expr_Fibonacci_c : public Expr_Unary_c
576 {
Expr_Fibonacci_cExpr_Fibonacci_c577 explicit Expr_Fibonacci_c ( ISphExpr * pFirst ) : Expr_Unary_c ( pFirst ) {}
578
EvalExpr_Fibonacci_c579 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
IntEvalExpr_Fibonacci_c580 virtual int IntEval ( const CSphMatch & tMatch ) const { return Fibonacci ( m_pFirst->IntEval ( tMatch ) ); }
Int64EvalExpr_Fibonacci_c581 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval ( tMatch ); }
582 };
583
584
585 struct Expr_ToString_c : public Expr_Unary_c
586 {
587 protected:
588 ESphAttr m_eArg;
589 mutable CSphStringBuilder m_sBuilder;
590
591 public:
Expr_ToString_cExpr_ToString_c592 Expr_ToString_c ( ISphExpr * pArg, ESphAttr eArg )
593 : Expr_Unary_c ( pArg )
594 , m_eArg ( eArg )
595 {}
596
EvalExpr_ToString_c597 virtual float Eval ( const CSphMatch & ) const
598 {
599 assert ( 0 );
600 return 0.0f;
601 }
602
StringEvalExpr_ToString_c603 virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
604 {
605 m_sBuilder.Clear();
606 switch ( m_eArg )
607 {
608 case SPH_ATTR_INTEGER: m_sBuilder.Appendf ( "%u", m_pFirst->IntEval ( tMatch ) ); break;
609 case SPH_ATTR_BIGINT: m_sBuilder.Appendf ( INT64_FMT, m_pFirst->Int64Eval ( tMatch ) ); break;
610 case SPH_ATTR_FLOAT: m_sBuilder.Appendf ( "%f", m_pFirst->Eval ( tMatch ) ); break;
611 case SPH_ATTR_UINT32SET:
612 case SPH_ATTR_INT64SET:
613 {
614 const DWORD * pValues = m_pFirst->MvaEval ( tMatch );
615 if ( !pValues || !*pValues )
616 break;
617
618 DWORD nValues = *pValues++;
619 assert (!( m_eArg==SPH_ATTR_INT64SET && ( nValues & 1 ) ));
620
621 // OPTIMIZE? minibuffer on stack, less allocs, manual formatting vs printf, etc
622 if ( m_eArg==SPH_ATTR_UINT32SET )
623 {
624 while ( nValues-- )
625 {
626 if ( m_sBuilder.Length() )
627 m_sBuilder += ",";
628 m_sBuilder.Appendf ( "%u", *pValues++ );
629 }
630 } else
631 {
632 for ( ; nValues; nValues-=2, pValues+=2 )
633 {
634 if ( m_sBuilder.Length() )
635 m_sBuilder += ",";
636 m_sBuilder.Appendf ( INT64_FMT, MVA_UPSIZE ( pValues ) );
637 }
638 }
639 }
640 break;
641 case SPH_ATTR_STRINGPTR:
642 return m_pFirst->StringEval ( tMatch, ppStr );
643
644 default:
645 assert ( 0 && "unhandled arg type in TO_STRING()" );
646 break;
647 }
648 if ( !m_sBuilder.Length() )
649 {
650 *ppStr = NULL;
651 return 0;
652 }
653 *ppStr = (const BYTE *) CSphString ( m_sBuilder.cstr() ).Leak();
654 return m_sBuilder.Length();
655 }
656
IsStringPtrExpr_ToString_c657 virtual bool IsStringPtr() const
658 {
659 return true;
660 }
661 };
662
663 //////////////////////////////////////////////////////////////////////////
664
665 /// generic JSON value evaluation
666 /// can handle arbitrary stacks of jsoncol.key1.arr2[indexexpr3].key4[keynameexpr5]
667 /// m_dArgs holds the expressions that return actual accessors (either keynames or indexes)
668 /// m_dRetTypes holds their respective types
669 struct Expr_JsonField_c : public ExprLocatorTraits_t
670 {
671 protected:
672 const BYTE * m_pStrings;
673 CSphVector<ISphExpr *> m_dArgs;
674 CSphVector<ESphAttr> m_dRetTypes;
675
676 public:
677 /// takes over the expressions
Expr_JsonField_cExpr_JsonField_c678 Expr_JsonField_c ( const CSphAttrLocator & tLocator, int iLocator, CSphVector<ISphExpr*> & dArgs, CSphVector<ESphAttr> & dRetTypes )
679 : ExprLocatorTraits_t ( tLocator, iLocator )
680 , m_pStrings ( NULL )
681 {
682 assert ( dArgs.GetLength()==dRetTypes.GetLength() );
683 m_dArgs.SwapData ( dArgs );
684 m_dRetTypes.SwapData ( dRetTypes );
685 }
686
~Expr_JsonField_cExpr_JsonField_c687 ~Expr_JsonField_c ()
688 {
689 ARRAY_FOREACH ( i, m_dArgs )
690 SafeRelease ( m_dArgs[i] );
691 }
692
CommandExpr_JsonField_c693 virtual void Command ( ESphExprCommand eCmd, void * pArg )
694 {
695 if ( eCmd==SPH_EXPR_SET_STRING_POOL )
696 m_pStrings = (const BYTE*)pArg;
697 else if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS && m_iLocator!=-1 )
698 static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
699 ARRAY_FOREACH ( i, m_dArgs )
700 if ( m_dArgs[i] )
701 m_dArgs[i]->Command ( eCmd, pArg );
702 }
703
EvalExpr_JsonField_c704 virtual float Eval ( const CSphMatch & ) const
705 {
706 assert ( 0 && "one just does not simply evaluate a JSON as float" );
707 return 0;
708 }
709
DoEvalExpr_JsonField_c710 virtual int64_t DoEval ( ESphJsonType eJson, const BYTE * pVal, const CSphMatch & tMatch ) const
711 {
712 int iLen;
713 const BYTE * pStr;
714
715 ARRAY_FOREACH ( i, m_dRetTypes )
716 {
717 switch ( m_dRetTypes[i] )
718 {
719 case SPH_ATTR_INTEGER: eJson = sphJsonFindByIndex ( eJson, &pVal, m_dArgs[i]->IntEval ( tMatch ) ); break;
720 case SPH_ATTR_BIGINT: eJson = sphJsonFindByIndex ( eJson, &pVal, (int)m_dArgs[i]->Int64Eval ( tMatch ) ); break;
721 case SPH_ATTR_FLOAT: eJson = sphJsonFindByIndex ( eJson, &pVal, (int)m_dArgs[i]->Eval ( tMatch ) ); break;
722 case SPH_ATTR_STRING:
723 // is this assert will fail someday it's ok
724 // just remove it and add this code instead to handle possible memory leak
725 // if ( m_dArgv[i]->IsStringPtr() ) SafeDeleteArray ( pStr );
726 assert ( !m_dArgs[i]->IsStringPtr() );
727 iLen = m_dArgs[i]->StringEval ( tMatch, &pStr );
728 eJson = sphJsonFindByKey ( eJson, &pVal, (const void *)pStr, iLen, sphJsonKeyMask ( (const char *)pStr, iLen ) );
729 break;
730 case SPH_ATTR_JSON_FIELD: // handle cases like "json.a [ json.b ]"
731 {
732 uint64_t uValue = m_dArgs[i]->Int64Eval ( tMatch );
733 const BYTE * p = m_pStrings + ( uValue & 0xffffffff );
734 ESphJsonType eType = (ESphJsonType)( uValue >> 32 );
735
736 switch ( eType )
737 {
738 case JSON_INT32: eJson = sphJsonFindByIndex ( eJson, &pVal, sphJsonLoadInt ( &p ) ); break;
739 case JSON_INT64: eJson = sphJsonFindByIndex ( eJson, &pVal, (int)sphJsonLoadBigint ( &p ) ); break;
740 case JSON_DOUBLE: eJson = sphJsonFindByIndex ( eJson, &pVal, (int)sphQW2D ( sphJsonLoadBigint ( &p ) ) ); break;
741 case JSON_STRING:
742 iLen = sphJsonUnpackInt ( &p );
743 eJson = sphJsonFindByKey ( eJson, &pVal, (const void *)p, iLen, sphJsonKeyMask ( (const char *)p, iLen ) );
744 break;
745 default:
746 return 0;
747 }
748 break;
749 }
750 default:
751 return 0;
752 }
753
754 if ( eJson==JSON_EOF )
755 return 0;
756 }
757
758 // keep actual attribute type and offset to data packed
759 int64_t iPacked = ( ( (int64_t)( pVal-m_pStrings ) ) | ( ( (int64_t)eJson )<<32 ) );
760 return iPacked;
761 }
762
Int64EvalExpr_JsonField_c763 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
764 {
765 if ( !m_pStrings )
766 return 0;
767
768 uint64_t uOffset = tMatch.GetAttr ( m_tLocator );
769 if ( !uOffset )
770 return 0;
771
772 if ( m_tLocator.m_bDynamic )
773 {
774 // extends precalculated (aliased) field
775 const BYTE * pVal = m_pStrings + ( uOffset & 0xffffffff );
776 ESphJsonType eJson = (ESphJsonType)( uOffset >> 32 );
777 return DoEval ( eJson, pVal, tMatch );
778 }
779
780 const BYTE * pVal = NULL;
781 sphUnpackStr ( m_pStrings + uOffset, &pVal );
782 if ( !pVal )
783 return 0;
784
785 ESphJsonType eJson = sphJsonFindFirst ( &pVal );
786 return DoEval ( eJson, pVal, tMatch );
787 }
788 };
789
790
791 /// fastpath (instead of generic JsonField_c) for jsoncol.key access by a static key name
792 struct Expr_JsonFastKey_c : public ExprLocatorTraits_t
793 {
794 protected:
795 const BYTE * m_pStrings;
796 CSphString m_sKey;
797 int m_iKeyLen;
798 DWORD m_uKeyBloom;
799
800 public:
801 /// takes over the expressions
Expr_JsonFastKey_cExpr_JsonFastKey_c802 Expr_JsonFastKey_c ( const CSphAttrLocator & tLocator, int iLocator, ISphExpr * pArg )
803 : ExprLocatorTraits_t ( tLocator, iLocator )
804 , m_pStrings ( NULL )
805 {
806 assert ( ( tLocator.m_iBitOffset % ROWITEM_BITS )==0 );
807 assert ( tLocator.m_iBitCount==ROWITEM_BITS );
808
809 Expr_GetStrConst_c * pKey = (Expr_GetStrConst_c*)pArg;
810 m_sKey = pKey->m_sVal;
811 m_iKeyLen = pKey->m_iLen;
812 m_uKeyBloom = sphJsonKeyMask ( m_sKey.cstr(), m_iKeyLen );
813 SafeRelease ( pArg );
814 }
815
CommandExpr_JsonFastKey_c816 virtual void Command ( ESphExprCommand eCmd, void * pArg )
817 {
818 if ( eCmd==SPH_EXPR_SET_STRING_POOL )
819 m_pStrings = (const BYTE*)pArg;
820 }
821
EvalExpr_JsonFastKey_c822 virtual float Eval ( const CSphMatch & ) const
823 {
824 assert ( 0 && "one just does not simply evaluate a JSON as float" );
825 return 0;
826 }
827
Int64EvalExpr_JsonFastKey_c828 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
829 {
830 // get pointer to JSON blob data
831 assert ( m_pStrings );
832 DWORD uOffset = m_tLocator.m_bDynamic
833 ? tMatch.m_pDynamic [ m_tLocator.m_iBitOffset >> ROWITEM_SHIFT ]
834 : tMatch.m_pStatic [ m_tLocator.m_iBitOffset >> ROWITEM_SHIFT ];
835 if ( !uOffset )
836 return 0;
837 const BYTE * pJson;
838 sphUnpackStr ( m_pStrings + uOffset, &pJson );
839
840 // all root objects start with a Bloom mask; quickly check it
841 if ( ( sphGetDword(pJson) & m_uKeyBloom )!=m_uKeyBloom )
842 return 0;
843
844 // OPTIMIZE? FindByKey does an extra (redundant) bloom check inside
845 ESphJsonType eJson = sphJsonFindByKey ( JSON_ROOT, &pJson, m_sKey.cstr(), m_iKeyLen, m_uKeyBloom );
846 if ( eJson==JSON_EOF )
847 return 0;
848
849 // keep actual attribute type and offset to data packed
850 int64_t iPacked = ( ( (int64_t)( pJson-m_pStrings ) ) | ( ( (int64_t)eJson )<<32 ) );
851 return iPacked;
852 }
853 };
854
855
856 struct Expr_JsonFieldConv_c : public ISphExpr
857 {
858 protected:
859 const BYTE * m_pStrings;
860 ISphExpr * m_pArg;
861
862 public:
Expr_JsonFieldConv_cExpr_JsonFieldConv_c863 explicit Expr_JsonFieldConv_c ( ISphExpr * pArg )
864 : m_pStrings ( NULL )
865 , m_pArg ( pArg )
866 {}
867
~Expr_JsonFieldConv_cExpr_JsonFieldConv_c868 ~Expr_JsonFieldConv_c()
869 {
870 SafeRelease ( m_pArg );
871 }
872
CommandExpr_JsonFieldConv_c873 virtual void Command ( ESphExprCommand eCmd, void * pArg )
874 {
875 if ( eCmd==SPH_EXPR_SET_STRING_POOL )
876 m_pStrings = (const BYTE*)pArg;
877 if ( m_pArg )
878 m_pArg->Command ( eCmd, pArg );
879 }
880
881 protected:
GetKeyExpr_JsonFieldConv_c882 virtual ESphJsonType GetKey ( const BYTE ** ppKey, const CSphMatch & tMatch ) const
883 {
884 assert ( ppKey );
885 if ( !m_pStrings )
886 return JSON_EOF;
887 uint64_t uValue = m_pArg->Int64Eval ( tMatch );
888 *ppKey = m_pStrings + ( uValue & 0xffffffff );
889 return (ESphJsonType)( uValue >> 32 );
890 }
891
892 // generic evaluate
893 template < typename T >
DoEvalExpr_JsonFieldConv_c894 T DoEval ( const CSphMatch & tMatch ) const
895 {
896 const BYTE * pVal = NULL;
897 ESphJsonType eJson = GetKey ( &pVal, tMatch );
898 switch ( eJson )
899 {
900 case JSON_INT32: return (T)sphJsonLoadInt ( &pVal );
901 case JSON_INT64: return (T)sphJsonLoadBigint ( &pVal );
902 case JSON_DOUBLE: return (T)sphQW2D ( sphJsonLoadBigint ( &pVal ) );
903 case JSON_TRUE: return 1;
904 case JSON_STRING:
905 {
906 if ( !g_bJsonAutoconvNumbers )
907 return 0;
908 int iLen = sphJsonUnpackInt ( &pVal );
909 int64_t iVal;
910 double fVal;
911 ESphJsonType eType;
912 if ( sphJsonStringToNumber ( (const char*)pVal, iLen, eType, iVal, fVal ) )
913 return eType==JSON_DOUBLE ? (T)fVal : (T)iVal;
914 }
915 default: return 0;
916 }
917 }
918
919 public:
StringEvalExpr_JsonFieldConv_c920 virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
921 {
922 const BYTE * pVal = NULL;
923 ESphJsonType eJson = GetKey ( &pVal, tMatch );
924 return ( eJson==JSON_STRING ) ? sphUnpackStr ( pVal, ppStr ) : 0;
925 }
// numeric evaluators; all three go through DoEval() and differ only in the result type
virtual float Eval ( const CSphMatch & tMatch ) const { return DoEval<float> ( tMatch ); }
virtual int IntEval ( const CSphMatch & tMatch ) const { return DoEval<int> ( tMatch ); }
virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return DoEval<int64_t> ( tMatch ); }
929 };
930
931
932 struct Expr_JsonFieldAggr_c : public Expr_JsonFieldConv_c
933 {
934 protected:
935 ESphAggrFunc m_eFunc;
936
937 public:
Expr_JsonFieldAggr_cExpr_JsonFieldAggr_c938 Expr_JsonFieldAggr_c ( ISphExpr * pArg, ESphAggrFunc eFunc )
939 : Expr_JsonFieldConv_c ( pArg )
940 , m_eFunc ( eFunc )
941 {}
942
IntEvalExpr_JsonFieldAggr_c943 virtual int IntEval ( const CSphMatch & tMatch ) const
944 {
945 const BYTE * pVal = NULL;
946 ESphJsonType eJson = GetKey ( &pVal, tMatch );
947 switch ( eJson )
948 {
949 case JSON_INT32_VECTOR:
950 {
951 int iVals = sphJsonUnpackInt ( &pVal );
952 if ( iVals==0 )
953 return 0;
954
955 const int * p = (const int*) pVal;
956 int iRes = *p; // first value
957
958 switch ( m_eFunc )
959 {
960 case SPH_AGGR_MIN: while ( --iVals ) if ( *++p<iRes ) iRes = *p; break;
961 case SPH_AGGR_MAX: while ( --iVals ) if ( *++p>iRes ) iRes = *p; break;
962 default:
963 return 0;
964 }
965 return iRes;
966 }
967 default:
968 return 0;
969 }
970 }
971
StringEvalExpr_JsonFieldAggr_c972 virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
973 {
974 CSphString sBuf;
975 *ppStr = NULL;
976 const BYTE * pVal = NULL;
977 ESphJsonType eJson = GetKey ( &pVal, tMatch );
978 switch ( eJson )
979 {
980 case JSON_INT32_VECTOR:
981 sBuf.SetSprintf ( "%u", IntEval ( tMatch ) );
982 *ppStr = (const BYTE *) sBuf.Leak();
983 return strlen ( (const char*) *ppStr );
984
985 case JSON_STRING_VECTOR:
986 {
987 sphJsonUnpackInt ( &pVal ); // skip node length
988
989 int iVals = sphJsonUnpackInt ( &pVal );
990 if ( iVals==0 )
991 return 0;
992
993 switch ( m_eFunc )
994 {
995 case SPH_AGGR_MIN:
996 case SPH_AGGR_MAX:
997 {
998 // first value
999 int iLen = sphJsonUnpackInt ( &pVal );
1000 const char *pRes = (const char*) pVal;
1001 int iResLen = iLen;
1002
1003 while ( --iVals )
1004 {
1005 pVal += iLen;
1006 iLen = sphJsonUnpackInt ( &pVal );
1007
1008 // binary string comparison
1009 int iCmp = memcmp ( pRes, (const char*)pVal, iLen<iResLen ? iLen : iResLen );
1010 if ( iCmp==0 && iLen!=iResLen )
1011 iCmp = iResLen-iLen;
1012
1013 if ( ( m_eFunc==SPH_AGGR_MIN && iCmp>0 ) || ( m_eFunc==SPH_AGGR_MAX && iCmp<0 ) )
1014 {
1015 pRes = (const char*)pVal;
1016 iResLen = iLen;
1017 }
1018 }
1019
1020 sBuf.SetBinary ( pRes, iResLen );
1021 *ppStr = (const BYTE *) sBuf.Leak();
1022 return iResLen;
1023 }
1024 default:
1025 return 0;
1026 }
1027 }
1028 default:
1029 return 0;
1030 }
1031 }
1032
EvalExpr_JsonFieldAggr_c1033 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_JsonFieldAggr_c1034 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
IsStringPtrExpr_JsonFieldAggr_c1035 virtual bool IsStringPtr() const { return true; }
1036 };
1037
1038
1039 struct Expr_JsonFieldLength_c : public Expr_JsonFieldConv_c
1040 {
1041 public:
Expr_JsonFieldLength_cExpr_JsonFieldLength_c1042 explicit Expr_JsonFieldLength_c ( ISphExpr * pArg )
1043 : Expr_JsonFieldConv_c ( pArg )
1044 {}
1045
IntEvalExpr_JsonFieldLength_c1046 virtual int IntEval ( const CSphMatch & tMatch ) const
1047 {
1048 const BYTE * pVal = NULL;
1049 ESphJsonType eJson = GetKey ( &pVal, tMatch );
1050 return sphJsonFieldLength ( eJson, pVal );
1051 }
1052
EvalExpr_JsonFieldLength_c1053 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_JsonFieldLength_c1054 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
1055 };
1056
1057
1058 struct Expr_Time_c : public ISphExpr
1059 {
1060 bool m_bUTC;
1061 bool m_bDate;
1062
Expr_Time_cExpr_Time_c1063 explicit Expr_Time_c ( bool bUTC, bool bDate )
1064 : m_bUTC ( bUTC )
1065 , m_bDate ( bDate )
1066 {}
1067
IntEvalExpr_Time_c1068 virtual int IntEval ( const CSphMatch & ) const
1069 {
1070 struct tm s; // can't get non-UTC timestamp without mktime
1071 time_t t = time ( NULL );
1072 if ( m_bUTC )
1073 gmtime_r ( &t, &s );
1074 else
1075 localtime_r ( &t, &s );
1076 return (int) mktime ( &s );
1077 }
1078
StringEvalExpr_Time_c1079 virtual int StringEval ( const CSphMatch &, const BYTE ** ppStr ) const
1080 {
1081 CSphString sVal;
1082 struct tm s;
1083 time_t t = time ( NULL );
1084 if ( m_bUTC )
1085 gmtime_r ( &t, &s );
1086 else
1087 localtime_r ( &t, &s );
1088 if ( m_bDate )
1089 sVal.SetSprintf ( "%04d-%02d-%02d %02d:%02d:%02d", s.tm_year+1900, s.tm_mon+1, s.tm_mday, s.tm_hour, s.tm_min, s.tm_sec );
1090 else
1091 sVal.SetSprintf ( "%02d:%02d:%02d", s.tm_hour, s.tm_min, s.tm_sec );
1092 *ppStr = (const BYTE*) sVal.Leak();
1093 return sVal.Length();
1094 }
1095
EvalExpr_Time_c1096 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_Time_c1097 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
IsStringPtrExpr_Time_c1098 virtual bool IsStringPtr () const { return true; }
1099 };
1100
1101
1102 struct Expr_TimeDiff_c : public ISphExpr
1103 {
1104 ISphExpr * m_pFirst;
1105 ISphExpr * m_pSecond;
1106
Expr_TimeDiff_cExpr_TimeDiff_c1107 Expr_TimeDiff_c ( ISphExpr * pFirst, ISphExpr * pSecond )
1108 : m_pFirst ( pFirst )
1109 , m_pSecond ( pSecond )
1110 {}
1111
~Expr_TimeDiff_cExpr_TimeDiff_c1112 ~Expr_TimeDiff_c()
1113 {
1114 SafeRelease ( m_pFirst );
1115 SafeRelease ( m_pSecond );
1116 }
1117
IntEvalExpr_TimeDiff_c1118 virtual int IntEval ( const CSphMatch & tMatch ) const
1119 {
1120 assert ( m_pFirst && m_pSecond );
1121 return m_pFirst->IntEval ( tMatch )-m_pSecond->IntEval ( tMatch );
1122 }
1123
StringEvalExpr_TimeDiff_c1124 virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
1125 {
1126 int iVal = IntEval ( tMatch );
1127 CSphString sVal;
1128 int t = iVal<0 ? -iVal : iVal;
1129 sVal.SetSprintf ( "%s%02d:%02d:%02d", iVal<0 ? "-" : "", t/60/60, (t/60)%60, t%60 );
1130 *ppStr = (const BYTE*) sVal.Leak();
1131 return sVal.Length();
1132 }
1133
EvalExpr_TimeDiff_c1134 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_TimeDiff_c1135 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
IsStringPtrExpr_TimeDiff_c1136 virtual bool IsStringPtr () const { return true; }
1137 };
1138
1139
1140 struct Expr_Iterator_c : Expr_JsonField_c
1141 {
1142 SphAttr_t * m_pData;
1143
Expr_Iterator_cExpr_Iterator_c1144 Expr_Iterator_c ( const CSphAttrLocator & tLocator, int iLocator, CSphVector<ISphExpr*> & dArgs, CSphVector<ESphAttr> & dRetTypes, SphAttr_t * pData )
1145 : Expr_JsonField_c ( tLocator, iLocator, dArgs, dRetTypes )
1146 , m_pData ( pData )
1147 {}
1148
Int64EvalExpr_Iterator_c1149 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
1150 {
1151 uint64_t uValue = m_pData ? *m_pData : 0;
1152 const BYTE * p = m_pStrings + ( uValue & 0xffffffff );
1153 ESphJsonType eType = (ESphJsonType)( uValue >> 32 );
1154 return DoEval ( eType, p, tMatch );
1155 }
1156 };
1157
1158
1159 struct Expr_ForIn_c : public Expr_JsonFieldConv_c
1160 {
1161 ISphExpr * m_pExpr;
1162 bool m_bStrict;
1163 bool m_bIndex;
1164 mutable uint64_t m_uData;
1165
Expr_ForIn_cExpr_ForIn_c1166 Expr_ForIn_c ( ISphExpr * pArg, bool bStrict, bool bIndex )
1167 : Expr_JsonFieldConv_c ( pArg )
1168 , m_pExpr ( NULL )
1169 , m_bStrict ( bStrict )
1170 , m_bIndex ( bIndex )
1171 {}
1172
~Expr_ForIn_cExpr_ForIn_c1173 ~Expr_ForIn_c ()
1174 {
1175 SafeRelease ( m_pExpr );
1176 }
1177
GetRefExpr_ForIn_c1178 SphAttr_t * GetRef ()
1179 {
1180 return (SphAttr_t*)&m_uData;
1181 }
1182
SetExprExpr_ForIn_c1183 void SetExpr ( ISphExpr * pExpr )
1184 {
1185 m_pExpr = pExpr;
1186 }
1187
CommandExpr_ForIn_c1188 virtual void Command ( ESphExprCommand eCmd, void * pArg )
1189 {
1190 Expr_JsonFieldConv_c::Command ( eCmd, pArg );
1191 if ( m_pExpr )
1192 m_pExpr->Command ( eCmd, pArg );
1193 }
1194
ExprEvalExpr_ForIn_c1195 bool ExprEval ( int * pResult, const CSphMatch & tMatch, int iIndex, ESphJsonType eType, const BYTE * pVal ) const
1196 {
1197 m_uData = ( ( (int64_t)( pVal-m_pStrings ) ) | ( ( (int64_t)eType )<<32 ) );
1198 bool bMatch = m_pExpr->Eval ( tMatch )!=0;
1199 *pResult = bMatch ? ( m_bIndex ? iIndex : 1 ) : ( m_bIndex ? -1 : 0 );
1200 return m_bStrict ? bMatch : !bMatch;
1201 }
1202
IntEvalExpr_ForIn_c1203 virtual int IntEval ( const CSphMatch & tMatch ) const
1204 {
1205 int iResult = m_bIndex ? -1 : 0;
1206
1207 if ( !m_pExpr )
1208 return iResult;
1209
1210 const BYTE * p = NULL;
1211 ESphJsonType eJson = GetKey ( &p, tMatch );
1212
1213 switch ( eJson )
1214 {
1215 case JSON_INT32_VECTOR:
1216 case JSON_INT64_VECTOR:
1217 case JSON_DOUBLE_VECTOR:
1218 {
1219 int iSize = eJson==JSON_INT32_VECTOR ? 4 : 8;
1220 ESphJsonType eType = eJson==JSON_INT32_VECTOR ? JSON_INT32
1221 : eJson==JSON_INT64_VECTOR ? JSON_INT64
1222 : JSON_DOUBLE;
1223 int iLen = sphJsonUnpackInt ( &p );
1224 for ( int i=0; i<iLen; i++, p+=iSize )
1225 if ( !ExprEval ( &iResult, tMatch, i, eType, p ) )
1226 break;
1227 break;
1228 }
1229 case JSON_STRING_VECTOR:
1230 {
1231 sphJsonUnpackInt ( &p );
1232 int iLen = sphJsonUnpackInt ( &p );
1233 for ( int i=0;i<iLen;i++ )
1234 {
1235 if ( !ExprEval ( &iResult, tMatch, i, JSON_STRING, p ) )
1236 break;
1237 sphJsonSkipNode ( JSON_STRING, &p );
1238 }
1239 break;
1240 }
1241 case JSON_MIXED_VECTOR:
1242 {
1243 sphJsonUnpackInt ( &p );
1244 int iLen = sphJsonUnpackInt ( &p );
1245 for ( int i=0; i<iLen; i++ )
1246 {
1247 ESphJsonType eType = (ESphJsonType)*p++;
1248 if ( !ExprEval ( &iResult, tMatch, i, eType, p ) )
1249 break;
1250 sphJsonSkipNode ( eType, &p );
1251 }
1252 break;
1253 }
1254 default:
1255 break;
1256 }
1257
1258 return iResult;
1259 }
1260
EvalExpr_ForIn_c1261 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_ForIn_c1262 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
1263 };
1264
1265
GetCollationFn(ESphCollation eCollation)1266 SphStringCmp_fn GetCollationFn ( ESphCollation eCollation )
1267 {
1268 switch ( eCollation )
1269 {
1270 case SPH_COLLATION_LIBC_CS: return sphCollateLibcCS;
1271 case SPH_COLLATION_UTF8_GENERAL_CI: return sphCollateUtf8GeneralCI;
1272 case SPH_COLLATION_BINARY: return sphCollateBinary;
1273 default: return sphCollateLibcCI;
1274 }
1275 }
1276
1277
1278 struct Expr_StrEq_c : public ISphExpr
1279 {
1280 ISphExpr * m_pLeft;
1281 ISphExpr * m_pRight;
1282 SphStringCmp_fn m_fnStrCmp;
1283
Expr_StrEq_cExpr_StrEq_c1284 Expr_StrEq_c ( ISphExpr * pLeft, ISphExpr * pRight, ESphCollation eCollation )
1285 : m_pLeft ( pLeft )
1286 , m_pRight ( pRight )
1287 {
1288 m_fnStrCmp = GetCollationFn ( eCollation );
1289 }
1290
~Expr_StrEq_cExpr_StrEq_c1291 ~Expr_StrEq_c ()
1292 {
1293 SafeRelease ( m_pLeft );
1294 SafeRelease ( m_pRight );
1295 }
1296
CommandExpr_StrEq_c1297 virtual void Command ( ESphExprCommand eCmd, void * pArg )
1298 {
1299 assert ( m_pLeft && m_pRight );
1300 m_pLeft->Command ( eCmd, pArg );
1301 m_pRight->Command ( eCmd, pArg );
1302 }
1303
IntEvalExpr_StrEq_c1304 virtual int IntEval ( const CSphMatch & tMatch ) const
1305 {
1306 const BYTE * pLeft;
1307 const BYTE * pRight;
1308 int iLeft = m_pLeft->StringEval ( tMatch, &pLeft );
1309 int iRight = m_pRight->StringEval ( tMatch, &pRight );
1310
1311 CSphString sStr1 ( iLeft ? (const char*)pLeft : "", iLeft );
1312 CSphString sStr2 ( iRight ? (const char*)pRight : "", iRight );
1313
1314 bool bEq = m_fnStrCmp ( (const BYTE*)sStr1.cstr(), (const BYTE*)sStr2.cstr(), false )==0;
1315
1316 if ( m_pLeft->IsStringPtr() ) SafeDeleteArray ( pLeft );
1317 if ( m_pRight->IsStringPtr() ) SafeDeleteArray ( pRight );
1318 return (int)bEq;
1319 }
1320
EvalExpr_StrEq_c1321 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_StrEq_c1322 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
1323 };
1324
1325
1326 struct Expr_JsonFieldIsNull_c : public Expr_JsonFieldConv_c
1327 {
1328 bool m_bEquals;
1329
Expr_JsonFieldIsNull_cExpr_JsonFieldIsNull_c1330 explicit Expr_JsonFieldIsNull_c ( ISphExpr * pArg, bool bEquals )
1331 : Expr_JsonFieldConv_c ( pArg )
1332 , m_bEquals ( bEquals )
1333 {}
1334
IntEvalExpr_JsonFieldIsNull_c1335 virtual int IntEval ( const CSphMatch & tMatch ) const
1336 {
1337 const BYTE * pVal = NULL;
1338 ESphJsonType eJson = GetKey ( &pVal, tMatch );
1339 return m_bEquals ^ ( eJson!=JSON_EOF && eJson!=JSON_NULL );
1340 }
1341
EvalExpr_JsonFieldIsNull_c1342 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
Int64EvalExpr_JsonFieldIsNull_c1343 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
1344 };
1345
1346 //////////////////////////////////////////////////////////////////////////
1347
1348 struct Expr_MinTopWeight : public ISphExpr
1349 {
1350 int * m_pWeight;
1351
Expr_MinTopWeightExpr_MinTopWeight1352 Expr_MinTopWeight() : m_pWeight ( NULL ) {}
1353
IntEvalExpr_MinTopWeight1354 virtual int IntEval ( const CSphMatch & ) const { return m_pWeight ? *m_pWeight : -INT_MAX; }
EvalExpr_MinTopWeight1355 virtual float Eval ( const CSphMatch & ) const { return m_pWeight ? (float)*m_pWeight : -FLT_MAX; }
Int64EvalExpr_MinTopWeight1356 virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_pWeight ? *m_pWeight : -LLONG_MAX; }
1357
CommandExpr_MinTopWeight1358 virtual void Command ( ESphExprCommand eCmd, void * pArg )
1359 {
1360 CSphMatch * pWorst;
1361 if ( eCmd!=SPH_EXPR_SET_EXTRA_DATA )
1362 return;
1363 if ( static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_QUEUE_WORST, (void**)&pWorst ) )
1364 m_pWeight = &pWorst->m_iWeight;
1365 }
1366 };
1367
1368 struct Expr_MinTopSortval : public ISphExpr
1369 {
1370 CSphMatch * m_pWorst;
1371 int m_iSortval;
1372
Expr_MinTopSortvalExpr_MinTopSortval1373 Expr_MinTopSortval()
1374 : m_pWorst ( NULL )
1375 , m_iSortval ( -1 )
1376 {}
1377
EvalExpr_MinTopSortval1378 virtual float Eval ( const CSphMatch & ) const
1379 {
1380 if ( m_pWorst && m_pWorst->m_pDynamic && m_iSortval>=0 )
1381 return *(float*)( m_pWorst->m_pDynamic + m_iSortval );
1382 return -FLT_MAX;
1383 }
1384
CommandExpr_MinTopSortval1385 virtual void Command ( ESphExprCommand eCmd, void * pArg )
1386 {
1387 if ( eCmd!=SPH_EXPR_SET_EXTRA_DATA )
1388 return;
1389 ISphExtra * p = (ISphExtra*)pArg;
1390 if ( !p->ExtraData ( EXTRA_GET_QUEUE_WORST, (void**)&m_pWorst )
1391 || !p->ExtraData ( EXTRA_GET_QUEUE_SORTVAL, (void**)&m_iSortval ) )
1392 {
1393 m_pWorst = NULL;
1394 }
1395 }
1396 };
1397
1398 //////////////////////////////////////////////////////////////////////////
1399
// operand shorthands for the node declarations below
// each one expands to an evaluation of the named operand, so every use re-evaluates that operand;
// they expect m_pFirst / m_pSecond / m_pThird and tMatch to be in scope at the point of use

// float evaluation
#define FIRST	m_pFirst->Eval(tMatch)
#define SECOND	m_pSecond->Eval(tMatch)
#define THIRD	m_pThird->Eval(tMatch)

// 32-bit integer evaluation
#define INTFIRST	m_pFirst->IntEval(tMatch)
#define INTSECOND	m_pSecond->IntEval(tMatch)
#define INTTHIRD	m_pThird->IntEval(tMatch)

// 64-bit integer evaluation
#define INT64FIRST	m_pFirst->Int64Eval(tMatch)
#define INT64SECOND	m_pSecond->Int64Eval(tMatch)
#define INT64THIRD	m_pThird->Int64Eval(tMatch)
1411
// opens a struct derived from Expr_Unary_c with a forwarding constructor
// the struct is left open on purpose; close it with DECLARE_END() (or a literal "};")
#define DECLARE_UNARY_TRAITS(_classname) \
	struct _classname : public Expr_Unary_c \
	{ \
		explicit _classname ( ISphExpr * pFirst ) : Expr_Unary_c ( pFirst ) {}

// closes a struct opened by one of the DECLARE_xxx_TRAITS macros
#define DECLARE_END() };

// complete unary node that only provides the float evaluator
#define DECLARE_UNARY_FLT(_classname,_expr) \
		DECLARE_UNARY_TRAITS ( _classname ) \
		virtual float Eval ( const CSphMatch & tMatch ) const { return _expr; } \
	};

// complete unary node with float (_expr, cast to float), int (_expr2) and int64 (_expr3) evaluators
#define DECLARE_UNARY_INT(_classname,_expr,_expr2,_expr3) \
		DECLARE_UNARY_TRAITS ( _classname ) \
		virtual float Eval ( const CSphMatch & tMatch ) const { return (float)_expr; } \
		virtual int IntEval ( const CSphMatch & tMatch ) const { return _expr2; } \
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return _expr3; } \
	};
1430
// integer absolute value
// the argument is fully parenthesized in every position, so compound arguments such as IABS ( a-b ) negate correctly
// NOTE: the argument is expanded (and therefore evaluated) twice
#define IABS(_arg) ( (_arg)>0 ? (_arg) : -(_arg) )
1432
// negation, absolute value, and rounding; ceil/floor always compute on the float value of the operand
DECLARE_UNARY_INT ( Expr_Neg_c,		-FIRST,					-INTFIRST,			-INT64FIRST )
DECLARE_UNARY_INT ( Expr_Abs_c,		fabs(FIRST),			IABS(INTFIRST),		IABS(INT64FIRST) )
DECLARE_UNARY_INT ( Expr_Ceil_c,	float(ceil(FIRST)),		int(ceil(FIRST)),	int64_t(ceil(FIRST)) )
DECLARE_UNARY_INT ( Expr_Floor_c,	float(floor(FIRST)),	int(floor(FIRST)),	int64_t(floor(FIRST)) )

// float-only math functions
DECLARE_UNARY_FLT ( Expr_Sin_c,		float(sin(FIRST)) )
DECLARE_UNARY_FLT ( Expr_Cos_c,		float(cos(FIRST)) )
DECLARE_UNARY_FLT ( Expr_Exp_c,		float(exp(FIRST)) )

// logical NOT over 32-bit and 64-bit operands (1 when the operand is zero, 0 otherwise);
// Expr_Sint_c evaluates its operand through IntEval() in all three flavors
DECLARE_UNARY_INT ( Expr_NotInt_c,		(float)(INTFIRST?0:1),		INTFIRST?0:1,	INTFIRST?0:1 )
DECLARE_UNARY_INT ( Expr_NotInt64_c,	(float)(INT64FIRST?0:1),	INT64FIRST?0:1,	INT64FIRST?0:1 )
DECLARE_UNARY_INT ( Expr_Sint_c,		(float)(INTFIRST),			INTFIRST,		INTFIRST )
1445
1446 DECLARE_UNARY_TRAITS ( Expr_Ln_c )
1447 virtual float Eval ( const CSphMatch & tMatch ) const
1448 {
1449 float fFirst = m_pFirst->Eval ( tMatch );
1450 // ideally this would be SQLNULL instead of plain 0.0f
1451 return fFirst>0.0f ? (float)log ( fFirst ) : 0.0f;
1452 }
1453 DECLARE_END()
1454
DECLARE_UNARY_TRAITS(Expr_Log2_c)1455 DECLARE_UNARY_TRAITS ( Expr_Log2_c )
1456 virtual float Eval ( const CSphMatch & tMatch ) const
1457 {
1458 float fFirst = m_pFirst->Eval ( tMatch );
1459 // ideally this would be SQLNULL instead of plain 0.0f
1460 return fFirst>0.0f ? (float)( log ( fFirst )*M_LOG2E ) : 0.0f;
1461 }
1462 DECLARE_END()
1463
DECLARE_UNARY_TRAITS(Expr_Log10_c)1464 DECLARE_UNARY_TRAITS ( Expr_Log10_c )
1465 virtual float Eval ( const CSphMatch & tMatch ) const
1466 {
1467 float fFirst = m_pFirst->Eval ( tMatch );
1468 // ideally this would be SQLNULL instead of plain 0.0f
1469 return fFirst>0.0f ? (float)( log ( fFirst )*M_LOG10E ) : 0.0f;
1470 }
1471 DECLARE_END()
1472
DECLARE_UNARY_TRAITS(Expr_Sqrt_c)1473 DECLARE_UNARY_TRAITS ( Expr_Sqrt_c )
1474 virtual float Eval ( const CSphMatch & tMatch ) const
1475 {
1476 float fFirst = m_pFirst->Eval ( tMatch );
1477 // ideally this would be SQLNULL instead of plain 0.0f in case of negative argument
1478 // MEGA optimization: do not call sqrt for 0.0f
1479 return fFirst>0.0f ? (float)sqrt ( fFirst ) : 0.0f;
1480 }
1481 DECLARE_END()
1482
1483 //////////////////////////////////////////////////////////////////////////
1484
// opens a two-operand node: stores both operands, releases them in the destructor,
// and relays every command to both of them
// the struct is left open on purpose; close it with DECLARE_END() (or a literal "};")
#define DECLARE_BINARY_TRAITS(_classname) \
	struct _classname : public ISphExpr \
	{ \
		ISphExpr * m_pFirst; \
		ISphExpr * m_pSecond; \
		_classname ( ISphExpr * pFirst, ISphExpr * pSecond ) : m_pFirst ( pFirst ), m_pSecond ( pSecond ) {} \
		~_classname () { SafeRelease ( m_pFirst ); SafeRelease ( m_pSecond ); } \
		virtual void Command ( ESphExprCommand eCmd, void * pArg ) { m_pFirst->Command ( eCmd, pArg ); m_pSecond->Command ( eCmd, pArg ); }

// complete binary node that only provides the float evaluator
#define DECLARE_BINARY_FLT(_classname,_expr) \
		DECLARE_BINARY_TRAITS ( _classname ) \
		virtual float Eval ( const CSphMatch & tMatch ) const { return _expr; } \
	};

// complete binary node with float (_expr), int (_expr2) and int64 (_expr3) evaluators
#define DECLARE_BINARY_INT(_classname,_expr,_expr2,_expr3) \
		DECLARE_BINARY_TRAITS ( _classname ) \
		virtual float Eval ( const CSphMatch & tMatch ) const { return _expr; } \
		virtual int IntEval ( const CSphMatch & tMatch ) const { return _expr2; } \
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return _expr3; } \
	};

// declares three nodes per operation: <name>Float_c, <name>Int_c and <name>Int64_c
// each computes in its own type and derives the other two evaluators by casting that result
#define DECLARE_BINARY_POLY(_classname,_expr,_expr2,_expr3) \
	DECLARE_BINARY_INT ( _classname##Float_c,	_expr,						(int)Eval(tMatch),		(int64_t)Eval(tMatch ) ) \
	DECLARE_BINARY_INT ( _classname##Int_c,		(float)IntEval(tMatch),		_expr2,					(int64_t)IntEval(tMatch) ) \
	DECLARE_BINARY_INT ( _classname##Int64_c,	(float)Int64Eval(tMatch),	(int)Int64Eval(tMatch),	_expr3 )

// boolean condition to 1/0, as float or as int
#define IFFLT(_expr)	( (_expr) ? 1.0f : 0.0f )
#define IFINT(_expr)	( (_expr) ? 1 : 0 )
1513
// arithmetic: the integer flavors of add/sub/mul cast both operands to unsigned (DWORD / uint64_t) before operating
DECLARE_BINARY_INT ( Expr_Add_c,	FIRST + SECOND,						(DWORD)INTFIRST + (DWORD)INTSECOND,		(uint64_t)INT64FIRST + (uint64_t)INT64SECOND )
DECLARE_BINARY_INT ( Expr_Sub_c,	FIRST - SECOND,						(DWORD)INTFIRST - (DWORD)INTSECOND,		(uint64_t)INT64FIRST - (uint64_t)INT64SECOND )
DECLARE_BINARY_INT ( Expr_Mul_c,	FIRST * SECOND,						(DWORD)INTFIRST * (DWORD)INTSECOND,		(uint64_t)INT64FIRST * (uint64_t)INT64SECOND )
// bitwise: the float flavor truncates both operands to int first
DECLARE_BINARY_INT ( Expr_BitAnd_c,	(float)(int(FIRST)&int(SECOND)),	INTFIRST & INTSECOND,				INT64FIRST & INT64SECOND )
DECLARE_BINARY_INT ( Expr_BitOr_c,	(float)(int(FIRST)|int(SECOND)),	INTFIRST | INTSECOND,				INT64FIRST | INT64SECOND )
1519 DECLARE_BINARY_INT ( Expr_Mod_c, (float)(int(FIRST)%int(SECOND)), INTFIRST % INTSECOND, INT64FIRST % INT64SECOND )
1520
DECLARE_BINARY_TRAITS(Expr_Div_c)1521 DECLARE_BINARY_TRAITS ( Expr_Div_c )
1522 virtual float Eval ( const CSphMatch & tMatch ) const
1523 {
1524 float fSecond = m_pSecond->Eval ( tMatch );
1525 // ideally this would be SQLNULL instead of plain 0.0f
1526 return fSecond ? m_pFirst->Eval ( tMatch )/fSecond : 0.0f;
1527 }
1528 DECLARE_END()
1529
DECLARE_BINARY_TRAITS(Expr_Idiv_c)1530 DECLARE_BINARY_TRAITS ( Expr_Idiv_c )
1531 virtual float Eval ( const CSphMatch & tMatch ) const
1532 {
1533 int iSecond = int(SECOND);
1534 // ideally this would be SQLNULL instead of plain 0.0f
1535 return iSecond ? float(int(FIRST)/iSecond) : 0.0f;
1536 }
1537
IntEval(const CSphMatch & tMatch) const1538 virtual int IntEval ( const CSphMatch & tMatch ) const
1539 {
1540 int iSecond = INTSECOND;
1541 // ideally this would be SQLNULL instead of plain 0
1542 return iSecond ? ( INTFIRST / iSecond ) : 0;
1543 }
1544
Int64Eval(const CSphMatch & tMatch) const1545 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
1546 {
1547 int64_t iSecond = INT64SECOND;
1548 // ideally this would be SQLNULL instead of plain 0
1549 return iSecond ? ( INT64FIRST / iSecond ) : 0;
1550 }
1551 DECLARE_END()
1552
// comparisons, three typed nodes each (see DECLARE_BINARY_POLY); results are 1 or 0
// float equality and inequality use an absolute tolerance of 1e-6
DECLARE_BINARY_POLY ( Expr_Lt,		IFFLT ( FIRST<SECOND ),					IFINT ( INTFIRST<INTSECOND ),		IFINT ( INT64FIRST<INT64SECOND ) )
DECLARE_BINARY_POLY ( Expr_Gt,		IFFLT ( FIRST>SECOND ),					IFINT ( INTFIRST>INTSECOND ),		IFINT ( INT64FIRST>INT64SECOND ) )
DECLARE_BINARY_POLY ( Expr_Lte,		IFFLT ( FIRST<=SECOND ),				IFINT ( INTFIRST<=INTSECOND ),		IFINT ( INT64FIRST<=INT64SECOND ) )
DECLARE_BINARY_POLY ( Expr_Gte,		IFFLT ( FIRST>=SECOND ),				IFINT ( INTFIRST>=INTSECOND ),		IFINT ( INT64FIRST>=INT64SECOND ) )
DECLARE_BINARY_POLY ( Expr_Eq,		IFFLT ( fabs ( FIRST-SECOND )<=1e-6 ),	IFINT ( INTFIRST==INTSECOND ),		IFINT ( INT64FIRST==INT64SECOND ) )
DECLARE_BINARY_POLY ( Expr_Ne,		IFFLT ( fabs ( FIRST-SECOND )>1e-6 ),	IFINT ( INTFIRST!=INTSECOND ),		IFINT ( INT64FIRST!=INT64SECOND ) )

// two-argument min, max, and power
DECLARE_BINARY_INT ( Expr_Min_c,	Min ( FIRST, SECOND ),					Min ( INTFIRST, INTSECOND ),		Min ( INT64FIRST, INT64SECOND ) )
DECLARE_BINARY_INT ( Expr_Max_c,	Max ( FIRST, SECOND ),					Max ( INTFIRST, INTSECOND ),		Max ( INT64FIRST, INT64SECOND ) )
DECLARE_BINARY_FLT ( Expr_Pow_c,	float ( pow ( FIRST, SECOND ) ) )

// logical AND / OR; && and || short-circuit, so the second operand is not always evaluated
DECLARE_BINARY_POLY ( Expr_And,		FIRST!=0.0f && SECOND!=0.0f,			IFINT ( INTFIRST && INTSECOND ),	IFINT ( INT64FIRST && INT64SECOND ) )
DECLARE_BINARY_POLY ( Expr_Or,		FIRST!=0.0f || SECOND!=0.0f,			IFINT ( INTFIRST || INTSECOND ),	IFINT ( INT64FIRST || INT64SECOND ) )

// atan2 of the first operand over the second
DECLARE_BINARY_FLT ( Expr_Atan2_c,	float ( atan2 ( FIRST, SECOND ) ) )
1568
1569 //////////////////////////////////////////////////////////////////////////
1570
1571 /// boring base stuff
1572 struct ExprThreeway_c : public ISphExpr
1573 {
1574 ISphExpr * m_pFirst;
1575 ISphExpr * m_pSecond;
1576 ISphExpr * m_pThird;
1577
ExprThreeway_cExprThreeway_c1578 ExprThreeway_c ( ISphExpr * pFirst, ISphExpr * pSecond, ISphExpr * pThird )
1579 : m_pFirst ( pFirst )
1580 , m_pSecond ( pSecond )
1581 , m_pThird ( pThird )
1582 {}
1583
~ExprThreeway_cExprThreeway_c1584 ~ExprThreeway_c()
1585 {
1586 SafeRelease ( m_pFirst );
1587 SafeRelease ( m_pSecond );
1588 SafeRelease ( m_pThird );
1589 }
1590
CommandExprThreeway_c1591 virtual void Command ( ESphExprCommand eCmd, void * pArg )
1592 {
1593 m_pFirst->Command ( eCmd, pArg );
1594 m_pSecond->Command ( eCmd, pArg );
1595 m_pThird->Command ( eCmd, pArg );
1596 }
1597 };
1598
// complete three-operand node on top of ExprThreeway_c
// with float (_expr), int (_expr2) and int64 (_expr3) evaluators
#define DECLARE_TERNARY(_classname,_expr,_expr2,_expr3) \
	struct _classname : public ExprThreeway_c \
	{ \
		_classname ( ISphExpr * pFirst, ISphExpr * pSecond, ISphExpr * pThird ) \
			: ExprThreeway_c ( pFirst, pSecond, pThird ) {} \
		\
		virtual float Eval ( const CSphMatch & tMatch ) const { return _expr; } \
		virtual int IntEval ( const CSphMatch & tMatch ) const { return _expr2; } \
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return _expr3; } \
	};

// conditional (only the selected branch gets evaluated), multiply-add, and three-way product
DECLARE_TERNARY ( Expr_If_c,	( FIRST!=0.0f ) ? SECOND : THIRD,	INTFIRST ? INTSECOND : INTTHIRD,	INT64FIRST ? INT64SECOND : INT64THIRD )
DECLARE_TERNARY ( Expr_Madd_c,	FIRST*SECOND+THIRD,					INTFIRST*INTSECOND + INTTHIRD,		INT64FIRST*INT64SECOND + INT64THIRD )
DECLARE_TERNARY ( Expr_Mul3_c,	FIRST*SECOND*THIRD,					INTFIRST*INTSECOND*INTTHIRD,		INT64FIRST*INT64SECOND*INT64THIRD )
1613
1614 //////////////////////////////////////////////////////////////////////////
1615
// unary node that treats its integer operand as a time_t, breaks it down with localtime_r(),
// and evaluates _expr over the resulting "struct tm s"
// the float and int64 evaluators just convert the int result
#define DECLARE_TIMESTAMP(_classname,_expr) \
	DECLARE_UNARY_TRAITS ( _classname ) \
		virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval(tMatch); } \
		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval(tMatch); } \
		virtual int IntEval ( const CSphMatch & tMatch ) const \
		{ \
			time_t ts = (time_t)INTFIRST;	\
			struct tm s; \
			localtime_r ( &ts, &s ); \
			return _expr; \
		} \
	};

// day of month, 1-based month, 4-digit year, and the packed YYYYMM / YYYYMMDD forms
DECLARE_TIMESTAMP ( Expr_Day_c,				s.tm_mday )
DECLARE_TIMESTAMP ( Expr_Month_c,			s.tm_mon+1 )
DECLARE_TIMESTAMP ( Expr_Year_c,			s.tm_year+1900 )
DECLARE_TIMESTAMP ( Expr_YearMonth_c,		(s.tm_year+1900)*100+s.tm_mon+1 )
DECLARE_TIMESTAMP ( Expr_YearMonthDay_c,	(s.tm_year+1900)*10000+(s.tm_mon+1)*100+s.tm_mday )
1634
1635 //////////////////////////////////////////////////////////////////////////
1636 // UDF CALL SITE
1637 //////////////////////////////////////////////////////////////////////////
1638
1639 void * UdfMalloc ( int iLen )
1640 {
1641 return new BYTE [ iLen ];
1642 }
1643
1644 /// UDF call site
1645 struct UdfCall_t
1646 {
1647 const PluginUDF_c * m_pUdf;
1648 SPH_UDF_INIT m_tInit;
1649 SPH_UDF_ARGS m_tArgs;
1650 CSphVector<int> m_dArgs2Free; // these args should be freed explicitly
1651
UdfCall_tUdfCall_t1652 UdfCall_t()
1653 {
1654 m_pUdf = NULL;
1655 m_tInit.func_data = NULL;
1656 m_tInit.is_const = false;
1657 m_tArgs.arg_count = 0;
1658 m_tArgs.arg_types = NULL;
1659 m_tArgs.arg_values = NULL;
1660 m_tArgs.arg_names = NULL;
1661 m_tArgs.str_lengths = NULL;
1662 m_tArgs.fn_malloc = UdfMalloc;
1663 }
1664
~UdfCall_tUdfCall_t1665 ~UdfCall_t ()
1666 {
1667 if ( m_pUdf )
1668 m_pUdf->Release();
1669 SafeDeleteArray ( m_tArgs.arg_types );
1670 SafeDeleteArray ( m_tArgs.arg_values );
1671 SafeDeleteArray ( m_tArgs.arg_names );
1672 SafeDeleteArray ( m_tArgs.str_lengths );
1673 }
1674 };
1675
1676 //////////////////////////////////////////////////////////////////////////
1677 // PARSER INTERNALS
1678 //////////////////////////////////////////////////////////////////////////
1679 class ExprParser_t;
1680
1681 #ifdef CMAKE_GENERATED_GRAMMAR
1682 #include "bissphinxexpr.h"
1683 #else
1684 #include "yysphinxexpr.h"
1685 #endif
1686
1687
1688 /// known functions
/// known functions
/// entries are listed in the same order as the rows of g_dFuncs
/// NOTE(review): presumably the two lists must stay in sync, with enum value equal to table index; confirm before reordering
enum Func_e
{
	FUNC_NOW=0,

	FUNC_ABS,
	FUNC_CEIL,
	FUNC_FLOOR,
	FUNC_SIN,
	FUNC_COS,
	FUNC_LN,
	FUNC_LOG2,
	FUNC_LOG10,
	FUNC_EXP,
	FUNC_SQRT,
	FUNC_BIGINT,
	FUNC_SINT,
	FUNC_CRC32,
	FUNC_FIBONACCI,

	FUNC_DAY,
	FUNC_MONTH,
	FUNC_YEAR,
	FUNC_YEARMONTH,
	FUNC_YEARMONTHDAY,

	FUNC_MIN,
	FUNC_MAX,
	FUNC_POW,
	FUNC_IDIV,

	FUNC_IF,
	FUNC_MADD,
	FUNC_MUL3,

	FUNC_INTERVAL,
	FUNC_IN,
	FUNC_BITDOT,
	FUNC_REMAP,

	FUNC_GEODIST,
	FUNC_EXIST,
	FUNC_POLY2D,
	FUNC_GEOPOLY2D,
	FUNC_CONTAINS,
	FUNC_ZONESPANLIST,
	FUNC_TO_STRING,
	FUNC_RANKFACTORS,
	FUNC_PACKEDFACTORS,
	FUNC_FACTORS,
	FUNC_BM25F,
	FUNC_INTEGER,
	FUNC_DOUBLE,
	FUNC_LENGTH,
	FUNC_LEAST,
	FUNC_GREATEST,
	FUNC_UINT,

	FUNC_CURTIME,
	FUNC_UTC_TIME,
	FUNC_UTC_TIMESTAMP,
	FUNC_TIMEDIFF,
	FUNC_CURRENT_USER,
	FUNC_CONNECTION_ID,
	FUNC_ALL,
	FUNC_ANY,
	FUNC_INDEXOF,

	FUNC_MIN_TOP_WEIGHT,
	FUNC_MIN_TOP_SORTVAL,

	FUNC_ATAN2
};
1761
1762
/// one row of the known functions table
struct FuncDesc_t
{
	const char *	m_sName;	// function name, lowercase in the table
	int				m_iArgs;	// argument count; NOTE(review): negative values presumably mark variable-length argument lists, confirm against the parser
	Func_e			m_eFunc;	// function id
	ESphAttr		m_eRet;		// declared return type; NOTE(review): SPH_ATTR_NONE presumably means "derived from the arguments", confirm against the parser
};
1770
1771
/// known functions table: name, argument count, id, return type
/// rows are listed in the same order as the Func_e entries
/// the disabled HashGen() helper below emits each row's index, so reordering rows means regenerating the hash
static FuncDesc_t g_dFuncs[] =
{
	{ "now",			0,	FUNC_NOW,			SPH_ATTR_INTEGER },

	{ "abs",			1,	FUNC_ABS,			SPH_ATTR_NONE },
	{ "ceil",			1,	FUNC_CEIL,			SPH_ATTR_INTEGER },
	{ "floor",			1,	FUNC_FLOOR,			SPH_ATTR_INTEGER },
	{ "sin",			1,	FUNC_SIN,			SPH_ATTR_FLOAT },
	{ "cos",			1,	FUNC_COS,			SPH_ATTR_FLOAT },
	{ "ln",				1,	FUNC_LN,			SPH_ATTR_FLOAT },
	{ "log2",			1,	FUNC_LOG2,			SPH_ATTR_FLOAT },
	{ "log10",			1,	FUNC_LOG10,			SPH_ATTR_FLOAT },
	{ "exp",			1,	FUNC_EXP,			SPH_ATTR_FLOAT },
	{ "sqrt",			1,	FUNC_SQRT,			SPH_ATTR_FLOAT },
	{ "bigint",			1,	FUNC_BIGINT,		SPH_ATTR_BIGINT },	// type-enforcer special as-if-function
	{ "sint",			1,	FUNC_SINT,			SPH_ATTR_BIGINT },	// type-enforcer special as-if-function
	{ "crc32",			1,	FUNC_CRC32,			SPH_ATTR_INTEGER },
	{ "fibonacci",		1,	FUNC_FIBONACCI,		SPH_ATTR_INTEGER },

	{ "day",			1,	FUNC_DAY,			SPH_ATTR_INTEGER },
	{ "month",			1,	FUNC_MONTH,			SPH_ATTR_INTEGER },
	{ "year",			1,	FUNC_YEAR,			SPH_ATTR_INTEGER },
	{ "yearmonth",		1,	FUNC_YEARMONTH,		SPH_ATTR_INTEGER },
	{ "yearmonthday",	1,	FUNC_YEARMONTHDAY,	SPH_ATTR_INTEGER },

	{ "min",			2,	FUNC_MIN,			SPH_ATTR_NONE },
	{ "max",			2,	FUNC_MAX,			SPH_ATTR_NONE },
	{ "pow",			2,	FUNC_POW,			SPH_ATTR_FLOAT },
	{ "idiv",			2,	FUNC_IDIV,			SPH_ATTR_NONE },

	{ "if",				3,	FUNC_IF,			SPH_ATTR_NONE },
	{ "madd",			3,	FUNC_MADD,			SPH_ATTR_NONE },
	{ "mul3",			3,	FUNC_MUL3,			SPH_ATTR_NONE },

	{ "interval",		-2,	FUNC_INTERVAL,		SPH_ATTR_INTEGER },
	{ "in",				-1, FUNC_IN,			SPH_ATTR_INTEGER },
	{ "bitdot",			-1, FUNC_BITDOT,		SPH_ATTR_NONE },
	{ "remap",			4,	FUNC_REMAP,			SPH_ATTR_INTEGER },

	{ "geodist",		-4,	FUNC_GEODIST,		SPH_ATTR_FLOAT },
	{ "exist",			2,	FUNC_EXIST,			SPH_ATTR_NONE },
	{ "poly2d",			-1,	FUNC_POLY2D,		SPH_ATTR_POLY2D },
	{ "geopoly2d",		-1,	FUNC_GEOPOLY2D,		SPH_ATTR_POLY2D },
	{ "contains",		3,	FUNC_CONTAINS,		SPH_ATTR_INTEGER },
	{ "zonespanlist",	0,	FUNC_ZONESPANLIST,	SPH_ATTR_STRINGPTR },
	{ "to_string",		1,	FUNC_TO_STRING,		SPH_ATTR_STRINGPTR },
	{ "rankfactors",	0,	FUNC_RANKFACTORS,	SPH_ATTR_STRINGPTR },
	{ "packedfactors",	0,	FUNC_PACKEDFACTORS, SPH_ATTR_FACTORS },
	{ "factors",		0,	FUNC_FACTORS,		SPH_ATTR_FACTORS }, // just an alias for PACKEDFACTORS()
	{ "bm25f",			-2,	FUNC_BM25F,			SPH_ATTR_FLOAT },
	{ "integer",		1,	FUNC_INTEGER,		SPH_ATTR_BIGINT },
	{ "double",			1,	FUNC_DOUBLE,		SPH_ATTR_FLOAT },
	{ "length",			1,	FUNC_LENGTH,		SPH_ATTR_INTEGER },
	{ "least",			1,	FUNC_LEAST,			SPH_ATTR_STRINGPTR },
	{ "greatest",		1,	FUNC_GREATEST,		SPH_ATTR_STRINGPTR },
	{ "uint",			1,	FUNC_UINT,			SPH_ATTR_INTEGER },

	{ "curtime",		0,	FUNC_CURTIME,		SPH_ATTR_STRINGPTR },
	{ "utc_time",		0,	FUNC_UTC_TIME,		SPH_ATTR_STRINGPTR },
	{ "utc_timestamp",	0,	FUNC_UTC_TIMESTAMP,	SPH_ATTR_STRINGPTR },
	{ "timediff",		2,	FUNC_TIMEDIFF,		SPH_ATTR_STRINGPTR },
	{ "current_user",	0,	FUNC_CURRENT_USER,	SPH_ATTR_INTEGER },
	{ "connection_id",	0,	FUNC_CONNECTION_ID,	SPH_ATTR_INTEGER },
	{ "all",			-1,	FUNC_ALL,			SPH_ATTR_INTEGER },
	{ "any",			-1,	FUNC_ANY,			SPH_ATTR_INTEGER },
	{ "indexof",		-1,	FUNC_INDEXOF,		SPH_ATTR_BIGINT },

	{ "min_top_weight",		0,	FUNC_MIN_TOP_WEIGHT,	SPH_ATTR_INTEGER },
	{ "min_top_sortval",	0,	FUNC_MIN_TOP_SORTVAL,	SPH_ATTR_FLOAT },

	{ "atan2",			2,	FUNC_ATAN2,			SPH_ATTR_FLOAT }
};
1844
1845
// helper to generate input data for gperf
// run this, run gperf, that will generate a C program
// copy dAsso from asso_values in that C source
// modify iHash switch according to that C source, if needed
// copy dIndexes from the program output
//
// NOTE: the whole helper is compiled out (#if 0). When enabled, the static
// initializer G_HASHGEN below calls HashGen(), which prints the gperf input
// to stdout and then calls exit(0), so the trailing "return 0" is never reached.
#if 0
int HashGen()
{
	// gperf declarations section: one record per function, name plus its index in g_dFuncs
	printf ( "struct func { char *name; int num; };\n%%%%\n" );
	for ( int i=0; i<int( sizeof ( g_dFuncs )/sizeof ( g_dFuncs[0] )); i++ )
		printf ( "%s, %d\n", g_dFuncs[i].m_sName, i );
	printf ( "%%%%\n" );

	// emitted C main(): prints wordlist[i].num for every hash slot, or -1 for an empty slot, ten per line
	printf ( "void main()\n" );
	printf ( "{\n" );
	printf ( "\tint i;\n" );
	printf ( "\tfor ( i=0; i<=MAX_HASH_VALUE; i++ )\n" );
	printf ( "\t\tprintf ( \"%%d,%%s\", wordlist[i].name[0] ? wordlist[i].num : -1, (i%%10)==9 ? \"\\n\" : \" \" );\n" );
	printf ( "}\n" );

	// reminder of the gperf command line to run on the generated input
	printf ( "// gperf -Gt 1.p > 1.c\n" );
	exit ( 0 );
	return 0;
}

// forces HashGen() to run during static initialization
static int G_HASHGEN = HashGen();
#endif
1871
1872
1873 // FIXME? can remove this by preprocessing the assoc table
FuncHashLower(BYTE u)1874 static inline BYTE FuncHashLower ( BYTE u )
1875 {
1876 return ( u>='A' && u<='Z' ) ? ( u | 0x20 ) : u;
1877 }
1878
1879
/// look up a built-in function by name (ASCII case-insensitive)
/// returns the index into g_dFuncs on success, or -1 if the name is not a built-in function
///
/// the hash is the key length plus the dAsso values of (up to) its first three characters;
/// the resulting slot in dIndexes is then verified with a full strcasecmp()
static int FuncHashLookup ( const char * sKey )
{
	// precondition: non-empty key
	// NOTE(review): this is enforced by the assert only; with asserts compiled out,
	// an empty key would take the "default" branch below and read s[2]
	assert ( sKey && sKey[0] );

	// per-character association values, indexed by (lowercased) byte value
	// 109 equals the number of entries in dIndexes, so any character mapped to 109
	// pushes the hash out of range and the lookup fails
	static BYTE dAsso[] =
	{
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		10, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 5, 109, 35, 0, 0,
		50, 5, 20, 30, 109, 10, 109, 109, 5, 0,
		10, 15, 5, 25, 0, 55, 0, 0, 109, 21,
		45, 20, 0, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109
	};

	// hash = length + asso(s[2]) + asso(s[1]) + asso(s[0])
	// the cases intentionally fall through, so shorter keys just use fewer characters
	const BYTE * s = (const BYTE*) sKey;
	int iHash = strlen ( sKey );
	switch ( iHash )
	{
		default:	iHash += dAsso [ FuncHashLower ( s[2] ) ];
		case 2:		iHash += dAsso [ FuncHashLower ( s[1] ) ];
		case 1:		iHash += dAsso [ FuncHashLower ( s[0] ) ];
	}

	// hash value to g_dFuncs index; -1 marks an unused slot
	static int dIndexes[] =
	{
		-1, -1, -1, -1, -1, 13, -1, 48, 49, 26,
		30, -1, 52, 50, -1, 41, 29, 6, 51, 2,
		-1, -1, 28, 20, 47, -1, 44, 42, 27, 37,
		16, 33, 24, 35, 57, 58, -1, 36, 53, 14,
		-1, -1, -1, 46, 22, 3, 11, -1, 54, 0,
		45, -1, -1, 39, 7, 8, 38, 31, 9, 34,
		-1, -1, 40, -1, 17, 32, -1, -1, 55, 18,
		-1, 43, 19, 5, 23, 59, -1, 56, 4, 12,
		-1, -1, -1, 21, 10, -1, -1, -1, -1, 25,
		-1, -1, -1, 1, -1, -1, -1, -1, -1, -1,
		-1, -1, -1, -1, -1, -1, -1, -1, 15
	};

	// out-of-range hash means the key can not be a known function
	if ( iHash<0 || iHash>=(int)(sizeof(dIndexes)/sizeof(dIndexes[0])) )
		return -1;

	// different keys may land in the same slot, so confirm with a full name compare
	int iFunc = dIndexes[iHash];
	if ( iFunc>=0 && strcasecmp ( g_dFuncs[iFunc].m_sName, sKey )==0 )
		return iFunc;
	return -1;
}
1946
1947
FuncHashCheck()1948 static int FuncHashCheck()
1949 {
1950 for ( int i=0; i<(int)(sizeof(g_dFuncs)/sizeof(g_dFuncs[0])); i++ )
1951 {
1952 CSphString sKey ( g_dFuncs[i].m_sName );
1953 sKey.ToLower();
1954 if ( FuncHashLookup ( sKey.cstr() )!=i )
1955 sphDie ( "INTERNAL ERROR: lookup for %s() failed, rebuild function hash", sKey.cstr() );
1956 sKey.ToUpper();
1957 if ( FuncHashLookup ( sKey.cstr() )!=i )
1958 sphDie ( "INTERNAL ERROR: lookup for %s() failed, rebuild function hash", sKey.cstr() );
1959 if ( g_dFuncs[i].m_eFunc!=i )
1960 sphDie ( "INTERNAL ERROR: function hash entry %s() at index %d maps to Func_e entry %d, sync Func_e and g_dFuncs",
1961 sKey.cstr(), i, g_dFuncs[i].m_eFunc );
1962 }
1963 if ( FuncHashLookup("A")!=-1 )
1964 sphDie ( "INTERNAL ERROR: lookup for A() succeeded, rebuild function hash" );
1965 return 1;
1966 }
1967
1968
// runs FuncHashCheck() during static initialization of this translation unit
static int G_FUNC_HASH_CHECK = FuncHashCheck();
1970
1971 //////////////////////////////////////////////////////////////////////////
1972
1973 /// check whether the type is numeric
IsNumeric(ESphAttr eType)1974 static inline bool IsNumeric ( ESphAttr eType )
1975 {
1976 return eType==SPH_ATTR_INTEGER || eType==SPH_ATTR_BIGINT || eType==SPH_ATTR_FLOAT;
1977 }
1978
1979 /// check for type based on int value
GetIntType(int64_t iValue)1980 static inline ESphAttr GetIntType ( int64_t iValue )
1981 {
1982 return ( iValue>=(int64_t)INT_MIN && iValue<=(int64_t)INT_MAX ) ? SPH_ATTR_INTEGER : SPH_ATTR_BIGINT;
1983 }
1984
1985 /// get the widest numeric type of the two
WidestType(ESphAttr a,ESphAttr b)1986 static inline ESphAttr WidestType ( ESphAttr a, ESphAttr b )
1987 {
1988 assert ( IsNumeric(a) && IsNumeric(b) );
1989 if ( a==SPH_ATTR_FLOAT || b==SPH_ATTR_FLOAT )
1990 return SPH_ATTR_FLOAT;
1991 if ( a==SPH_ATTR_BIGINT || b==SPH_ATTR_BIGINT )
1992 return SPH_ATTR_BIGINT;
1993 return SPH_ATTR_INTEGER;
1994 }
1995
1996 /// list of constants
1997 class ConstList_c
1998 {
1999 public:
2000 CSphVector<int64_t> m_dInts; ///< dword/int64 storage
2001 CSphVector<float> m_dFloats; ///< float storage
2002 ESphAttr m_eRetType; ///< SPH_ATTR_INTEGER, SPH_ATTR_BIGINT, SPH_ATTR_STRING, or SPH_ATTR_FLOAT
2003 CSphString m_sExpr; ///< m_sExpr copy for TOK_CONST_STRING evaluation
2004
2005 public:
ConstList_c()2006 ConstList_c ()
2007 : m_eRetType ( SPH_ATTR_INTEGER )
2008 {}
2009
Add(int64_t iValue)2010 void Add ( int64_t iValue )
2011 {
2012 if ( m_eRetType==SPH_ATTR_FLOAT )
2013 {
2014 m_dFloats.Add ( (float)iValue );
2015 } else
2016 {
2017 m_eRetType = WidestType ( m_eRetType, GetIntType ( iValue ) );
2018 m_dInts.Add ( iValue );
2019 }
2020 }
2021
Add(float fValue)2022 void Add ( float fValue )
2023 {
2024 if ( m_eRetType!=SPH_ATTR_FLOAT )
2025 {
2026 assert ( m_dFloats.GetLength()==0 );
2027 ARRAY_FOREACH ( i, m_dInts )
2028 m_dFloats.Add ( (float)m_dInts[i] );
2029 m_dInts.Reset ();
2030 m_eRetType = SPH_ATTR_FLOAT;
2031 }
2032 m_dFloats.Add ( fValue );
2033 }
2034 };
2035
2036
2037 /// {title=2, body=1}
2038 /// {in=deg, out=mi}
2039 /// argument to functions like BM25F() and GEODIST()
2040 class MapArg_c
2041 {
2042 public:
2043 CSphVector<CSphNamedVariant> m_dPairs;
2044
2045 public:
Add(const char * sKey,const char * sValue,int64_t iValue)2046 void Add ( const char * sKey, const char * sValue, int64_t iValue )
2047 {
2048 CSphNamedVariant & t = m_dPairs.Add();
2049 t.m_sKey = sKey;
2050 if ( sValue )
2051 t.m_sValue = sValue;
2052 else
2053 t.m_iValue = (int)iValue;
2054 }
2055 };
2056
2057
2058 /// expression tree node
2059 /// used to build an AST (Abstract Syntax Tree)
2060 struct ExprNode_t
2061 {
2062 int m_iToken; ///< token type, including operators
2063 ESphAttr m_eRetType; ///< result type
2064 ESphAttr m_eArgType; ///< args type
2065 CSphAttrLocator m_tLocator; ///< attribute locator, for TOK_ATTR type
2066 int m_iLocator; ///< index of attribute locator in schema
2067
2068 union
2069 {
2070 int64_t m_iConst; ///< constant value, for TOK_CONST_INT type
2071 float m_fConst; ///< constant value, for TOK_CONST_FLOAT type
2072 int m_iFunc; ///< built-in function id, for TOK_FUNC type
2073 int m_iArgs; ///< args count, for arglist (token==',') type
2074 ConstList_c * m_pConsts; ///< constants list, for TOK_CONST_LIST type
2075 MapArg_c * m_pMapArg; ///< map argument (maps name to const or name to expr), for TOK_MAP_ARG type
2076 const char * m_sIdent; ///< pointer to const char, for TOK_IDENT type
2077 SphAttr_t * m_pAttr; ///< pointer to 64-bit value, for TOK_ITERATOR type
2078 };
2079 int m_iLeft;
2080 int m_iRight;
2081
ExprNode_tExprNode_t2082 ExprNode_t () : m_iToken ( 0 ), m_eRetType ( SPH_ATTR_NONE ), m_eArgType ( SPH_ATTR_NONE ),
2083 m_iLocator ( -1 ), m_iLeft ( -1 ), m_iRight ( -1 ) {}
2084 };
2085
/// a triple of node indexes, stored in ExprParser_t::m_dGatherStack
/// NOTE(review): presumably one explicit-stack frame of the argument gathering
/// traversal (a node plus its pending left/right children); the code using it
/// is not visible here, confirm against GatherArgT()
struct StackNode_t
{
	int m_iNode;
	int m_iLeft;
	int m_iRight;
};
2092
2093 /// expression parser
2094 class ExprParser_t
2095 {
2096 friend int yylex ( YYSTYPE * lvalp, ExprParser_t * pParser );
2097 friend int yyparse ( ExprParser_t * pParser );
2098 friend void yyerror ( ExprParser_t * pParser, const char * sMessage );
2099
2100 public:
ExprParser_t(ISphExprHook * pHook,CSphQueryProfile * pProfiler,ESphCollation eCollation)2101 ExprParser_t ( ISphExprHook * pHook, CSphQueryProfile * pProfiler, ESphCollation eCollation )
2102 : m_pHook ( pHook )
2103 , m_pProfiler ( pProfiler )
2104 , m_bHasZonespanlist ( false )
2105 , m_uPackedFactorFlags ( SPH_FACTOR_DISABLE )
2106 , m_eEvalStage ( SPH_EVAL_FINAL ) // be default compute as late as possible
2107 , m_eCollation ( eCollation )
2108 {
2109 m_dGatherStack.Reserve ( 64 );
2110 }
2111
2112 ~ExprParser_t ();
2113 ISphExpr * Parse ( const char * sExpr, const ISphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError );
2114
2115 protected:
2116 int m_iParsed; ///< filled by yyparse() at the very end
2117 CSphString m_sLexerError;
2118 CSphString m_sParserError;
2119 CSphString m_sCreateError;
2120 ISphExprHook * m_pHook;
2121 CSphQueryProfile * m_pProfiler;
2122
2123 protected:
2124 ESphAttr GetWidestRet ( int iLeft, int iRight );
2125
2126 int AddNodeInt ( int64_t iValue );
2127 int AddNodeFloat ( float fValue );
2128 int AddNodeString ( int64_t iValue );
2129 int AddNodeAttr ( int iTokenType, uint64_t uAttrLocator );
2130 int AddNodeID ();
2131 int AddNodeWeight ();
2132 int AddNodeOp ( int iOp, int iLeft, int iRight );
2133 int AddNodeFunc ( int iFunc, int iFirst, int iSecond=-1, int iThird=-1, int iFourth=-1 );
2134 int AddNodeUdf ( int iCall, int iArg );
2135 int AddNodePF ( int iFunc, int iArg );
2136 int AddNodeConstlist ( int64_t iValue );
2137 int AddNodeConstlist ( float iValue );
2138 void AppendToConstlist ( int iNode, int64_t iValue );
2139 void AppendToConstlist ( int iNode, float iValue );
2140 int AddNodeUservar ( int iUservar );
2141 int AddNodeHookIdent ( int iID );
2142 int AddNodeHookFunc ( int iID, int iLeft );
2143 int AddNodeMapArg ( const char * sKey, const char * sValue, int64_t iValue );
2144 void AppendToMapArg ( int iNode, const char * sKey, const char * sValue, int64_t iValue );
2145 const char * Attr2Ident ( uint64_t uAttrLoc );
2146 int AddNodeJsonField ( uint64_t uAttrLocator, int iLeft );
2147 int AddNodeJsonSubkey ( int64_t iValue );
2148 int AddNodeDotNumber ( int64_t iValue );
2149 int AddNodeIdent ( const char * sKey, int iLeft );
2150
2151 private:
2152 const char * m_sExpr;
2153 const char * m_pCur;
2154 const char * m_pLastTokenStart;
2155 const ISphSchema * m_pSchema;
2156 CSphVector<ExprNode_t> m_dNodes;
2157 CSphVector<CSphString> m_dUservars;
2158 CSphVector<char*> m_dIdents;
2159 int m_iConstNow;
2160 CSphVector<StackNode_t> m_dGatherStack;
2161 CSphVector<UdfCall_t*> m_dUdfCalls;
2162
2163 public:
2164 bool m_bHasZonespanlist;
2165 DWORD m_uPackedFactorFlags;
2166 ESphEvalStage m_eEvalStage;
2167 ESphCollation m_eCollation;
2168
2169 private:
2170 int GetToken ( YYSTYPE * lvalp );
2171
2172 void GatherArgTypes ( int iNode, CSphVector<int> & dTypes );
2173 void GatherArgNodes ( int iNode, CSphVector<int> & dNodes );
2174 void GatherArgRetTypes ( int iNode, CSphVector<ESphAttr> & dTypes );
2175 template < typename T >
2176 void GatherArgT ( int iNode, T & FUNCTOR );
2177
2178 bool CheckForConstSet ( int iArgsNode, int iSkip );
2179 int ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp );
2180
2181 template < typename T >
2182 void WalkTree ( int iRoot, T & FUNCTOR );
2183
2184 void Optimize ( int iNode );
2185 void CanonizePass ( int iNode );
2186 void ConstantFoldPass ( int iNode );
2187 void VariousOptimizationsPass ( int iNode );
2188 void Dump ( int iNode );
2189
2190 ISphExpr * CreateTree ( int iNode );
2191 ISphExpr * CreateIntervalNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs );
2192 ISphExpr * CreateInNode ( int iNode );
2193 ISphExpr * CreateLengthNode ( const ExprNode_t & tNode, ISphExpr * pLeft );
2194 ISphExpr * CreateGeodistNode ( int iArgs );
2195 ISphExpr * CreatePFNode ( int iArg );
2196 ISphExpr * CreateBitdotNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs );
2197 ISphExpr * CreateUdfNode ( int iCall, ISphExpr * pLeft );
2198 ISphExpr * CreateExistNode ( const ExprNode_t & tNode );
2199 ISphExpr * CreateContainsNode ( const ExprNode_t & tNode );
2200 ISphExpr * CreateAggregateNode ( const ExprNode_t & tNode, ESphAggrFunc eFunc, ISphExpr * pLeft );
2201 ISphExpr * CreateForInNode ( int iNode );
2202 void FixupIterators ( int iNode, const char * sKey, SphAttr_t * pAttr );
2203
GetError() const2204 bool GetError () const { return !( m_sLexerError.IsEmpty() && m_sParserError.IsEmpty() && m_sCreateError.IsEmpty() ); }
2205 };
2206
2207 //////////////////////////////////////////////////////////////////////////
2208
2209 /// parse that numeric constant (e.g. "123", ".03")
ParseNumeric(YYSTYPE * lvalp,const char ** ppStr)2210 static int ParseNumeric ( YYSTYPE * lvalp, const char ** ppStr )
2211 {
2212 assert ( lvalp && ppStr && *ppStr );
2213
2214 // try float route
2215 char * pEnd = NULL;
2216 float fRes = (float) strtod ( *ppStr, &pEnd );
2217
2218 // try int route
2219 uint64_t uRes = 0; // unsigned overflow is better than signed overflow
2220 bool bInt = true;
2221 for ( const char * p=(*ppStr); p<pEnd; p++ && bInt )
2222 {
2223 if ( isdigit(*p) )
2224 uRes = uRes*10 + (int)( (*p)-'0' ); // FIXME! missing overflow check, missing octal/hex handling
2225 else
2226 bInt = false;
2227 }
2228
2229 // choose your destiny
2230 *ppStr = pEnd;
2231 if ( bInt )
2232 {
2233 lvalp->iConst = (int64_t)uRes;
2234 return TOK_CONST_INT;
2235 } else
2236 {
2237 lvalp->fConst = fRes;
2238 return TOK_CONST_FLOAT;
2239 }
2240 }
2241
2242 // used to store in 8 bytes in Bison lvalp variable
sphPackAttrLocator(const CSphAttrLocator & tLoc,int iLocator)2243 static uint64_t sphPackAttrLocator ( const CSphAttrLocator & tLoc, int iLocator )
2244 {
2245 assert ( iLocator>=0 && iLocator<=0x7fff );
2246 uint64_t uIndex = 0;
2247 uIndex = ( tLoc.m_iBitOffset<<16 ) + tLoc.m_iBitCount + ( (uint64_t)iLocator<<32 );
2248 if ( tLoc.m_bDynamic )
2249 uIndex |= ( U64C(1)<<63 );
2250
2251 return uIndex;
2252 }
2253
sphUnpackAttrLocator(uint64_t uIndex,ExprNode_t * pNode)2254 static void sphUnpackAttrLocator ( uint64_t uIndex, ExprNode_t * pNode )
2255 {
2256 assert ( pNode );
2257 pNode->m_tLocator.m_iBitOffset = (int)( ( uIndex>>16 ) & 0xffff );
2258 pNode->m_tLocator.m_iBitCount = (int)( uIndex & 0xffff );
2259 pNode->m_tLocator.m_bDynamic = ( ( uIndex & ( U64C(1)<<63 ) )!=0 );
2260
2261 pNode->m_iLocator = (int)( ( uIndex>>32 ) & 0x7fff );
2262 }
2263
ParseAttr(int iAttr,const char * sTok,YYSTYPE * lvalp)2264 int ExprParser_t::ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp )
2265 {
2266 // check attribute type and width
2267 const CSphColumnInfo & tCol = m_pSchema->GetAttr ( iAttr );
2268
2269 int iRes = -1;
2270 switch ( tCol.m_eAttrType )
2271 {
2272 case SPH_ATTR_FLOAT: iRes = TOK_ATTR_FLOAT; break;
2273 case SPH_ATTR_UINT32SET: iRes = TOK_ATTR_MVA32; break;
2274 case SPH_ATTR_INT64SET: iRes = TOK_ATTR_MVA64; break;
2275 case SPH_ATTR_STRING: iRes = TOK_ATTR_STRING; break;
2276 case SPH_ATTR_JSON: iRes = TOK_ATTR_JSON; break;
2277 case SPH_ATTR_JSON_FIELD: iRes = TOK_ATTR_JSON; break;
2278 case SPH_ATTR_FACTORS: iRes = TOK_ATTR_FACTORS; break;
2279 case SPH_ATTR_INTEGER:
2280 case SPH_ATTR_TIMESTAMP:
2281 case SPH_ATTR_BOOL:
2282 case SPH_ATTR_BIGINT:
2283 case SPH_ATTR_TOKENCOUNT:
2284 iRes = tCol.m_tLocator.IsBitfield() ? TOK_ATTR_BITS : TOK_ATTR_INT;
2285 break;
2286 default:
2287 m_sLexerError.SetSprintf ( "attribute '%s' is of unsupported type (type=%d)", sTok, tCol.m_eAttrType );
2288 return -1;
2289 }
2290
2291 lvalp->iAttrLocator = sphPackAttrLocator ( tCol.m_tLocator, iAttr );
2292 return iRes;
2293 }
2294
2295
/// a lexer of my own
/// returns token id and fills lvalp on success
/// returns 0 at the end of the input
/// returns -1 and fills m_sLexerError on failure
///
/// checks are made in a fixed order: numeric constant, then name-like tokens
/// (magic names, uservars, keywords, attributes, functions, hook, UDF, arbitrary
/// identifier), then operators, leading dots, and quoted strings
int ExprParser_t::GetToken ( YYSTYPE * lvalp )
{
	// skip whitespace, check eof
	// NOTE(review): isspace() and isdigit() below receive a plain char; bytes with the high bit
	// set would be passed as negative values, consider casting to unsigned char
	while ( isspace ( *m_pCur ) ) m_pCur++;
	m_pLastTokenStart = m_pCur; // remember where this token starts
	if ( !*m_pCur ) return 0;

	// check for constant
	if ( isdigit ( m_pCur[0] ) )
		return ParseNumeric ( lvalp, &m_pCur );

	// check for field, function, or magic name
	// '@' only starts a name when followed by a non-digit name character
	if ( sphIsAttr ( m_pCur[0] )
		|| ( m_pCur[0]=='@' && sphIsAttr ( m_pCur[1] ) && !isdigit ( m_pCur[1] ) ) )
	{
		// get token
		const char * pStart = m_pCur++;
		while ( sphIsAttr ( *m_pCur ) ) m_pCur++;

		// all the matching below is done on the lowercased copy;
		// the original spelling is only kept for arbitrary identifiers
		CSphString sTok;
		sTok.SetBinary ( pStart, m_pCur-pStart );
		CSphString sTokMixedCase = sTok;
		sTok.ToLower ();

		// check for magic name
		if ( sTok=="@id" )			return TOK_ATID;
		if ( sTok=="@weight" )		return TOK_ATWEIGHT;
		if ( sTok=="id" )			return TOK_ID;
		if ( sTok=="weight" )		return TOK_WEIGHT;
		if ( sTok=="groupby" )		return TOK_GROUPBY;
		if ( sTok=="distinct" )		return TOK_DISTINCT;
		if ( sTok=="@geodist" )
		{
			// @geodist is only available when the schema has such an attribute
			int iGeodist = m_pSchema->GetAttrIndex("@geodist");
			if ( iGeodist==-1 )
			{
				m_sLexerError = "geoanchor is not set, @geodist expression unavailable";
				return -1;
			}
			const CSphAttrLocator & tLoc = m_pSchema->GetAttr ( iGeodist ).m_tLocator;
			lvalp->iAttrLocator = sphPackAttrLocator ( tLoc, iGeodist );
			return TOK_ATTR_FLOAT;
		}

		// check for uservar
		// any other @name; its (lowercased) name is stored, and lvalp gets its index in m_dUservars
		if ( pStart[0]=='@' )
		{
			lvalp->iNode = m_dUservars.GetLength();
			m_dUservars.Add ( sTok );
			return TOK_USERVAR;
		}

		// check for keyword
		if ( sTok=="and" )		{ return TOK_AND; }
		if ( sTok=="or" )		{ return TOK_OR; }
		if ( sTok=="not" )		{ return TOK_NOT; }
		if ( sTok=="div" )		{ return TOK_DIV; }
		if ( sTok=="mod" )		{ return TOK_MOD; }
		if ( sTok=="for" )		{ return TOK_FOR; }
		if ( sTok=="is" )		{ return TOK_IS; }
		if ( sTok=="null" )		{ return TOK_NULL; }

		// in case someone used 'count' as a name for an attribute
		// NOTE: ParseAttr() result is ignored here; it only fills lvalp (or sets m_sLexerError),
		// and TOK_COUNT is returned either way
		if ( sTok=="count" )
		{
			int iAttr = m_pSchema->GetAttrIndex ( "count" );
			if ( iAttr>=0 )
				ParseAttr ( iAttr, sTok.cstr(), lvalp );
			return TOK_COUNT;
		}

		// check for attribute
		int iAttr = m_pSchema->GetAttrIndex ( sTok.cstr() );
		if ( iAttr>=0 )
			return ParseAttr ( iAttr, sTok.cstr(), lvalp );

		// hook might replace built-in function
		int iHookFunc = -1;
		if ( m_pHook )
			iHookFunc = m_pHook->IsKnownFunc ( sTok.cstr() );

		// check for function
		// built-in is only used when the hook did not claim this name
		int iFunc = FuncHashLookup ( sTok.cstr() );
		if ( iFunc>=0 && iHookFunc==-1 )
		{
			assert ( !strcasecmp ( g_dFuncs[iFunc].m_sName, sTok.cstr() ) );
			lvalp->iFunc = iFunc;

			// a few functions get dedicated tokens
			if ( iFunc==FUNC_IN )
				return TOK_FUNC_IN;
			if ( iFunc==FUNC_REMAP )
				return TOK_FUNC_REMAP;
			if ( iFunc==FUNC_PACKEDFACTORS || iFunc==FUNC_FACTORS )
				return TOK_FUNC_PF;
			return TOK_FUNC;
		}

		// ask hook
		// hook identifiers are checked first, hook functions second
		if ( m_pHook )
		{
			int iID = m_pHook->IsKnownIdent ( sTok.cstr() );
			if ( iID>=0 )
			{
				lvalp->iNode = iID;
				return TOK_HOOK_IDENT;
			}

			iID = iHookFunc;
			if ( iID>=0 )
			{
				lvalp->iNode = iID;
				return TOK_HOOK_FUNC;
			}
		}

		// check for UDF
		// a new call entry is added to m_dUdfCalls, and lvalp gets its index
		const PluginUDF_c * pUdf = (const PluginUDF_c *) sphPluginGet ( PLUGIN_FUNCTION, sTok.cstr() );
		if ( pUdf )
		{
			lvalp->iNode = m_dUdfCalls.GetLength();
			m_dUdfCalls.Add ( new UdfCall_t() );
			m_dUdfCalls.Last()->m_pUdf = pUdf;
			return TOK_UDF;
		}

		// arbitrary identifier, then
		// keeps the original (mixed case) spelling; the leaked buffer is tracked in m_dIdents
		m_dIdents.Add ( sTokMixedCase.Leak() );
		lvalp->sIdent = m_dIdents.Last();
		return TOK_IDENT;
	}

	// check for known operators, then
	switch ( *m_pCur )
	{
		// single-character tokens are returned as their own character code
		case '+':
		case '-':
		case '*':
		case '/':
		case '(':
		case ')':
		case ',':
		case '&':
		case '|':
		case '%':
		case '{':
		case '}':
		case '[':
		case ']':
		case '`':
			return *m_pCur++;

		// "<>" and "<=" are two-character tokens, a lone '<' is returned as is
		case '<':
			m_pCur++;
			if ( *m_pCur=='>' ) { m_pCur++; return TOK_NE; }
			if ( *m_pCur=='=' ) { m_pCur++; return TOK_LTE; }
			return '<';

		case '>':
			m_pCur++;
			if ( *m_pCur=='=' ) { m_pCur++; return TOK_GTE; }
			return '>';

		// both "=" and "==" mean equality
		case '=':
			m_pCur++;
			if ( *m_pCur=='=' ) m_pCur++;
			return TOK_EQ;

		// special case for leading dots (float values without leading zero, JSON key names, etc)
		case '.':
		{
			// offset of the first character after the dot
			int iBeg = (int)( m_pCur-m_sExpr+1 );
			bool bDigit = isdigit ( m_pCur[1] )!=0;

			// handle dots followed by a digit
			// aka, a float value without leading zero
			if ( bDigit )
			{
				char * pEnd = NULL;
				float fValue = (float) strtod ( m_pCur, &pEnd );
				lvalp->fConst = fValue;

				if ( pEnd && !sphIsAttr(*pEnd) )
					m_pCur = pEnd;
				else // fallback to subkey (e.g. ".1234a")
					bDigit = false;
			}

			// handle dots followed by a non-digit
			// for cases like jsoncol.keyname
			if ( !bDigit )
			{
				// skip the dot and any whitespace after it, then consume the key name
				m_pCur++;
				while ( isspace ( *m_pCur ) )
					m_pCur++;
				iBeg = (int)( m_pCur-m_sExpr );
				while ( sphIsAttr(*m_pCur) )
					m_pCur++;
			}

			// return packed string after the dot
			// that is, its offset within m_sExpr in the high dword, and its length in the low one
			int iLen = (int)( m_pCur-m_sExpr ) - iBeg;
			lvalp->iConst = ( int64_t(iBeg)<<32 ) + iLen;
			return bDigit ? TOK_DOT_NUMBER : TOK_SUBKEY;
		}

		// quoted string constant; either quote type works, must be closed by the same one
		case '\'':
		case '"':
		{
			const char cEnd = *m_pCur;
			for ( const char * s = m_pCur+1; *s; s++ )
			{
				if ( *s==cEnd )
				{
					// packed as (offset<<32)+length; both cover the string including its quotes
					int iBeg = (int)( m_pCur-m_sExpr );
					int iLen = (int)( s-m_sExpr ) - iBeg + 1;
					lvalp->iConst = ( int64_t(iBeg)<<32 ) + iLen;
					m_pCur = s+1;
					return TOK_CONST_STRING;

				} else if ( *s=='\\' )
				{
					// backslash escapes the next character, so that an escaped quote does not end the string
					s++;
					if ( !*s )
						break;
				}
			}
			m_sLexerError.SetSprintf ( "unterminated string constant near '%s'", m_pCur );
			return -1;
		}
	}

	m_sLexerError.SetSprintf ( "unknown operator '%c' near '%s'", *m_pCur, m_pCur );
	return -1;
}
2532
2533 /// is add/sub?
IsAddSub(const ExprNode_t * pNode)2534 static inline bool IsAddSub ( const ExprNode_t * pNode )
2535 {
2536 return pNode->m_iToken=='+' || pNode->m_iToken=='-';
2537 }
2538
2539 /// is unary operator?
IsUnary(const ExprNode_t * pNode)2540 static inline bool IsUnary ( const ExprNode_t * pNode )
2541 {
2542 return pNode->m_iToken==TOK_NEG || pNode->m_iToken==TOK_NOT;
2543 }
2544
2545 /// is arithmetic?
IsAri(const ExprNode_t * pNode)2546 static inline bool IsAri ( const ExprNode_t * pNode )
2547 {
2548 int iTok = pNode->m_iToken;
2549 return iTok=='+' || iTok=='-' || iTok=='*' || iTok=='/';
2550 }
2551
2552 /// is constant?
IsConst(const ExprNode_t * pNode)2553 static inline bool IsConst ( const ExprNode_t * pNode )
2554 {
2555 return pNode->m_iToken==TOK_CONST_INT || pNode->m_iToken==TOK_CONST_FLOAT;
2556 }
2557
2558 /// float value of a constant
FloatVal(const ExprNode_t * pNode)2559 static inline float FloatVal ( const ExprNode_t * pNode )
2560 {
2561 assert ( IsConst(pNode) );
2562 return pNode->m_iToken==TOK_CONST_INT
2563 ? (float)pNode->m_iConst
2564 : pNode->m_fConst;
2565 }
2566
/// canonization pass over the subtree rooted at iNode (a negative index is a no-op)
///
/// children are processed first; then, for arithmetic nodes, constants are moved
/// to the left and promoted towards the root, rewriting the operators accordingly.
/// also replaces CURRENT_USER() and CONNECTION_ID() calls with a constant 0
void ExprParser_t::CanonizePass ( int iNode )
{
	if ( iNode<0 )
		return;

	// bottom-up, children go first
	CanonizePass ( m_dNodes [ iNode ].m_iLeft );
	CanonizePass ( m_dNodes [ iNode ].m_iRight );

	ExprNode_t * pRoot = &m_dNodes [ iNode ];
	ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes [ pRoot->m_iLeft ] : NULL;
	ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes [ pRoot->m_iRight ] : NULL;

	// canonize (expr op const), move const to the left
	if ( IsAri ( pRoot ) && !IsConst ( pLeft ) && IsConst ( pRight ) )
	{
		// swap both the child indexes and the local pointers; pLeft is the constant from now on
		Swap ( pRoot->m_iLeft, pRoot->m_iRight );
		Swap ( pLeft, pRight );

		// fixup (expr-const) to ((-const)+expr)
		if ( pRoot->m_iToken=='-' )
		{
			pRoot->m_iToken = '+';
			if ( pLeft->m_iToken==TOK_CONST_INT )
				pLeft->m_iConst *= -1;
			else
				pLeft->m_fConst *= -1;
		}

		// fixup (expr/const) to ((1/const)*expr)
		// the reciprocal is always a float, so the constant changes its type, too
		if ( pRoot->m_iToken=='/' )
		{
			pRoot->m_iToken = '*';
			pLeft->m_fConst = 1.0f / FloatVal ( pLeft );
			pLeft->m_iToken = TOK_CONST_FLOAT;
		}
	}

	// promote children constants
	// only when both operators are of the same kind (both add/sub, or both mul/div)
	if ( IsAri ( pRoot ) && IsAri ( pLeft ) && IsAddSub ( pLeft )==IsAddSub ( pRoot ) &&
		IsConst ( &m_dNodes [ pLeft->m_iLeft ] ) )
	{
		// ((const op lr) op2 right) gets replaced with (const op (lr op2/op right))
		// constant gets promoted one level up
		int iConst = pLeft->m_iLeft;
		pLeft->m_iLeft = pLeft->m_iRight;
		pLeft->m_iRight = pRoot->m_iRight; // (c op lr) -> (lr ... r)

		switch ( pLeft->m_iToken )
		{
			case '+':
			case '*':
				// (c + lr) op r -> c + (lr op r)
				// (c * lr) op r -> c * (lr op r)
				Swap ( pLeft->m_iToken, pRoot->m_iToken );
				break;

			case '-':
				// (c - lr) + r -> c - (lr - r)
				// (c - lr) - r -> c - (lr + r)
				pLeft->m_iToken = ( pRoot->m_iToken=='+' ? '-' : '+' );
				pRoot->m_iToken = '-';
				break;

			case '/':
				// (c / lr) * r -> c * (r / lr)
				// (c / lr) / r -> c / (r * lr)
				Swap ( pLeft->m_iLeft, pLeft->m_iRight );
				pLeft->m_iToken = ( pRoot->m_iToken=='*' ) ? '/' : '*';
				break;

			default:
				assert ( 0 && "internal error: unhandled op in left-const promotion" );
		}

		// the former left child (now holding the non-constant part) becomes the right one,
		// and the promoted constant takes its place on the left
		pRoot->m_iRight = pRoot->m_iLeft;
		pRoot->m_iLeft = iConst;

		pLeft = &m_dNodes [ pRoot->m_iLeft ];
		pRight = &m_dNodes [ pRoot->m_iRight ];
	}

	// MySQL Workbench fixup
	// both functions just evaluate to a constant 0
	if ( pRoot->m_iToken==TOK_FUNC && ( pRoot->m_iFunc==FUNC_CURRENT_USER || pRoot->m_iFunc==FUNC_CONNECTION_ID ) )
	{
		pRoot->m_iToken = TOK_CONST_INT;
		pRoot->m_iConst = 0;
		return;
	}
}
2656
ConstantFoldPass(int iNode)2657 void ExprParser_t::ConstantFoldPass ( int iNode )
2658 {
2659 if ( iNode<0 )
2660 return;
2661
2662 ConstantFoldPass ( m_dNodes [ iNode ].m_iLeft );
2663 ConstantFoldPass ( m_dNodes [ iNode ].m_iRight );
2664
2665 ExprNode_t * pRoot = &m_dNodes [ iNode ];
2666 ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes [ pRoot->m_iLeft ] : NULL;
2667 ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes [ pRoot->m_iRight ] : NULL;
2668
2669 // unary arithmetic expression with constant
2670 if ( IsUnary ( pRoot ) && IsConst ( pLeft ) )
2671 {
2672 if ( pLeft->m_iToken==TOK_CONST_INT )
2673 {
2674 switch ( pRoot->m_iToken )
2675 {
2676 case TOK_NEG: pRoot->m_iConst = -pLeft->m_iConst; break;
2677 case TOK_NOT: pRoot->m_iConst = !pLeft->m_iConst; break;
2678 default: assert ( 0 && "internal error: unhandled arithmetic token during const-int optimization" );
2679 }
2680
2681 } else
2682 {
2683 switch ( pRoot->m_iToken )
2684 {
2685 case TOK_NEG: pRoot->m_fConst = -pLeft->m_fConst; break;
2686 case TOK_NOT: pRoot->m_fConst = !pLeft->m_fConst; break;
2687 default: assert ( 0 && "internal error: unhandled arithmetic token during const-float optimization" );
2688 }
2689 }
2690
2691 pRoot->m_iToken = pLeft->m_iToken;
2692 pRoot->m_iLeft = -1;
2693 return;
2694 }
2695
2696 // arithmetic expression with constants
2697 if ( IsAri ( pRoot ) )
2698 {
2699 assert ( pLeft && pRight );
2700
2701 // optimize fully-constant expressions
2702 if ( IsConst ( pLeft ) && IsConst ( pRight ) )
2703 {
2704 if ( pLeft->m_iToken==TOK_CONST_INT && pRight->m_iToken==TOK_CONST_INT && pRoot->m_iToken!='/' )
2705 {
2706 switch ( pRoot->m_iToken )
2707 {
2708 case '+': pRoot->m_iConst = pLeft->m_iConst + pRight->m_iConst; break;
2709 case '-': pRoot->m_iConst = pLeft->m_iConst - pRight->m_iConst; break;
2710 case '*': pRoot->m_iConst = pLeft->m_iConst * pRight->m_iConst; break;
2711 default: assert ( 0 && "internal error: unhandled arithmetic token during const-int optimization" );
2712 }
2713 pRoot->m_iToken = TOK_CONST_INT;
2714
2715 } else
2716 {
2717 float fLeft = FloatVal ( pLeft );
2718 float fRight = FloatVal ( pRight );
2719 switch ( pRoot->m_iToken )
2720 {
2721 case '+': pRoot->m_fConst = fLeft + fRight; break;
2722 case '-': pRoot->m_fConst = fLeft - fRight; break;
2723 case '*': pRoot->m_fConst = fLeft * fRight; break;
2724 case '/': pRoot->m_fConst = fRight ? fLeft / fRight : 0.0f; break;
2725 default: assert ( 0 && "internal error: unhandled arithmetic token during const-float optimization" );
2726 }
2727 pRoot->m_iToken = TOK_CONST_FLOAT;
2728 }
2729 pRoot->m_iLeft = -1;
2730 pRoot->m_iRight = -1;
2731 return;
2732 }
2733
2734 // optimize compatible operations with constants
2735 if ( IsConst ( pLeft ) && IsAri ( pRight ) && IsAddSub ( pRoot )==IsAddSub ( pRight ) &&
2736 IsConst ( &m_dNodes [ pRight->m_iLeft ] ) )
2737 {
2738 ExprNode_t * pConst = &m_dNodes [ pRight->m_iLeft ];
2739 ExprNode_t * pExpr = &m_dNodes [ pRight->m_iRight ];
2740 assert ( !IsConst ( pExpr ) ); // must had been optimized
2741
2742 // optimize (left op (const op2 expr)) to ((left op const) op*op2 expr)
2743 if ( IsAddSub ( pRoot ) )
2744 {
2745 // fold consts
2746 int iSign = ( ( pRoot->m_iToken=='+' ) ? 1 : -1 );
2747 if ( pLeft->m_iToken==TOK_CONST_INT && pConst->m_iToken==TOK_CONST_INT )
2748 {
2749 pLeft->m_iConst += iSign*pConst->m_iConst;
2750 } else
2751 {
2752 pLeft->m_fConst = FloatVal ( pLeft ) + iSign*FloatVal ( pConst );
2753 pLeft->m_iToken = TOK_CONST_FLOAT;
2754 }
2755
2756 // fold ops
2757 pRoot->m_iToken = ( pRoot->m_iToken==pRight->m_iToken ) ? '+' : '-';
2758
2759 } else
2760 {
2761 // fols consts
2762 if ( pRoot->m_iToken=='*' && pLeft->m_iToken==TOK_CONST_INT && pConst->m_iToken==TOK_CONST_INT )
2763 {
2764 pLeft->m_iConst *= pConst->m_iConst;
2765 } else
2766 {
2767 if ( pRoot->m_iToken=='*' )
2768 pLeft->m_fConst = FloatVal ( pLeft ) * FloatVal ( pConst );
2769 else
2770 pLeft->m_fConst = FloatVal ( pLeft ) / FloatVal ( pConst );
2771 pLeft->m_iToken = TOK_CONST_FLOAT;
2772 }
2773
2774 // fold ops
2775 pRoot->m_iToken = ( pRoot->m_iToken==pRight->m_iToken ) ? '*' : '/';
2776 }
2777
2778 // promote expr arg
2779 pRoot->m_iRight = pRight->m_iRight;
2780 pRight = pExpr;
2781 }
2782 }
2783
2784 // unary function from a constant
2785 if ( pRoot->m_iToken==TOK_FUNC && g_dFuncs [ pRoot->m_iFunc ].m_iArgs==1 && IsConst ( pLeft ) )
2786 {
2787 float fArg = pLeft->m_iToken==TOK_CONST_FLOAT ? pLeft->m_fConst : float ( pLeft->m_iConst );
2788 switch ( pRoot->m_iFunc )
2789 {
2790 case FUNC_ABS:
2791 pRoot->m_iToken = pLeft->m_iToken;
2792 pRoot->m_iLeft = -1;
2793 if ( pLeft->m_iToken==TOK_CONST_INT )
2794 pRoot->m_iConst = IABS ( pLeft->m_iConst );
2795 else
2796 pRoot->m_fConst = (float)fabs ( fArg );
2797 break;
2798 case FUNC_CEIL: pRoot->m_iToken = TOK_CONST_INT; pRoot->m_iLeft = -1; pRoot->m_iConst = (int)ceil ( fArg ); break;
2799 case FUNC_FLOOR: pRoot->m_iToken = TOK_CONST_INT; pRoot->m_iLeft = -1; pRoot->m_iConst = (int)floor ( fArg ); break;
2800 case FUNC_SIN: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float ( sin ( fArg) ); break;
2801 case FUNC_COS: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float ( cos ( fArg ) ); break;
2802 case FUNC_LN: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float) log(fArg) : 0.0f; break;
2803 case FUNC_LOG2: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float)( log(fArg)*M_LOG2E ) : 0.0f; break;
2804 case FUNC_LOG10: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float)( log(fArg)*M_LOG10E ) : 0.0f; break;
2805 case FUNC_EXP: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float ( exp ( fArg ) ); break;
2806 case FUNC_SQRT: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float)sqrt(fArg) : 0.0f; break;
2807 default: break;
2808 }
2809 return;
2810 }
2811
2812 // constant function (such as NOW())
2813 if ( pRoot->m_iToken==TOK_FUNC && pRoot->m_iFunc==FUNC_NOW )
2814 {
2815 pRoot->m_iToken = TOK_CONST_INT;
2816 pRoot->m_iConst = m_iConstNow;
2817 }
2818 }
2819
VariousOptimizationsPass(int iNode)2820 void ExprParser_t::VariousOptimizationsPass ( int iNode )
2821 {
2822 if ( iNode<0 )
2823 return;
2824
2825 VariousOptimizationsPass ( m_dNodes [ iNode ].m_iLeft );
2826 VariousOptimizationsPass ( m_dNodes [ iNode ].m_iRight );
2827
2828 ExprNode_t * pRoot = &m_dNodes [ iNode ];
2829 ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes [ pRoot->m_iLeft ] : NULL;
2830 ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes [ pRoot->m_iRight ] : NULL;
2831
2832 // madd, mul3
2833 // FIXME! separate pass for these? otherwise (2+(a*b))+3 won't get const folding
2834 if ( ( pRoot->m_iToken=='+' || pRoot->m_iToken=='*' ) && ( pLeft->m_iToken=='*' || pRight->m_iToken=='*' ) )
2835 {
2836 if ( pLeft->m_iToken!='*' )
2837 {
2838 Swap ( pRoot->m_iLeft, pRoot->m_iRight );
2839 Swap ( pLeft, pRight );
2840 }
2841
2842 pLeft->m_iToken = ',';
2843
2844 int iLeft = pRoot->m_iLeft;
2845 int iRight = pRoot->m_iRight;
2846
2847 pRoot->m_iFunc = ( pRoot->m_iToken=='+' ) ? FUNC_MADD : FUNC_MUL3;
2848 pRoot->m_iToken = TOK_FUNC;
2849 pRoot->m_iLeft = m_dNodes.GetLength();
2850 pRoot->m_iRight = -1;
2851
2852 ExprNode_t & tArgs = m_dNodes.Add(); // invalidates all pointers!
2853 tArgs.m_iToken = ',';
2854 tArgs.m_iLeft = iLeft;
2855 tArgs.m_iRight = iRight;
2856 return;
2857 }
2858
2859 // division by a constant (replace with multiplication by inverse)
2860 if ( pRoot->m_iToken=='/' && pRight->m_iToken==TOK_CONST_FLOAT )
2861 {
2862 pRight->m_fConst = 1.0f / pRight->m_fConst;
2863 pRoot->m_iToken = '*';
2864 return;
2865 }
2866
2867
2868 // SINT(int-attr)
2869 if ( pRoot->m_iToken==TOK_FUNC && pRoot->m_iFunc==FUNC_SINT )
2870 {
2871 assert ( pLeft );
2872
2873 if ( pLeft->m_iToken==TOK_ATTR_INT || pLeft->m_iToken==TOK_ATTR_BITS )
2874 {
2875 pRoot->m_iToken = TOK_ATTR_SINT;
2876 pRoot->m_tLocator = pLeft->m_tLocator;
2877 pRoot->m_iLeft = -1;
2878 }
2879 }
2880 }
2881
/// optimize subtree
/// runs the three optimization passes over the subtree rooted at iNode, in a fixed order:
/// canonization, then constant folding, then the remaining peephole optimizations
void ExprParser_t::Optimize ( int iNode )
{
	CanonizePass ( iNode );
	ConstantFoldPass ( iNode );
	VariousOptimizationsPass ( iNode );
}
2889
2890
2891 // debug dump
Dump(int iNode)2892 void ExprParser_t::Dump ( int iNode )
2893 {
2894 if ( iNode<0 )
2895 return;
2896
2897 ExprNode_t & tNode = m_dNodes[iNode];
2898 switch ( tNode.m_iToken )
2899 {
2900 case TOK_CONST_INT:
2901 printf ( INT64_FMT, tNode.m_iConst );
2902 break;
2903
2904 case TOK_CONST_FLOAT:
2905 printf ( "%f", tNode.m_fConst );
2906 break;
2907
2908 case TOK_ATTR_INT:
2909 case TOK_ATTR_SINT:
2910 printf ( "row[%d]", tNode.m_tLocator.m_iBitOffset/32 );
2911 break;
2912
2913 default:
2914 printf ( "(" );
2915 Dump ( tNode.m_iLeft );
2916 printf ( ( tNode.m_iToken<256 ) ? " %c " : " op-%d ", tNode.m_iToken );
2917 Dump ( tNode.m_iRight );
2918 printf ( ")" );
2919 break;
2920 }
2921 }
2922
2923
2924 /// fold arglist into array
FoldArglist(ISphExpr * pLeft,CSphVector<ISphExpr * > & dArgs)2925 static void FoldArglist ( ISphExpr * pLeft, CSphVector<ISphExpr *> & dArgs )
2926 {
2927 if ( !pLeft || !pLeft->IsArglist() )
2928 {
2929 dArgs.Add ( pLeft );
2930 return;
2931 }
2932
2933 Expr_Arglist_c * pArgs = (Expr_Arglist_c *)pLeft;
2934 Swap ( dArgs, pArgs->m_dArgs );
2935 SafeRelease ( pLeft );
2936 }
2937
2938
// UDF entry point signatures, one per supported return kind (integer, floating point, string)
// the evaluators below call these with the call's init data, its marshalled arguments,
// and a pointer to the per-expression error flag
typedef sphinx_int64_t ( *UdfInt_fn ) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
typedef double ( *UdfDouble_fn ) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
typedef char * ( *UdfCharptr_fn) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
2942
2943
2944 class Expr_Udf_c : public ISphExpr
2945 {
2946 public:
2947 CSphVector<ISphExpr*> m_dArgs;
2948 CSphVector<int> m_dArgs2Free;
2949
2950 protected:
2951 UdfCall_t * m_pCall;
2952 mutable CSphVector<int64_t> m_dArgvals;
2953 mutable char m_bError;
2954 CSphQueryProfile * m_pProfiler;
2955
2956 public:
Expr_Udf_c(UdfCall_t * pCall,CSphQueryProfile * pProfiler)2957 explicit Expr_Udf_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
2958 : m_pCall ( pCall )
2959 , m_bError ( 0 )
2960 , m_pProfiler ( pProfiler )
2961 {
2962 SPH_UDF_ARGS & tArgs = m_pCall->m_tArgs;
2963
2964 assert ( tArgs.arg_values==NULL );
2965 tArgs.arg_values = new char * [ tArgs.arg_count ];
2966 tArgs.str_lengths = new int [ tArgs.arg_count ];
2967
2968 m_dArgs2Free = pCall->m_dArgs2Free;
2969 m_dArgvals.Resize ( tArgs.arg_count );
2970 ARRAY_FOREACH ( i, m_dArgvals )
2971 tArgs.arg_values[i] = (char*) &m_dArgvals[i];
2972 }
2973
~Expr_Udf_c()2974 ~Expr_Udf_c ()
2975 {
2976 if ( m_pCall->m_pUdf->m_fnDeinit )
2977 m_pCall->m_pUdf->m_fnDeinit ( &m_pCall->m_tInit );
2978 SafeDelete ( m_pCall );
2979
2980 ARRAY_FOREACH ( i, m_dArgs )
2981 SafeRelease ( m_dArgs[i] );
2982 }
2983
FillArgs(const CSphMatch & tMatch) const2984 void FillArgs ( const CSphMatch & tMatch ) const
2985 {
2986 // FIXME? a cleaner way to reinterpret?
2987 SPH_UDF_ARGS & tArgs = m_pCall->m_tArgs;
2988 ARRAY_FOREACH ( i, m_dArgs )
2989 {
2990 switch ( tArgs.arg_types[i] )
2991 {
2992 case SPH_UDF_TYPE_UINT32: *(DWORD*)&m_dArgvals[i] = m_dArgs[i]->IntEval ( tMatch ); break;
2993 case SPH_UDF_TYPE_INT64: m_dArgvals[i] = m_dArgs[i]->Int64Eval ( tMatch ); break;
2994 case SPH_UDF_TYPE_FLOAT: *(float*)&m_dArgvals[i] = m_dArgs[i]->Eval ( tMatch ); break;
2995 case SPH_UDF_TYPE_STRING: tArgs.str_lengths[i] = m_dArgs[i]->StringEval ( tMatch, (const BYTE**)&tArgs.arg_values[i] ); break;
2996 case SPH_UDF_TYPE_UINT32SET: tArgs.arg_values[i] = (char*) m_dArgs[i]->MvaEval ( tMatch ); break;
2997 case SPH_UDF_TYPE_UINT64SET: tArgs.arg_values[i] = (char*) m_dArgs[i]->MvaEval ( tMatch ); break;
2998 case SPH_UDF_TYPE_FACTORS: tArgs.arg_values[i] = (char*) m_dArgs[i]->FactorEval ( tMatch ); break;
2999 default: assert ( 0 ); m_dArgvals[i] = 0; break;
3000 }
3001 }
3002 }
3003
FreeArgs() const3004 void FreeArgs() const
3005 {
3006 ARRAY_FOREACH ( i, m_dArgs2Free )
3007 {
3008 int iAttr = m_dArgs2Free[i];
3009 SafeDeleteArray ( m_pCall->m_tArgs.arg_values[iAttr] );
3010 }
3011 }
3012
Command(ESphExprCommand eCmd,void * pArg)3013 virtual void Command ( ESphExprCommand eCmd, void * pArg )
3014 {
3015 if ( eCmd==SPH_EXPR_GET_UDF )
3016 {
3017 *((bool*)pArg) = true;
3018 return;
3019 }
3020 ARRAY_FOREACH ( i, m_dArgs )
3021 m_dArgs[i]->Command ( eCmd, pArg );
3022 }
3023 };
3024
3025
3026 class Expr_UdfInt_c : public Expr_Udf_c
3027 {
3028 public:
Expr_UdfInt_c(UdfCall_t * pCall,CSphQueryProfile * pProfiler)3029 explicit Expr_UdfInt_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
3030 : Expr_Udf_c ( pCall, pProfiler )
3031 {
3032 assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_INTEGER || pCall->m_pUdf->m_eRetType==SPH_ATTR_BIGINT );
3033 }
3034
Int64Eval(const CSphMatch & tMatch) const3035 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
3036 {
3037 ESphQueryState eOld = SPH_QSTATE_TOTAL;
3038 if ( m_pProfiler )
3039 eOld = m_pProfiler->Switch ( SPH_QSTATE_EVAL_UDF );
3040
3041 if ( m_bError )
3042 {
3043 if ( m_pProfiler )
3044 m_pProfiler->Switch ( eOld );
3045 return 0;
3046 }
3047
3048 FillArgs ( tMatch );
3049 UdfInt_fn pFn = (UdfInt_fn) m_pCall->m_pUdf->m_fnFunc;
3050 int64_t iRes = pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError );
3051 FreeArgs();
3052
3053 if ( m_pProfiler )
3054 m_pProfiler->Switch ( eOld );
3055 return iRes;
3056 }
3057
IntEval(const CSphMatch & tMatch) const3058 virtual int IntEval ( const CSphMatch & tMatch ) const { return (int) Int64Eval ( tMatch ); }
Eval(const CSphMatch & tMatch) const3059 virtual float Eval ( const CSphMatch & tMatch ) const { return (float) Int64Eval ( tMatch ); }
3060 };
3061
3062
3063 class Expr_UdfFloat_c : public Expr_Udf_c
3064 {
3065 public:
Expr_UdfFloat_c(UdfCall_t * pCall,CSphQueryProfile * pProfiler)3066 explicit Expr_UdfFloat_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
3067 : Expr_Udf_c ( pCall, pProfiler )
3068 {
3069 assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_FLOAT );
3070 }
3071
Eval(const CSphMatch & tMatch) const3072 virtual float Eval ( const CSphMatch & tMatch ) const
3073 {
3074 ESphQueryState eOld = SPH_QSTATE_TOTAL;
3075 if ( m_pProfiler )
3076 eOld = m_pProfiler->Switch ( SPH_QSTATE_EVAL_UDF );
3077
3078 if ( m_bError )
3079 {
3080 if ( m_pProfiler )
3081 m_pProfiler->Switch ( eOld );
3082 return 0;
3083 }
3084
3085 FillArgs ( tMatch );
3086 UdfDouble_fn pFn = (UdfDouble_fn) m_pCall->m_pUdf->m_fnFunc;
3087 float fRes = (float) pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError );
3088 FreeArgs();
3089
3090 if ( m_pProfiler )
3091 m_pProfiler->Switch ( eOld );
3092 return fRes;
3093 }
3094
IntEval(const CSphMatch & tMatch) const3095 virtual int IntEval ( const CSphMatch & tMatch ) const { return (int) Eval ( tMatch ); }
Int64Eval(const CSphMatch & tMatch) const3096 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t) Eval ( tMatch ); }
3097 };
3098
3099
3100 class Expr_UdfStringptr_c : public Expr_Udf_c
3101 {
3102 public:
Expr_UdfStringptr_c(UdfCall_t * pCall,CSphQueryProfile * pProfiler)3103 explicit Expr_UdfStringptr_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
3104 : Expr_Udf_c ( pCall, pProfiler )
3105 {
3106 assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_STRINGPTR );
3107 }
3108
Eval(const CSphMatch &) const3109 virtual float Eval ( const CSphMatch & ) const
3110 {
3111 assert ( 0 && "internal error: stringptr udf evaluated as float" );
3112 return 0.0f;
3113 }
3114
IntEval(const CSphMatch &) const3115 virtual int IntEval ( const CSphMatch & ) const
3116 {
3117 assert ( 0 && "internal error: stringptr udf evaluated as int" );
3118 return 0;
3119 }
3120
Int64Eval(const CSphMatch &) const3121 virtual int64_t Int64Eval ( const CSphMatch & ) const
3122 {
3123 assert ( 0 && "internal error: stringptr udf evaluated as bigint" );
3124 return 0;
3125 }
3126
StringEval(const CSphMatch & tMatch,const BYTE ** ppStr) const3127 virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
3128 {
3129 ESphQueryState eOld = SPH_QSTATE_TOTAL;
3130 if ( m_pProfiler )
3131 eOld = m_pProfiler->Switch ( SPH_QSTATE_EVAL_UDF );
3132
3133 FillArgs ( tMatch );
3134 UdfCharptr_fn pFn = (UdfCharptr_fn) m_pCall->m_pUdf->m_fnFunc;
3135 char * pRes = pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError ); // owned now!
3136 *ppStr = (const BYTE*) pRes;
3137 int iLen = ( pRes ? strlen(pRes) : 0 );
3138 FreeArgs();
3139
3140 if ( m_pProfiler )
3141 m_pProfiler->Switch ( eOld );
3142
3143 return iLen;
3144 }
3145
IsStringPtr() const3146 virtual bool IsStringPtr() const
3147 {
3148 return true;
3149 }
3150 };
3151
3152
CreateUdfNode(int iCall,ISphExpr * pLeft)3153 ISphExpr * ExprParser_t::CreateUdfNode ( int iCall, ISphExpr * pLeft )
3154 {
3155 Expr_Udf_c * pRes = NULL;
3156 switch ( m_dUdfCalls[iCall]->m_pUdf->m_eRetType )
3157 {
3158 case SPH_ATTR_INTEGER:
3159 case SPH_ATTR_BIGINT:
3160 pRes = new Expr_UdfInt_c ( m_dUdfCalls[iCall], m_pProfiler );
3161 break;
3162 case SPH_ATTR_FLOAT:
3163 pRes = new Expr_UdfFloat_c ( m_dUdfCalls[iCall], m_pProfiler );
3164 break;
3165 case SPH_ATTR_STRINGPTR:
3166 pRes = new Expr_UdfStringptr_c ( m_dUdfCalls[iCall], m_pProfiler );
3167 break;
3168 default:
3169 m_sCreateError.SetSprintf ( "internal error: unhandled type %d in CreateUdfNode()", m_dUdfCalls[iCall]->m_pUdf->m_eRetType );
3170 break;
3171 }
3172 if ( pRes )
3173 {
3174 if ( pLeft )
3175 FoldArglist ( pLeft, pRes->m_dArgs );
3176 m_dUdfCalls[iCall] = NULL; // evaluator owns it now
3177 }
3178 return pRes;
3179 }
3180
3181
CreateExistNode(const ExprNode_t & tNode)3182 ISphExpr * ExprParser_t::CreateExistNode ( const ExprNode_t & tNode )
3183 {
3184 assert ( m_dNodes[tNode.m_iLeft].m_iToken==',' );
3185 int iAttrName = m_dNodes[tNode.m_iLeft].m_iLeft;
3186 int iAttrDefault = m_dNodes[tNode.m_iLeft].m_iRight;
3187 assert ( iAttrName>=0 && iAttrName<m_dNodes.GetLength()
3188 && iAttrDefault>=0 && iAttrDefault<m_dNodes.GetLength() );
3189
3190 int iNameStart = (int)( m_dNodes[iAttrName].m_iConst>>32 );
3191 int iNameLen = (int)( m_dNodes[iAttrName].m_iConst & 0xffffffffUL );
3192 // skip head and tail non attribute name symbols
3193 while ( m_sExpr[iNameStart]!='\0' && ( m_sExpr[iNameStart]=='\'' || m_sExpr[iNameStart]==' ' ) && iNameLen )
3194 {
3195 iNameStart++;
3196 iNameLen--;
3197 }
3198 while ( m_sExpr[iNameStart+iNameLen-1]!='\0'
3199 && ( m_sExpr[iNameStart+iNameLen-1]=='\'' || m_sExpr[iNameStart+iNameLen-1]==' ' )
3200 && iNameLen )
3201 {
3202 iNameLen--;
3203 }
3204
3205 if ( iNameLen<=0 )
3206 {
3207 m_sCreateError.SetSprintf ( "first EXIST() argument must be valid string" );
3208 return NULL;
3209 }
3210
3211 assert ( iNameStart>=0 && iNameLen>0 && iNameStart+iNameLen<=(int)strlen ( m_sExpr ) );
3212
3213 CSphString sAttr ( m_sExpr+iNameStart, iNameLen );
3214 sphColumnToLowercase ( const_cast<char *>( sAttr.cstr() ) );
3215 int iLoc = m_pSchema->GetAttrIndex ( sAttr.cstr() );
3216
3217 if ( iLoc>=0 )
3218 {
3219 const CSphColumnInfo & tCol = m_pSchema->GetAttr ( iLoc );
3220 if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET || tCol.m_eAttrType==SPH_ATTR_STRING )
3221 {
3222 m_sCreateError = "MVA and STRING in EXIST() prohibited";
3223 return NULL;
3224 }
3225
3226 const CSphAttrLocator & tLoc = tCol.m_tLocator;
3227 if ( tNode.m_eRetType==SPH_ATTR_FLOAT )
3228 return new Expr_GetFloat_c ( tLoc, iLoc );
3229 else
3230 return new Expr_GetInt_c ( tLoc, iLoc );
3231 } else
3232 {
3233 if ( tNode.m_eRetType==SPH_ATTR_INTEGER )
3234 return new Expr_GetIntConst_c ( (int)m_dNodes[iAttrDefault].m_iConst );
3235 else if ( tNode.m_eRetType==SPH_ATTR_BIGINT )
3236 return new Expr_GetInt64Const_c ( m_dNodes[iAttrDefault].m_iConst );
3237 else
3238 return new Expr_GetConst_c ( m_dNodes[iAttrDefault].m_fConst );
3239 }
3240 }
3241
3242 //////////////////////////////////////////////////////////////////////////
3243
/// common base of the CONTAINS() evaluators
/// owns the two point coordinate expressions, and provides the point-in-polygon test;
/// IntEval() is left to the derived classes, the other numeric evaluators redirect to it
class Expr_Contains_c : public ISphExpr
{
protected:
	ISphExpr * m_pLat;
	ISphExpr * m_pLon;

	/// check whether the point (x,y) is inside of a polygon, or on one of its edges
	/// p is a flat array of n floats, read as consecutive (x,y) vertex pairs;
	/// the last vertex is connected back to the first one
	static bool Contains ( float x, float y, int n, const float * p )
	{
		bool bIn = false;
		for ( int ii=0; ii<n; ii+=2 )
		{
			// get that edge
			float ax = p[ii];
			float ay = p[ii+1];
			float bx = ( ii==n-2 ) ? p[0] : p[ii+2];
			float by = ( ii==n-2 ) ? p[1] : p[ii+3];

			// check point vs edge
			float t1 = (x-ax)*(by-ay);
			float t2 = (y-ay)*(bx-ax);
			if ( t1==t2 && !( ax==bx && ay==by ) )
			{
				// so AP and AB are colinear
				// because (AP dot (-AB.y, AB.x)) aka (t1-t2) is 0
				// check (AP dot AB) vs (AB dot AB) then
				float t3 = (x-ax)*(bx-ax) + (y-ay)*(by-ay); // AP dot AB
				float t4 = (bx-ax)*(bx-ax) + (by-ay)*(by-ay); // AB dot AB
				if ( t3>=0 && t3<=t4 )
					return true; // P is on the AB segment itself
			}

			// count edge crossings
			// only edges that straddle the horizontal line through P count; every crossing flips the flag
			if ( ( ay>y )!=(by>y) )
				if ( ( t1<t2 ) ^ ( by<ay ) )
					bIn = !bIn;
		}
		return bIn;
	}

public:
	Expr_Contains_c ( ISphExpr * pLat, ISphExpr * pLon )
		: m_pLat ( pLat )
		, m_pLon ( pLon )
	{}

	~Expr_Contains_c()
	{
		SafeRelease ( m_pLat );
		SafeRelease ( m_pLon );
	}

	virtual float Eval ( const CSphMatch & tMatch ) const
	{
		return (float)IntEval ( tMatch );
	}

	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
	{
		return IntEval ( tMatch );
	}

	/// forward the command to both coordinate expressions
	virtual void Command ( ESphExprCommand eCmd, void * pArg )
	{
		m_pLat->Command ( eCmd, pArg );
		m_pLon->Command ( eCmd, pArg );
	}

	// FIXME! implement SetStringPool?
};
3313
3314 //////////////////////////////////////////////////////////////////////////
3315 // GEODISTANCE
3316 //////////////////////////////////////////////////////////////////////////
3317
// conversions between degrees and radians
// the "2" variants use PI/360, ie. they convert degrees to radians and halve the result in one go
// (handy for the half-angle terms in haversine); the "F" variants are the same factors in floats
static const double PI = 3.14159265358979323846;
static const double TO_RAD = PI / 180.0;
static const double TO_RAD2 = PI / 360.0;
static const double TO_DEG = 180.0 / PI;
static const float TO_RADF = (float)( PI / 180.0 );
static const float TO_RADF2 = (float)( PI / 360.0 );
static const float TO_DEGF = (float)( 180.0 / PI );

// lookup table sizes
// NOTE: the cos and k tables are indexed using a (size-1) bitmask, so these sizes must stay powers of two
const int GEODIST_TABLE_COS = 1024; // maxerr 0.00063%
const int GEODIST_TABLE_ASIN = 512;
const int GEODIST_TABLE_K = 1024;

// lookup tables, filled by GeodistInit()
// each has one extra trailing entry, so that interpolation can always read entry [i+1]
static float g_GeoCos[GEODIST_TABLE_COS+1]; ///< cos(x) table
static float g_GeoAsin[GEODIST_TABLE_ASIN+1]; ///< asin(sqrt(x)) table
static float g_GeoFlatK[GEODIST_TABLE_K+1][2]; ///< GeodistAdaptive() flat ellipsoid method k1,k2 coeffs table
3334
3335
GeodistInit()3336 void GeodistInit()
3337 {
3338 for ( int i=0; i<=GEODIST_TABLE_COS; i++ )
3339 g_GeoCos[i] = (float)cos ( 2*PI*i/GEODIST_TABLE_COS ); // [0, 2pi] -> [0, COSTABLE]
3340
3341 for ( int i=0; i<=GEODIST_TABLE_ASIN; i++ )
3342 g_GeoAsin[i] = (float)asin ( sqrt ( double(i)/GEODIST_TABLE_ASIN ) ); // [0, 1] -> [0, ASINTABLE]
3343
3344 for ( int i=0; i<=GEODIST_TABLE_K; i++ )
3345 {
3346 double x = PI*i/GEODIST_TABLE_K - PI*0.5; // [-pi/2, pi/2] -> [0, KTABLE]
3347 g_GeoFlatK[i][0] = (float) sqr ( 111132.09 - 566.05*cos ( 2*x ) + 1.20*cos ( 4*x ) );
3348 g_GeoFlatK[i][1] = (float) sqr ( 111415.13*cos(x) - 94.55*cos ( 3*x ) + 0.12*cos ( 5*x ) );
3349 }
3350 }
3351
3352
GeodistSphereRad(float lat1,float lon1,float lat2,float lon2)3353 inline float GeodistSphereRad ( float lat1, float lon1, float lat2, float lon2 )
3354 {
3355 static const double D = 2*6384000;
3356 double dlat2 = 0.5*( lat1 - lat2 );
3357 double dlon2 = 0.5*( lon1 - lon2 );
3358 double a = sqr ( sin(dlat2) ) + cos(lat1)*cos(lat2)*sqr ( sin(dlon2) );
3359 double c = asin ( Min ( 1.0, sqrt(a) ) );
3360 return (float)(D*c);
3361 }
3362
3363
GeodistSphereDeg(float lat1,float lon1,float lat2,float lon2)3364 inline float GeodistSphereDeg ( float lat1, float lon1, float lat2, float lon2 )
3365 {
3366 static const double D = 2*6384000;
3367 double dlat2 = TO_RAD2*( lat1 - lat2 );
3368 double dlon2 = TO_RAD2*( lon1 - lon2 );
3369 double a = sqr ( sin(dlat2) ) + cos ( TO_RAD*lat1 )*cos ( TO_RAD*lat2 )*sqr ( sin(dlon2) );
3370 double c = asin ( Min ( 1.0, sqrt(a) ) );
3371 return (float)(D*c);
3372 }
3373
3374
/// normalize a difference between two angles (in degrees) to the [0, 180] range
/// the sign is dropped, full turns are removed, and anything over a half-turn is mirrored
/// NaN and infinite inputs produce NaN
static inline float GeodistDegDiff ( float f )
{
	f = (float)fabs(f);

	// drop full turns
	// fmod() instead of repeated subtraction, because subtracting 360 from a huge (or infinite)
	// float does not change it at all, and such a loop would never terminate
	if ( f>360 )
		f = (float)fmod ( f, 360.0 );

	if ( f>180 )
		f = 360-f;
	return f;
}
3384
3385
GeodistFlatDeg(float fLat1,float fLon1,float fLat2,float fLon2)3386 float GeodistFlatDeg ( float fLat1, float fLon1, float fLat2, float fLon2 )
3387 {
3388 double c1 = cos ( TO_RAD2*( fLat1+fLat2 ) );
3389 double c2 = 2*c1*c1-1; // cos(2*t)
3390 double c3 = c1*(2*c2-1); // cos(3*t)
3391 double k1 = 111132.09 - 566.05*c2;
3392 double k2 = 111415.13*c1 - 94.55*c3;
3393 float dlat = GeodistDegDiff ( fLat1-fLat2 );
3394 float dlon = GeodistDegDiff ( fLon1-fLon2 );
3395 return (float)sqrt ( k1*k1*dlat*dlat + k2*k2*dlon*dlon );
3396 }
3397
3398
GeodistFastCos(float x)3399 static inline float GeodistFastCos ( float x )
3400 {
3401 float y = (float)(fabs(x)*GEODIST_TABLE_COS/PI/2);
3402 int i = int(y);
3403 y -= i;
3404 i &= ( GEODIST_TABLE_COS-1 );
3405 return g_GeoCos[i] + ( g_GeoCos[i+1]-g_GeoCos[i] )*y;
3406 }
3407
3408
GeodistFastSin(float x)3409 static inline float GeodistFastSin ( float x )
3410 {
3411 float y = float(fabs(x)*GEODIST_TABLE_COS/PI/2);
3412 int i = int(y);
3413 y -= i;
3414 i = ( i - GEODIST_TABLE_COS/4 ) & ( GEODIST_TABLE_COS-1 ); // cos(x-pi/2)=sin(x), costable/4=pi/2
3415 return g_GeoCos[i] + ( g_GeoCos[i+1]-g_GeoCos[i] )*y;
3416 }
3417
3418
3419 /// fast implementation of asin(sqrt(x))
3420 /// max error in floats 0.00369%, in doubles 0.00072%
GeodistFastAsinSqrt(float x)3421 static inline float GeodistFastAsinSqrt ( float x )
3422 {
3423 if ( x<0.122 )
3424 {
3425 // distance under 4546km, Taylor error under 0.00072%
3426 float y = (float)sqrt(x);
3427 return y + x*y*0.166666666666666f + x*x*y*0.075f + x*x*x*y*0.044642857142857f;
3428 }
3429 if ( x<0.948 )
3430 {
3431 // distance under 17083km, 512-entry LUT error under 0.00072%
3432 x *= GEODIST_TABLE_ASIN;
3433 int i = int(x);
3434 return g_GeoAsin[i] + ( g_GeoAsin[i+1] - g_GeoAsin[i] )*( x-i );
3435 }
3436 return (float)asin ( sqrt(x) ); // distance over 17083km, just compute honestly
3437 }
3438
3439
GeodistAdaptiveDeg(float lat1,float lon1,float lat2,float lon2)3440 inline float GeodistAdaptiveDeg ( float lat1, float lon1, float lat2, float lon2 )
3441 {
3442 float dlat = GeodistDegDiff ( lat1-lat2 );
3443 float dlon = GeodistDegDiff ( lon1-lon2 );
3444
3445 if ( dlon<13 )
3446 {
3447 // points are close enough; use flat ellipsoid model
3448 // interpolate sqr(k1), sqr(k2) coefficients using latitudes midpoint
3449 float m = ( lat1+lat2+180 )*GEODIST_TABLE_K/360; // [-90, 90] degrees -> [0, KTABLE] indexes
3450 int i = int(m);
3451 i &= ( GEODIST_TABLE_K-1 );
3452 float kk1 = g_GeoFlatK[i][0] + ( g_GeoFlatK[i+1][0] - g_GeoFlatK[i][0] )*( m-i );
3453 float kk2 = g_GeoFlatK[i][1] + ( g_GeoFlatK[i+1][1] - g_GeoFlatK[i][1] )*( m-i );
3454 return (float)sqrt ( kk1*dlat*dlat + kk2*dlon*dlon );
3455 } else
3456 {
3457 // points too far away; use haversine
3458 static const float D = 2*6371000;
3459 float a = fsqr ( GeodistFastSin ( dlat*TO_RADF2 ) ) + GeodistFastCos ( lat1*TO_RADF ) * GeodistFastCos ( lat2*TO_RADF ) * fsqr ( GeodistFastSin ( dlon*TO_RADF2 ) );
3460 return (float)( D*GeodistFastAsinSqrt(a) );
3461 }
3462 }
3463
3464
/// adaptive geodistance; arguments are in radians
/// converts all four arguments to degrees, and calls the degrees version
inline float GeodistAdaptiveRad ( float lat1, float lon1, float lat2, float lon2 )
{
	// cut-paste-optimize, maybe?
	return GeodistAdaptiveDeg ( lat1*TO_DEGF, lon1*TO_DEGF, lat2*TO_DEGF, lon2*TO_DEGF );
}
3470
3471
/// tesselate the long edges of a geo polygon, in place
/// dIn is a flat array of (lat,lon) pairs in degrees; the last vertex connects back to the first
/// every edge longer than the threshold gets split into equal segments, with the extra
/// vertices interpolated along the great circle arc between the edge ends
/// NOTE(review): vertices are read in pairs, so dIn length is assumed to be even - confirm with callers
static inline void GeoTesselate ( CSphVector<float> & dIn )
{
	// 1 minute of latitude, max
	// (it varies from 1842.9 to 1861.57 at 0 to 90 respectively)
	static const float LAT_MINUTE = 1861.57f;

	// 1 minute of longitude in metres, at different latitudes
	static const float LON_MINUTE[] =
	{
		1855.32f, 1848.31f, 1827.32f, 1792.51f, // 0, 5, 10, 15
		1744.12f, 1682.50f, 1608.10f, 1521.47f, // 20, 25, 30, 35
		1423.23f, 1314.11f, 1194.93f, 1066.57f, // 40, 45, 50, 55
		930.00f, 786.26f, 636.44f, 481.70f, // 60, 65 70, 75
		323.22f, 162.24f, 0.0f // 80, 85, 90
	};

	// tesselation threshold
	// FIXME! make this configurable?
	static const float TESSELATE_TRESH = 500000.0f; // 500 km, error under 150m or 0.03%

	CSphVector<float> dOut;
	for ( int i=0; i<dIn.GetLength(); i+=2 )
	{
		// add the current vertex in any event
		dOut.Add ( dIn[i] );
		dOut.Add ( dIn[i+1] );

		// get edge lat/lon (still in degrees here)
		bool bLast = ( i==dIn.GetLength()-2 );
		float fLat1 = dIn[i];
		float fLon1 = dIn[i+1];
		float fLat2 = dIn [ bLast ? 0 : (i+2) ];
		float fLon2 = dIn [ bLast ? 1 : (i+3) ];

		// quick rough geodistance estimation
		// picks the longitude scale by a 5-degree latitude band, and sums up both differences
		float fMinLat = Min ( fLat1, fLat2 );
		int iLatBand = (int) floor ( fabs ( fMinLat ) / 5.0f );
		iLatBand = iLatBand % 18;

		float d = (float) (60.0f*( LAT_MINUTE*fabs ( fLat1-fLat2 ) + LON_MINUTE [ iLatBand ]*fabs ( fLon1-fLon2 ) ) );
		if ( d<=TESSELATE_TRESH )
			continue;

		// convert to radians
		// FIXME! make units configurable
		fLat1 *= TO_RADF;
		fLon1 *= TO_RADF;
		fLat2 *= TO_RADF;
		fLon2 *= TO_RADF;

		// compute precise geodistance
		d = GeodistSphereRad ( fLat1, fLon1, fLat2, fLon2 );
		if ( d<=TESSELATE_TRESH )
			continue;
		int iSegments = (int) ceil ( d / TESSELATE_TRESH );

		// compute arc distance
		// OPTIMIZE! maybe combine with CalcGeodist?
		d = (float)acos ( sin(fLat1)*sin(fLat2) + cos(fLat1)*cos(fLat2)*cos(fLon1-fLon2) );
		const float isd = (float)(1.0f / sin(d));
		const float clat1 = (float)cos(fLat1);
		const float slat1 = (float)sin(fLat1);
		const float clon1 = (float)cos(fLon1);
		const float slon1 = (float)sin(fLon1);
		const float clat2 = (float)cos(fLat2);
		const float slat2 = (float)sin(fLat2);
		const float clon2 = (float)cos(fLon2);
		const float slon2 = (float)sin(fLon2);

		// emit the intermediate vertices (the edge ends themselves are not emitted here)
		for ( int j=1; j<iSegments; j++ )
		{
			float f = float(j) / float(iSegments); // needed distance fraction
			float a = (float)sin ( (1-f)*d ) * isd;
			float b = (float)sin ( f*d ) * isd;

			// weighted sum of both ends, in 3D cartesian coordinates
			float x = a*clat1*clon1 + b*clat2*clon2;
			float y = a*clat1*slon1 + b*clat2*slon2;
			float z = a*slat1 + b*slat2;

			// and back to lat/lon, in degrees
			dOut.Add ( (float)( TO_DEG * atan2 ( z, sqrt ( x*x+y*y ) ) ) );
			dOut.Add ( (float)( TO_DEG * atan2 ( y, x ) ) );
		}
	}

	// swap 'em results
	dIn.SwapData ( dOut );
}
3557
3558 //////////////////////////////////////////////////////////////////////////
3559
3560 class Expr_ContainsConstvec_c : public Expr_Contains_c
3561 {
3562 protected:
3563 CSphVector<float> m_dPoly;
3564 float m_fMinX;
3565 float m_fMinY;
3566 float m_fMaxX;
3567 float m_fMaxY;
3568
3569 public:
Expr_ContainsConstvec_c(ISphExpr * pLat,ISphExpr * pLon,const CSphVector<int> & dNodes,const ExprNode_t * pNodes,bool bGeoTesselate)3570 Expr_ContainsConstvec_c ( ISphExpr * pLat, ISphExpr * pLon, const CSphVector<int> & dNodes, const ExprNode_t * pNodes, bool bGeoTesselate )
3571 : Expr_Contains_c ( pLat, pLon )
3572 {
3573 // copy polygon data
3574 assert ( dNodes.GetLength()>=6 );
3575 m_dPoly.Resize ( dNodes.GetLength() );
3576
3577 ARRAY_FOREACH ( i, dNodes )
3578 m_dPoly[i] = FloatVal ( &pNodes[dNodes[i]] );
3579
3580 // handle (huge) geosphere polygons
3581 if ( bGeoTesselate )
3582 GeoTesselate ( m_dPoly );
3583
3584 // compute bbox
3585 m_fMinX = m_fMaxX = m_dPoly[0];
3586 for ( int i=2; i<m_dPoly.GetLength(); i+=2 )
3587 {
3588 m_fMinX = Min ( m_fMinX, m_dPoly[i] );
3589 m_fMaxX = Max ( m_fMaxX, m_dPoly[i] );
3590 }
3591
3592 m_fMinY = m_fMaxY = m_dPoly[1];
3593 for ( int i=3; i<m_dPoly.GetLength(); i+=2 )
3594 {
3595 m_fMinY = Min ( m_fMinY, m_dPoly[i] );
3596 m_fMaxY = Max ( m_fMaxY, m_dPoly[i] );
3597 }
3598 }
3599
IntEval(const CSphMatch & tMatch) const3600 virtual int IntEval ( const CSphMatch & tMatch ) const
3601 {
3602 // eval args, do bbox check
3603 float fLat = m_pLat->Eval(tMatch);
3604 if ( fLat<m_fMinX || fLat>m_fMaxX )
3605 return 0;
3606
3607 float fLon = m_pLon->Eval(tMatch);
3608 if ( fLon<m_fMinY || fLon>m_fMaxY )
3609 return 0;
3610
3611 // do the polygon check
3612 return Contains ( fLat, fLon, m_dPoly.GetLength(), m_dPoly.Begin() );
3613 }
3614 };
3615
3616
3617 class Expr_ContainsExprvec_c : public Expr_Contains_c
3618 {
3619 protected:
3620 mutable CSphVector<float> m_dPoly;
3621 CSphVector<ISphExpr*> m_dExpr;
3622
3623 public:
Expr_ContainsExprvec_c(ISphExpr * pLat,ISphExpr * pLon,CSphVector<ISphExpr * > dExprs)3624 Expr_ContainsExprvec_c ( ISphExpr * pLat, ISphExpr * pLon, CSphVector<ISphExpr*> dExprs )
3625 : Expr_Contains_c ( pLat, pLon )
3626 {
3627 m_dExpr.SwapData ( dExprs );
3628 m_dPoly.Resize ( m_dExpr.GetLength() );
3629 }
3630
~Expr_ContainsExprvec_c()3631 ~Expr_ContainsExprvec_c()
3632 {
3633 ARRAY_FOREACH ( i, m_dExpr )
3634 SafeRelease ( m_dExpr[i] );
3635 }
3636
IntEval(const CSphMatch & tMatch) const3637 virtual int IntEval ( const CSphMatch & tMatch ) const
3638 {
3639 ARRAY_FOREACH ( i, m_dExpr )
3640 m_dPoly[i] = m_dExpr[i]->Eval ( tMatch );
3641 return Contains ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_dPoly.GetLength(), m_dPoly.Begin() );
3642 }
3643 };
3644
3645
3646 class Expr_ContainsStrattr_c : public Expr_Contains_c
3647 {
3648 protected:
3649 ISphExpr * m_pStr;
3650 bool m_bGeo;
3651
3652 public:
Expr_ContainsStrattr_c(ISphExpr * pLat,ISphExpr * pLon,ISphExpr * pStr,bool bGeo)3653 Expr_ContainsStrattr_c ( ISphExpr * pLat, ISphExpr * pLon, ISphExpr * pStr, bool bGeo )
3654 : Expr_Contains_c ( pLat, pLon )
3655 {
3656 m_pStr = pStr;
3657 m_bGeo = bGeo;
3658 }
3659
~Expr_ContainsStrattr_c()3660 ~Expr_ContainsStrattr_c()
3661 {
3662 SafeRelease ( m_pStr );
3663 }
3664
ParsePoly(const char * p,int iLen,CSphVector<float> & dPoly)3665 static void ParsePoly ( const char * p, int iLen, CSphVector<float> & dPoly )
3666 {
3667 const char * pMax = p+iLen;
3668 while ( p<pMax )
3669 {
3670 if ( isdigit(p[0]) || ( p+1<pMax && p[0]=='-' && isdigit(p[1]) ) )
3671 dPoly.Add ( (float)strtod ( p, (char**)&p ) );
3672 else
3673 p++;
3674 }
3675 }
3676
IntEval(const CSphMatch & tMatch) const3677 virtual int IntEval ( const CSphMatch & tMatch ) const
3678 {
3679 const char * pStr;
3680 assert ( !m_pStr->IsStringPtr() ); // aware of mem leaks caused by some StringEval implementations
3681 int iLen = m_pStr->StringEval ( tMatch, (const BYTE **)&pStr );
3682
3683 CSphVector<float> dPoly;
3684 ParsePoly ( pStr, iLen, dPoly );
3685 if ( dPoly.GetLength()<6 )
3686 return 0;
3687 // OPTIMIZE? add quick bbox check too?
3688
3689 if ( m_bGeo )
3690 GeoTesselate ( dPoly );
3691 return Contains ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), dPoly.GetLength(), dPoly.Begin() );
3692 }
3693
Command(ESphExprCommand eCmd,void * pArg)3694 virtual void Command ( ESphExprCommand eCmd, void * pArg )
3695 {
3696 Expr_Contains_c::Command ( eCmd, pArg );
3697 m_pStr->Command ( eCmd, pArg );
3698 }
3699 };
3700
3701
CreateContainsNode(const ExprNode_t & tNode)3702 ISphExpr * ExprParser_t::CreateContainsNode ( const ExprNode_t & tNode )
3703 {
3704 // get and check them args
3705 const ExprNode_t & tArglist = m_dNodes [ tNode.m_iLeft ];
3706 const int iPoly = m_dNodes [ tArglist.m_iLeft ].m_iLeft;
3707 const int iLat = m_dNodes [ tArglist.m_iLeft ].m_iRight;
3708 const int iLon = tArglist.m_iRight;
3709 assert ( IsNumeric ( m_dNodes[iLat].m_eRetType ) );
3710 assert ( IsNumeric ( m_dNodes[iLat].m_eRetType ) );
3711 assert ( m_dNodes[iPoly].m_eRetType==SPH_ATTR_POLY2D );
3712
3713 // create evaluator
3714 // gotta handle an optimized constant poly case
3715 CSphVector<int> dPolyArgs;
3716 GatherArgNodes ( m_dNodes[iPoly].m_iLeft, dPolyArgs );
3717
3718 bool bGeoTesselate = ( m_dNodes[iPoly].m_iToken==TOK_FUNC && m_dNodes[iPoly].m_iFunc==FUNC_GEOPOLY2D );
3719
3720 if ( dPolyArgs.GetLength()==1 && m_dNodes[dPolyArgs[0]].m_iToken==TOK_ATTR_STRING )
3721 {
3722 return new Expr_ContainsStrattr_c ( CreateTree(iLat), CreateTree(iLon),
3723 CreateTree ( dPolyArgs[0] ), bGeoTesselate );
3724 }
3725
3726 bool bConst = ARRAY_ALL ( bConst, dPolyArgs, IsConst ( &m_dNodes [ dPolyArgs[_all] ] ) );
3727 if ( bConst )
3728 {
3729 // POLY2D(numeric-consts)
3730 return new Expr_ContainsConstvec_c ( CreateTree(iLat), CreateTree(iLon),
3731 dPolyArgs, m_dNodes.Begin(), bGeoTesselate );
3732 } else
3733 {
3734 // POLY2D(generic-exprs)
3735 CSphVector<ISphExpr*> dExprs ( dPolyArgs.GetLength() );
3736 ARRAY_FOREACH ( i, dExprs )
3737 dExprs[i] = CreateTree ( dPolyArgs[i] );
3738 return new Expr_ContainsExprvec_c ( CreateTree(iLat), CreateTree(iLon), dExprs );
3739 }
3740 }
3741
3742 class Expr_Remap_c : public ISphExpr
3743 {
3744 struct CondValPair_t
3745 {
3746 int64_t m_iCond;
3747 union
3748 {
3749 int64_t m_iVal;
3750 float m_fVal;
3751 };
3752
CondValPair_tExpr_Remap_c::CondValPair_t3753 explicit CondValPair_t ( int64_t iCond=0 ) : m_iCond ( iCond ), m_iVal ( 0 ) {}
operator <Expr_Remap_c::CondValPair_t3754 bool operator< ( const CondValPair_t & rhs ) const { return m_iCond<rhs.m_iCond; }
operator ==Expr_Remap_c::CondValPair_t3755 bool operator== ( const CondValPair_t & rhs ) const { return m_iCond==rhs.m_iCond; }
3756 };
3757
3758 ISphExpr * m_pCond;
3759 ISphExpr * m_pVal;
3760 CSphVector<CondValPair_t> m_dPairs;
3761
3762 public:
Expr_Remap_c(ISphExpr * pCondExpr,ISphExpr * pValExpr,const CSphVector<int64_t> & dConds,const ConstList_c & tVals)3763 Expr_Remap_c ( ISphExpr * pCondExpr, ISphExpr * pValExpr, const CSphVector<int64_t> & dConds, const ConstList_c & tVals )
3764 : m_pCond ( pCondExpr )
3765 , m_pVal ( pValExpr )
3766 , m_dPairs ( dConds.GetLength() )
3767 {
3768 assert ( pCondExpr && pValExpr );
3769 assert ( dConds.GetLength() );
3770 assert ( dConds.GetLength()==tVals.m_dInts.GetLength() ||
3771 dConds.GetLength()==tVals.m_dFloats.GetLength() );
3772
3773 if ( tVals.m_dInts.GetLength() )
3774 ARRAY_FOREACH ( i, m_dPairs )
3775 {
3776 m_dPairs[i].m_iCond = dConds[i];
3777 m_dPairs[i].m_iVal = tVals.m_dInts[i];
3778 }
3779 else
3780 ARRAY_FOREACH ( i, m_dPairs )
3781 {
3782 m_dPairs[i].m_iCond = dConds[i];
3783 m_dPairs[i].m_fVal = tVals.m_dFloats[i];
3784 }
3785
3786 m_dPairs.Uniq();
3787 }
3788
~Expr_Remap_c()3789 ~Expr_Remap_c()
3790 {
3791 SafeRelease ( m_pCond );
3792 SafeRelease ( m_pVal );
3793 }
3794
Eval(const CSphMatch & tMatch) const3795 virtual float Eval ( const CSphMatch & tMatch ) const
3796 {
3797 const CondValPair_t * p = m_dPairs.BinarySearch ( CondValPair_t ( m_pCond->Int64Eval ( tMatch ) ) );
3798 if ( p )
3799 return p->m_fVal;
3800 return m_pVal->Eval ( tMatch );
3801 }
3802
IntEval(const CSphMatch & tMatch) const3803 virtual int IntEval ( const CSphMatch & tMatch ) const
3804 {
3805 return (int)Int64Eval ( tMatch );
3806 }
3807
Int64Eval(const CSphMatch & tMatch) const3808 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
3809 {
3810 const CondValPair_t * p = m_dPairs.BinarySearch ( CondValPair_t ( m_pCond->Int64Eval ( tMatch ) ) );
3811 if ( p )
3812 return p->m_iVal;
3813 return m_pVal->Int64Eval ( tMatch );
3814 }
3815
3816 };
3817
3818 //////////////////////////////////////////////////////////////////////////
3819
3820 /// fold nodes subtree into opcodes
CreateTree(int iNode)3821 ISphExpr * ExprParser_t::CreateTree ( int iNode )
3822 {
3823 if ( iNode<0 || GetError() )
3824 return NULL;
3825
3826 const ExprNode_t & tNode = m_dNodes[iNode];
3827
3828 // avoid spawning argument node in some cases
3829 bool bSkipLeft = false;
3830 bool bSkipRight = false;
3831 if ( tNode.m_iToken==TOK_FUNC )
3832 {
3833 switch ( tNode.m_iFunc )
3834 {
3835 case FUNC_IN:
3836 case FUNC_EXIST:
3837 case FUNC_GEODIST:
3838 case FUNC_CONTAINS:
3839 case FUNC_ZONESPANLIST:
3840 case FUNC_RANKFACTORS:
3841 case FUNC_PACKEDFACTORS:
3842 case FUNC_FACTORS:
3843 case FUNC_BM25F:
3844 case FUNC_CURTIME:
3845 case FUNC_UTC_TIME:
3846 case FUNC_UTC_TIMESTAMP:
3847 case FUNC_ALL:
3848 case FUNC_ANY:
3849 case FUNC_INDEXOF:
3850 case FUNC_MIN_TOP_WEIGHT:
3851 case FUNC_MIN_TOP_SORTVAL:
3852 case FUNC_REMAP:
3853 bSkipLeft = true;
3854 bSkipRight = true;
3855 break;
3856 default:
3857 break;
3858 }
3859 }
3860
3861 ISphExpr * pLeft = bSkipLeft ? NULL : CreateTree ( tNode.m_iLeft );
3862 ISphExpr * pRight = bSkipRight ? NULL : CreateTree ( tNode.m_iRight );
3863
3864 if ( GetError() )
3865 {
3866 SafeRelease ( pLeft );
3867 SafeRelease ( pRight );
3868 return NULL;
3869 }
3870
3871 #define LOC_SPAWN_POLY(_classname) \
3872 if ( tNode.m_eArgType==SPH_ATTR_INTEGER ) return new _classname##Int_c ( pLeft, pRight ); \
3873 else if ( tNode.m_eArgType==SPH_ATTR_BIGINT ) return new _classname##Int64_c ( pLeft, pRight ); \
3874 else return new _classname##Float_c ( pLeft, pRight );
3875
3876 int iOp = tNode.m_iToken;
3877 if ( iOp=='+' || iOp=='-' || iOp=='*' || iOp=='/' || iOp=='&' || iOp=='|' || iOp=='%' || iOp=='<' || iOp=='>'
3878 || iOp==TOK_LTE || iOp==TOK_GTE || iOp==TOK_EQ || iOp==TOK_NE || iOp==TOK_AND || iOp==TOK_OR || iOp==TOK_NOT )
3879 {
3880 if ( pLeft && m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD && m_dNodes[tNode.m_iLeft].m_iToken==TOK_ATTR_JSON )
3881 pLeft = new Expr_JsonFieldConv_c ( pLeft );
3882 if ( pRight && m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_JSON_FIELD && m_dNodes[tNode.m_iRight].m_iToken==TOK_ATTR_JSON )
3883 pRight = new Expr_JsonFieldConv_c ( pRight );
3884 }
3885
3886 switch ( tNode.m_iToken )
3887 {
3888 case TOK_ATTR_INT: return new Expr_GetInt_c ( tNode.m_tLocator, tNode.m_iLocator );
3889 case TOK_ATTR_BITS: return new Expr_GetBits_c ( tNode.m_tLocator, tNode.m_iLocator );
3890 case TOK_ATTR_FLOAT: return new Expr_GetFloat_c ( tNode.m_tLocator, tNode.m_iLocator );
3891 case TOK_ATTR_SINT: return new Expr_GetSint_c ( tNode.m_tLocator, tNode.m_iLocator );
3892 case TOK_ATTR_STRING: return new Expr_GetString_c ( tNode.m_tLocator, tNode.m_iLocator );
3893 case TOK_ATTR_MVA64:
3894 case TOK_ATTR_MVA32: return new Expr_GetMva_c ( tNode.m_tLocator, tNode.m_iLocator );
3895 case TOK_ATTR_FACTORS: return new Expr_GetFactorsAttr_c ( tNode.m_tLocator, tNode.m_iLocator );
3896
3897 case TOK_CONST_FLOAT: return new Expr_GetConst_c ( tNode.m_fConst );
3898 case TOK_CONST_INT:
3899 if ( tNode.m_eRetType==SPH_ATTR_INTEGER )
3900 return new Expr_GetIntConst_c ( (int)tNode.m_iConst );
3901 else if ( tNode.m_eRetType==SPH_ATTR_BIGINT )
3902 return new Expr_GetInt64Const_c ( tNode.m_iConst );
3903 else
3904 return new Expr_GetConst_c ( float(tNode.m_iConst) );
3905 break;
3906 case TOK_CONST_STRING:
3907 return new Expr_GetStrConst_c ( m_sExpr+(int)( tNode.m_iConst>>32 ), (int)( tNode.m_iConst & 0xffffffffUL ), true );
3908 case TOK_SUBKEY:
3909 return new Expr_GetStrConst_c ( m_sExpr+(int)( tNode.m_iConst>>32 ), (int)( tNode.m_iConst & 0xffffffffUL ), false );
3910
3911 case TOK_ID: return new Expr_GetId_c ();
3912 case TOK_WEIGHT: return new Expr_GetWeight_c ();
3913
3914 case '+': return new Expr_Add_c ( pLeft, pRight ); break;
3915 case '-': return new Expr_Sub_c ( pLeft, pRight ); break;
3916 case '*': return new Expr_Mul_c ( pLeft, pRight ); break;
3917 case '/': return new Expr_Div_c ( pLeft, pRight ); break;
3918 case '&': return new Expr_BitAnd_c ( pLeft, pRight ); break;
3919 case '|': return new Expr_BitOr_c ( pLeft, pRight ); break;
3920 case '%': return new Expr_Mod_c ( pLeft, pRight ); break;
3921
3922 case '<': LOC_SPAWN_POLY ( Expr_Lt ); break;
3923 case '>': LOC_SPAWN_POLY ( Expr_Gt ); break;
3924 case TOK_LTE: LOC_SPAWN_POLY ( Expr_Lte ); break;
3925 case TOK_GTE: LOC_SPAWN_POLY ( Expr_Gte ); break;
3926 case TOK_EQ: if ( ( m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_STRING ||
3927 m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_STRINGPTR ) &&
3928 ( m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRING ||
3929 m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRINGPTR ) )
3930 return new Expr_StrEq_c ( pLeft, pRight, m_eCollation );
3931 else if ( ( m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD ) &&
3932 ( m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRING ||
3933 m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRINGPTR ) )
3934 return new Expr_StrEq_c ( pLeft, pRight, m_eCollation );
3935 LOC_SPAWN_POLY ( Expr_Eq ); break;
3936 case TOK_NE: LOC_SPAWN_POLY ( Expr_Ne ); break;
3937 case TOK_AND: LOC_SPAWN_POLY ( Expr_And ); break;
3938 case TOK_OR: LOC_SPAWN_POLY ( Expr_Or ); break;
3939 case TOK_NOT:
3940 if ( tNode.m_eArgType==SPH_ATTR_BIGINT )
3941 return new Expr_NotInt64_c ( pLeft );
3942 else
3943 return new Expr_NotInt_c ( pLeft );
3944 break;
3945
3946 case ',':
3947 if ( pLeft && pRight )
3948 return new Expr_Arglist_c ( pLeft, pRight );
3949 break;
3950
3951 case TOK_NEG: assert ( pRight==NULL ); return new Expr_Neg_c ( pLeft ); break;
3952 case TOK_FUNC:
3953 {
3954 // fold arglist to array
3955 Func_e eFunc = (Func_e)tNode.m_iFunc;
3956 assert ( g_dFuncs[tNode.m_iFunc].m_eFunc==eFunc );
3957
3958 CSphVector<ISphExpr *> dArgs;
3959 if ( !bSkipLeft )
3960 FoldArglist ( pLeft, dArgs );
3961
3962 // spawn proper function
3963 assert ( tNode.m_iFunc>=0 && tNode.m_iFunc<int(sizeof(g_dFuncs)/sizeof(g_dFuncs[0])) );
3964 assert (
3965 ( bSkipLeft ) || // function will handle its arglist,
3966 ( g_dFuncs[tNode.m_iFunc].m_iArgs>=0 && g_dFuncs[tNode.m_iFunc].m_iArgs==dArgs.GetLength() ) || // arg count matches,
3967 ( g_dFuncs[tNode.m_iFunc].m_iArgs<0 && -g_dFuncs[tNode.m_iFunc].m_iArgs<=dArgs.GetLength() ) ); // or min vararg count reached
3968
3969 switch ( eFunc )
3970 {
3971 case FUNC_NOW: assert ( 0 ); break; // prevent gcc bitching
3972
3973 case FUNC_ABS: return new Expr_Abs_c ( dArgs[0] );
3974 case FUNC_CEIL: return new Expr_Ceil_c ( dArgs[0] );
3975 case FUNC_FLOOR: return new Expr_Floor_c ( dArgs[0] );
3976 case FUNC_SIN: return new Expr_Sin_c ( dArgs[0] );
3977 case FUNC_COS: return new Expr_Cos_c ( dArgs[0] );
3978 case FUNC_LN: return new Expr_Ln_c ( dArgs[0] );
3979 case FUNC_LOG2: return new Expr_Log2_c ( dArgs[0] );
3980 case FUNC_LOG10: return new Expr_Log10_c ( dArgs[0] );
3981 case FUNC_EXP: return new Expr_Exp_c ( dArgs[0] );
3982 case FUNC_SQRT: return new Expr_Sqrt_c ( dArgs[0] );
3983 case FUNC_SINT: return new Expr_Sint_c ( dArgs[0] );
3984 case FUNC_CRC32: return new Expr_Crc32_c ( dArgs[0] );
3985 case FUNC_FIBONACCI:return new Expr_Fibonacci_c ( dArgs[0] );
3986
3987 case FUNC_DAY: return new Expr_Day_c ( dArgs[0] );
3988 case FUNC_MONTH: return new Expr_Month_c ( dArgs[0] );
3989 case FUNC_YEAR: return new Expr_Year_c ( dArgs[0] );
3990 case FUNC_YEARMONTH: return new Expr_YearMonth_c ( dArgs[0] );
3991 case FUNC_YEARMONTHDAY: return new Expr_YearMonthDay_c ( dArgs[0] );
3992
3993 case FUNC_MIN: return new Expr_Min_c ( dArgs[0], dArgs[1] );
3994 case FUNC_MAX: return new Expr_Max_c ( dArgs[0], dArgs[1] );
3995 case FUNC_POW: return new Expr_Pow_c ( dArgs[0], dArgs[1] );
3996 case FUNC_IDIV: return new Expr_Idiv_c ( dArgs[0], dArgs[1] );
3997
3998 case FUNC_IF: return new Expr_If_c ( dArgs[0], dArgs[1], dArgs[2] );
3999 case FUNC_MADD: return new Expr_Madd_c ( dArgs[0], dArgs[1], dArgs[2] );
4000 case FUNC_MUL3: return new Expr_Mul3_c ( dArgs[0], dArgs[1], dArgs[2] );
4001 case FUNC_ATAN2: return new Expr_Atan2_c ( dArgs[0], dArgs[1] );
4002
4003 case FUNC_INTERVAL: return CreateIntervalNode ( tNode.m_iLeft, dArgs );
4004 case FUNC_IN: return CreateInNode ( iNode );
4005 case FUNC_LENGTH: return CreateLengthNode ( tNode, dArgs[0] );
4006 case FUNC_BITDOT: return CreateBitdotNode ( tNode.m_iLeft, dArgs );
4007 case FUNC_REMAP:
4008 {
4009 ISphExpr * pCond = CreateTree ( tNode.m_iLeft );
4010 ISphExpr * pVal = CreateTree ( tNode.m_iRight );
4011 assert ( pCond && pVal );
4012 // This is a hack. I know how parser fills m_dNodes and thus know where to find constlists.
4013 const CSphVector<int64_t> & dConds = m_dNodes [ iNode-2 ].m_pConsts->m_dInts;
4014 const ConstList_c & tVals = *m_dNodes [ iNode-1 ].m_pConsts;
4015 return new Expr_Remap_c ( pCond, pVal, dConds, tVals );
4016 }
4017
4018 case FUNC_GEODIST: return CreateGeodistNode ( tNode.m_iLeft );
4019 case FUNC_EXIST: return CreateExistNode ( tNode );
4020 case FUNC_CONTAINS: return CreateContainsNode ( tNode );
4021
4022 case FUNC_POLY2D:
4023 case FUNC_GEOPOLY2D:break; // just make gcc happy
4024
4025 case FUNC_ZONESPANLIST:
4026 m_bHasZonespanlist = true;
4027 m_eEvalStage = SPH_EVAL_PRESORT;
4028 return new Expr_GetZonespanlist_c ();
4029 case FUNC_TO_STRING:
4030 return new Expr_ToString_c ( dArgs[0], m_dNodes [ tNode.m_iLeft ].m_eRetType );
4031 case FUNC_RANKFACTORS:
4032 m_eEvalStage = SPH_EVAL_PRESORT;
4033 return new Expr_GetRankFactors_c();
4034 case FUNC_PACKEDFACTORS:
4035 case FUNC_FACTORS:
4036 return CreatePFNode ( tNode.m_iLeft );
4037 case FUNC_BM25F:
4038 {
4039 m_uPackedFactorFlags |= SPH_FACTOR_ENABLE;
4040
4041 CSphVector<int> dBM25FArgs;
4042 GatherArgNodes ( tNode.m_iLeft, dBM25FArgs );
4043
4044 const ExprNode_t & tLeft = m_dNodes [ dBM25FArgs[0] ];
4045 const ExprNode_t & tRight = m_dNodes [ dBM25FArgs[1] ];
4046 float fK1 = tLeft.m_fConst;
4047 float fB = tRight.m_fConst;
4048 fK1 = Max ( fK1, 0.001f );
4049 fB = Min ( Max ( fB, 0.0f ), 1.0f );
4050
4051 CSphVector<CSphNamedVariant> * pFieldWeights = NULL;
4052 if ( dBM25FArgs.GetLength()>2 )
4053 pFieldWeights = &m_dNodes [ dBM25FArgs[2] ].m_pMapArg->m_dPairs;
4054
4055 return new Expr_BM25F_c ( fK1, fB, pFieldWeights );
4056 }
4057
4058 case FUNC_BIGINT:
4059 case FUNC_INTEGER:
4060 case FUNC_DOUBLE:
4061 case FUNC_UINT:
4062 if ( m_dNodes[tNode.m_iLeft].m_iToken==TOK_ATTR_JSON )
4063 return new Expr_JsonFieldConv_c ( dArgs[0] );
4064 return dArgs[0];
4065
4066 case FUNC_LEAST: return CreateAggregateNode ( tNode, SPH_AGGR_MIN, dArgs[0] );
4067 case FUNC_GREATEST: return CreateAggregateNode ( tNode, SPH_AGGR_MAX, dArgs[0] );
4068
4069 case FUNC_CURTIME: return new Expr_Time_c ( false, false ); break;
4070 case FUNC_UTC_TIME: return new Expr_Time_c ( true, false ); break;
4071 case FUNC_UTC_TIMESTAMP: return new Expr_Time_c ( true, true ); break;
4072 case FUNC_TIMEDIFF: return new Expr_TimeDiff_c ( dArgs[0], dArgs[1] ); break;
4073
4074 case FUNC_ALL:
4075 case FUNC_ANY:
4076 case FUNC_INDEXOF:
4077 return CreateForInNode ( iNode );
4078
4079 case FUNC_MIN_TOP_WEIGHT:
4080 m_eEvalStage = SPH_EVAL_PRESORT;
4081 return new Expr_MinTopWeight();
4082 break;
4083 case FUNC_MIN_TOP_SORTVAL:
4084 m_eEvalStage = SPH_EVAL_PRESORT;
4085 return new Expr_MinTopSortval();
4086 break;
4087 default: // just make gcc happy
4088 break;
4089 }
4090 assert ( 0 && "unhandled function id" );
4091 break;
4092 }
4093
4094 case TOK_UDF: return CreateUdfNode ( tNode.m_iFunc, pLeft ); break;
4095 case TOK_HOOK_IDENT: return m_pHook->CreateNode ( tNode.m_iFunc, NULL, NULL, m_sCreateError ); break;
4096 case TOK_HOOK_FUNC: return m_pHook->CreateNode ( tNode.m_iFunc, pLeft, &m_eEvalStage, m_sCreateError ); break;
4097 case TOK_MAP_ARG:
4098 // tricky bit
4099 // data gets moved (!) from node to ISphExpr at this point
4100 return new Expr_MapArg_c ( tNode.m_pMapArg->m_dPairs );
4101 break;
4102 case TOK_ATTR_JSON:
4103 if ( pLeft && m_dNodes[tNode.m_iLeft].m_iToken==TOK_SUBKEY && !tNode.m_tLocator.m_bDynamic )
4104 {
4105 // json key is a single static subkey, switch to fastpath
4106 return new Expr_JsonFastKey_c ( tNode.m_tLocator, tNode.m_iLocator, pLeft );
4107 } else
4108 {
4109 // json key is a generic expression, use generic catch-all JsonField
4110 CSphVector<ISphExpr *> dArgs;
4111 CSphVector<ESphAttr> dTypes;
4112 if ( pLeft ) // may be NULL (top level array)
4113 {
4114 FoldArglist ( pLeft, dArgs );
4115 GatherArgRetTypes ( tNode.m_iLeft, dTypes );
4116 }
4117 return new Expr_JsonField_c ( tNode.m_tLocator, tNode.m_iLocator, dArgs, dTypes );
4118 }
4119 break;
4120 case TOK_ITERATOR:
4121 {
4122 // iterator, e.g. handles "x.gid" in SELECT ALL(x.gid=1 FOR x IN json.array)
4123 CSphVector<ISphExpr *> dArgs;
4124 CSphVector<ESphAttr> dTypes;
4125 if ( pLeft )
4126 {
4127 FoldArglist ( pLeft, dArgs );
4128 GatherArgRetTypes ( tNode.m_iLeft, dTypes );
4129 }
4130 return new Expr_JsonFieldConv_c ( new Expr_Iterator_c ( tNode.m_tLocator, tNode.m_iLocator, dArgs, dTypes, tNode.m_pAttr ) );
4131 }
4132 case TOK_IDENT: m_sCreateError.SetSprintf ( "unknown column: %s", tNode.m_sIdent ); break;
4133
4134 case TOK_IS_NULL:
4135 case TOK_IS_NOT_NULL:
4136 if ( m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD )
4137 return new Expr_JsonFieldIsNull_c ( pLeft, tNode.m_iToken==TOK_IS_NULL );
4138 else
4139 return new Expr_GetIntConst_c ( tNode.m_iToken!=TOK_IS_NULL );
4140
4141 default: assert ( 0 && "unhandled token type" ); break;
4142 }
4143
4144 #undef LOC_SPAWN_POLY
4145
4146 // fire exit
4147 SafeRelease ( pLeft );
4148 SafeRelease ( pRight );
4149 return NULL;
4150 }
4151
4152 //////////////////////////////////////////////////////////////////////////
4153
4154 /// arg-vs-set function (currently, IN or INTERVAL) evaluator traits
4155 template < typename T >
4156 class Expr_ArgVsSet_c : public ISphExpr
4157 {
4158 protected:
4159 ISphExpr * m_pArg;
4160
4161 public:
Expr_ArgVsSet_c(ISphExpr * pArg)4162 explicit Expr_ArgVsSet_c ( ISphExpr * pArg ) : m_pArg ( pArg ) {}
~Expr_ArgVsSet_c()4163 ~Expr_ArgVsSet_c () { SafeRelease ( m_pArg ); }
4164
4165 virtual int IntEval ( const CSphMatch & tMatch ) const = 0;
Eval(const CSphMatch & tMatch) const4166 virtual float Eval ( const CSphMatch & tMatch ) const { return (float) IntEval ( tMatch ); }
Int64Eval(const CSphMatch & tMatch) const4167 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval ( tMatch ); }
4168
4169 protected:
4170 T ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const;
4171 };
4172
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const4173 template<> int Expr_ArgVsSet_c<int>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
4174 {
4175 return pArg->IntEval ( tMatch );
4176 }
4177
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const4178 template<> DWORD Expr_ArgVsSet_c<DWORD>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
4179 {
4180 return (DWORD)pArg->IntEval ( tMatch );
4181 }
4182
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const4183 template<> float Expr_ArgVsSet_c<float>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
4184 {
4185 return pArg->Eval ( tMatch );
4186 }
4187
ExprEval(ISphExpr * pArg,const CSphMatch & tMatch) const4188 template<> int64_t Expr_ArgVsSet_c<int64_t>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
4189 {
4190 return pArg->Int64Eval ( tMatch );
4191 }
4192
4193
4194 /// arg-vs-constant-set
4195 template < typename T >
4196 class Expr_ArgVsConstSet_c : public Expr_ArgVsSet_c<T>
4197 {
4198 protected:
4199 CSphVector<T> m_dValues;
4200
4201 public:
4202 /// take ownership of arg, pre-evaluate and dismiss turn points
Expr_ArgVsConstSet_c(ISphExpr * pArg,CSphVector<ISphExpr * > & dArgs,int iSkip)4203 Expr_ArgVsConstSet_c ( ISphExpr * pArg, CSphVector<ISphExpr *> & dArgs, int iSkip )
4204 : Expr_ArgVsSet_c<T> ( pArg )
4205 {
4206 CSphMatch tDummy;
4207 for ( int i=iSkip; i<dArgs.GetLength(); i++ )
4208 {
4209 m_dValues.Add ( Expr_ArgVsSet_c<T>::ExprEval ( dArgs[i], tDummy ) );
4210 SafeRelease ( dArgs[i] );
4211 }
4212 }
4213
4214 /// take ownership of arg, and copy that constlist
Expr_ArgVsConstSet_c(ISphExpr * pArg,ConstList_c * pConsts)4215 Expr_ArgVsConstSet_c ( ISphExpr * pArg, ConstList_c * pConsts )
4216 : Expr_ArgVsSet_c<T> ( pArg )
4217 {
4218 if ( !pConsts )
4219 return; // can happen on uservar path
4220 if ( pConsts->m_eRetType==SPH_ATTR_FLOAT )
4221 {
4222 m_dValues.Reserve ( pConsts->m_dFloats.GetLength() );
4223 ARRAY_FOREACH ( i, pConsts->m_dFloats )
4224 m_dValues.Add ( (T)pConsts->m_dFloats[i] );
4225 } else
4226 {
4227 m_dValues.Reserve ( pConsts->m_dInts.GetLength() );
4228 ARRAY_FOREACH ( i, pConsts->m_dInts )
4229 m_dValues.Add ( (T)pConsts->m_dInts[i] );
4230 }
4231 }
4232 };
4233
4234 //////////////////////////////////////////////////////////////////////////
4235
4236 /// INTERVAL() evaluator for constant turn point values case
4237 template < typename T >
4238 class Expr_IntervalConst_c : public Expr_ArgVsConstSet_c<T>
4239 {
4240 public:
4241 /// take ownership of arg, pre-evaluate and dismiss turn points
Expr_IntervalConst_c(CSphVector<ISphExpr * > & dArgs)4242 explicit Expr_IntervalConst_c ( CSphVector<ISphExpr *> & dArgs )
4243 : Expr_ArgVsConstSet_c<T> ( dArgs[0], dArgs, 1 )
4244 {}
4245
4246 /// evaluate arg, return interval id
IntEval(const CSphMatch & tMatch) const4247 virtual int IntEval ( const CSphMatch & tMatch ) const
4248 {
4249 T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
4250 ARRAY_FOREACH ( i, this->m_dValues ) // FIXME! OPTIMIZE! perform binary search here
4251 if ( val<this->m_dValues[i] )
4252 return i;
4253 return this->m_dValues.GetLength();
4254 }
4255
Command(ESphExprCommand eCmd,void * pArg)4256 virtual void Command ( ESphExprCommand eCmd, void * pArg ) { this->m_pArg->Command ( eCmd, pArg ); }
4257 };
4258
4259
4260 /// generic INTERVAL() evaluator
4261 template < typename T >
4262 class Expr_Interval_c : public Expr_ArgVsSet_c<T>
4263 {
4264 protected:
4265 CSphVector<ISphExpr *> m_dTurnPoints;
4266
4267 public:
4268 /// take ownership of arg and turn points
Expr_Interval_c(const CSphVector<ISphExpr * > & dArgs)4269 explicit Expr_Interval_c ( const CSphVector<ISphExpr *> & dArgs )
4270 : Expr_ArgVsSet_c<T> ( dArgs[0] )
4271 {
4272 for ( int i=1; i<dArgs.GetLength(); i++ )
4273 m_dTurnPoints.Add ( dArgs[i] );
4274 }
4275
4276 /// evaluate arg, return interval id
IntEval(const CSphMatch & tMatch) const4277 virtual int IntEval ( const CSphMatch & tMatch ) const
4278 {
4279 T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
4280 ARRAY_FOREACH ( i, m_dTurnPoints )
4281 if ( val < Expr_ArgVsSet_c<T>::ExprEval ( m_dTurnPoints[i], tMatch ) )
4282 return i;
4283 return m_dTurnPoints.GetLength();
4284 }
4285
Command(ESphExprCommand eCmd,void * pArg)4286 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4287 {
4288 this->m_pArg->Command ( eCmd, pArg );
4289 ARRAY_FOREACH ( i, m_dTurnPoints )
4290 m_dTurnPoints[i]->Command ( eCmd, pArg );
4291 }
4292 };
4293
4294 //////////////////////////////////////////////////////////////////////////
4295
4296 /// IN() evaluator, arbitrary scalar expression vs. constant values
4297 template < typename T >
4298 class Expr_In_c : public Expr_ArgVsConstSet_c<T>
4299 {
4300 public:
4301 /// pre-sort values for binary search
Expr_In_c(ISphExpr * pArg,ConstList_c * pConsts)4302 Expr_In_c ( ISphExpr * pArg, ConstList_c * pConsts ) :
4303 Expr_ArgVsConstSet_c<T> ( pArg, pConsts )
4304 {
4305 this->m_dValues.Sort();
4306 }
4307
4308 /// evaluate arg, check if the value is within set
IntEval(const CSphMatch & tMatch) const4309 virtual int IntEval ( const CSphMatch & tMatch ) const
4310 {
4311 T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
4312 return this->m_dValues.BinarySearch ( val )!=NULL;
4313 }
4314
Command(ESphExprCommand eCmd,void * pArg)4315 virtual void Command ( ESphExprCommand eCmd, void * pArg ) { this->m_pArg->Command ( eCmd, pArg ); }
4316 };
4317
4318
4319 /// IN() evaluator, arbitrary scalar expression vs. uservar
4320 /// (for the sake of evaluator, uservar is a pre-sorted, refcounted external vector)
4321 class Expr_InUservar_c : public Expr_ArgVsSet_c<int64_t>
4322 {
4323 protected:
4324 UservarIntSet_c * m_pConsts;
4325
4326 public:
4327 /// just get hold of args
Expr_InUservar_c(ISphExpr * pArg,UservarIntSet_c * pConsts)4328 explicit Expr_InUservar_c ( ISphExpr * pArg, UservarIntSet_c * pConsts )
4329 : Expr_ArgVsSet_c<int64_t> ( pArg )
4330 , m_pConsts ( pConsts ) // no addref, hook should have addref'd (otherwise there'd be a race)
4331 {}
4332
4333 /// release the uservar value
~Expr_InUservar_c()4334 ~Expr_InUservar_c()
4335 {
4336 SafeRelease ( m_pConsts );
4337 }
4338
4339 /// evaluate arg, check if the value is within set
IntEval(const CSphMatch & tMatch) const4340 virtual int IntEval ( const CSphMatch & tMatch ) const
4341 {
4342 int64_t iVal = ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
4343 return m_pConsts->BinarySearch ( iVal )!=NULL;
4344 }
4345
Command(ESphExprCommand eCmd,void * pArg)4346 virtual void Command ( ESphExprCommand eCmd, void * pArg ) { this->m_pArg->Command ( eCmd, pArg ); }
4347 };
4348
4349
4350 /// IN() evaluator, MVA attribute vs. constant values
4351 template < bool MVA64 >
4352 class Expr_MVAIn_c : public Expr_ArgVsConstSet_c<int64_t>
4353 {
4354 public:
4355 /// pre-sort values for binary search
Expr_MVAIn_c(const CSphAttrLocator & tLoc,int iLocator,ConstList_c * pConsts,UservarIntSet_c * pUservar)4356 Expr_MVAIn_c ( const CSphAttrLocator & tLoc, int iLocator, ConstList_c * pConsts, UservarIntSet_c * pUservar )
4357 : Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts )
4358 , m_tLocator ( tLoc )
4359 , m_iLocator ( iLocator )
4360 , m_pMvaPool ( NULL )
4361 , m_pUservar ( pUservar )
4362 , m_bArenaProhibit ( false )
4363 {
4364 assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
4365 assert ( !pConsts || !pUservar ); // either constlist or uservar, not both
4366 this->m_dValues.Sort();
4367 }
4368
~Expr_MVAIn_c()4369 ~Expr_MVAIn_c()
4370 {
4371 SafeRelease ( m_pUservar );
4372 }
4373
4374 int MvaEval ( const DWORD * pMva ) const;
4375
MvaEval(const CSphMatch &) const4376 virtual const DWORD * MvaEval ( const CSphMatch & ) const { assert ( 0 && "not implemented" ); return NULL; }
4377
4378 /// evaluate arg, check if any values are within set
IntEval(const CSphMatch & tMatch) const4379 virtual int IntEval ( const CSphMatch & tMatch ) const
4380 {
4381 const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool, m_bArenaProhibit );
4382 if ( !pMva )
4383 return 0;
4384
4385 return MvaEval ( pMva );
4386 }
4387
Command(ESphExprCommand eCmd,void * pArg)4388 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4389 {
4390 if ( eCmd==SPH_EXPR_SET_MVA_POOL )
4391 {
4392 const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
4393 assert ( pArg );
4394 m_pMvaPool = pPool->m_pMva;
4395 m_bArenaProhibit = pPool->m_bArenaProhibit;
4396 }
4397 if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4398 static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
4399 }
4400
4401 protected:
4402 CSphAttrLocator m_tLocator;
4403 int m_iLocator; // used by SPH_EXPR_GET_DEPENDENT_COLS
4404 const DWORD * m_pMvaPool;
4405 UservarIntSet_c * m_pUservar;
4406 bool m_bArenaProhibit;
4407 };
4408
4409
4410 template<>
MvaEval(const DWORD * pMva) const4411 int Expr_MVAIn_c<false>::MvaEval ( const DWORD * pMva ) const
4412 {
4413 // OPTIMIZE! FIXME! factor out a common function with Filter_MVAValues::Eval()
4414 DWORD uLen = *pMva++;
4415 const DWORD * pMvaMax = pMva+uLen;
4416
4417 const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4418 const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4419
4420 const DWORD * L = pMva;
4421 const DWORD * R = pMvaMax - 1;
4422 for ( ; pFilter < pFilterMax; pFilter++ )
4423 {
4424 while ( L<=R )
4425 {
4426 const DWORD * m = L + (R - L) / 2;
4427
4428 if ( *pFilter > *m )
4429 L = m + 1;
4430 else if ( *pFilter < *m )
4431 R = m - 1;
4432 else
4433 return 1;
4434 }
4435 R = pMvaMax - 1;
4436 }
4437 return 0;
4438 }
4439
4440
4441 template<>
MvaEval(const DWORD * pMva) const4442 int Expr_MVAIn_c<true>::MvaEval ( const DWORD * pMva ) const
4443 {
4444 // OPTIMIZE! FIXME! factor out a common function with Filter_MVAValues::Eval()
4445 DWORD uLen = *pMva++;
4446 assert ( ( uLen%2 )==0 );
4447 const DWORD * pMvaMax = pMva+uLen;
4448
4449 const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4450 const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4451
4452 const int64_t * L = (const int64_t *)pMva;
4453 const int64_t * R = (const int64_t *)( pMvaMax - 2 );
4454 for ( ; pFilter < pFilterMax; pFilter++ )
4455 {
4456 while ( L<=R )
4457 {
4458 const int64_t * pVal = L + (R - L) / 2;
4459 int64_t iMva = MVA_UPSIZE ( (const DWORD *)pVal );
4460
4461 if ( *pFilter > iMva )
4462 L = pVal + 1;
4463 else if ( *pFilter < iMva )
4464 R = pVal - 1;
4465 else
4466 return 1;
4467 }
4468 R = (const int64_t *) ( pMvaMax - 2 );
4469 }
4470 return 0;
4471 }
4472
4473 /// LENGTH() evaluator for MVAs
4474 class Expr_MVALength_c : public ISphExpr
4475 {
4476 protected:
4477 CSphAttrLocator m_tLocator;
4478 int m_iLocator; // used by SPH_EXPR_GET_DEPENDENT_COLS
4479 bool m_b64;
4480 const DWORD * m_pMvaPool;
4481 bool m_bArenaProhibit;
4482
4483 public:
Expr_MVALength_c(const CSphAttrLocator & tLoc,int iLocator,bool b64)4484 Expr_MVALength_c ( const CSphAttrLocator & tLoc, int iLocator, bool b64 )
4485 : m_tLocator ( tLoc )
4486 , m_iLocator ( iLocator )
4487 , m_b64 ( b64 )
4488 , m_pMvaPool ( NULL )
4489 , m_bArenaProhibit ( false )
4490 {
4491 assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
4492 }
4493
IntEval(const CSphMatch & tMatch) const4494 virtual int IntEval ( const CSphMatch & tMatch ) const
4495 {
4496 const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool, m_bArenaProhibit );
4497 if ( !pMva )
4498 return 0;
4499 return (int)( m_b64 ? *pMva/2 : *pMva );
4500 }
4501
Command(ESphExprCommand eCmd,void * pArg)4502 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4503 {
4504 if ( eCmd==SPH_EXPR_SET_MVA_POOL )
4505 {
4506 const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
4507 assert ( pArg );
4508 m_pMvaPool = pPool->m_pMva;
4509 m_bArenaProhibit = pPool->m_bArenaProhibit;
4510 }
4511 if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4512 static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
4513 }
4514
Eval(const CSphMatch & tMatch) const4515 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
4516 };
4517
4518
4519 /// aggregate functions evaluator for MVA attribute
4520 template < bool MVA64 >
4521 class Expr_MVAAggr_c : public ISphExpr
4522 {
4523 public:
Expr_MVAAggr_c(const CSphAttrLocator & tLoc,int iLocator,ESphAggrFunc eFunc)4524 Expr_MVAAggr_c ( const CSphAttrLocator & tLoc, int iLocator, ESphAggrFunc eFunc )
4525 : m_tLocator ( tLoc )
4526 , m_iLocator ( iLocator )
4527 , m_pMvaPool ( NULL )
4528 , m_bArenaProhibit ( false )
4529 , m_eFunc ( eFunc )
4530 {
4531 assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
4532 }
4533
4534 int64_t MvaAggr ( const DWORD * pMva, ESphAggrFunc eFunc ) const;
4535
Int64Eval(const CSphMatch & tMatch) const4536 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
4537 {
4538 const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool, m_bArenaProhibit );
4539 if ( !pMva )
4540 return 0;
4541 return MvaAggr ( pMva, m_eFunc );
4542 }
4543
Command(ESphExprCommand eCmd,void * pArg)4544 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4545 {
4546 if ( eCmd==SPH_EXPR_SET_MVA_POOL )
4547 {
4548 const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
4549 assert ( pArg );
4550 m_pMvaPool = pPool->m_pMva;
4551 m_bArenaProhibit = pPool->m_bArenaProhibit;
4552 }
4553 if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4554 static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
4555 }
4556
Eval(const CSphMatch & tMatch) const4557 virtual float Eval ( const CSphMatch & tMatch ) const { return (float)Int64Eval ( tMatch ); }
IntEval(const CSphMatch & tMatch) const4558 virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)Int64Eval ( tMatch ); }
4559
4560 protected:
4561 CSphAttrLocator m_tLocator;
4562 int m_iLocator; // used by SPH_EXPR_GET_DEPENDENT_COLS
4563 const DWORD * m_pMvaPool;
4564 bool m_bArenaProhibit;
4565 ESphAggrFunc m_eFunc;
4566 };
4567
4568
4569 template <>
MvaAggr(const DWORD * pMva,ESphAggrFunc eFunc) const4570 int64_t Expr_MVAAggr_c<false>::MvaAggr ( const DWORD * pMva, ESphAggrFunc eFunc ) const
4571 {
4572 DWORD uLen = *pMva++;
4573 const DWORD * pMvaMax = pMva+uLen;
4574 const DWORD * L = pMva;
4575 const DWORD * R = pMvaMax - 1;
4576
4577 switch ( eFunc )
4578 {
4579 case SPH_AGGR_MIN: return *L;
4580 case SPH_AGGR_MAX: return *R;
4581 default: return 0;
4582 }
4583 }
4584
4585
4586 template <>
MvaAggr(const DWORD * pMva,ESphAggrFunc eFunc) const4587 int64_t Expr_MVAAggr_c<true>::MvaAggr ( const DWORD * pMva, ESphAggrFunc eFunc ) const
4588 {
4589 DWORD uLen = *pMva++;
4590 assert ( ( uLen%2 )==0 );
4591 const DWORD * pMvaMax = pMva+uLen;
4592 const int64_t * L = (const int64_t *)pMva;
4593 const int64_t * R = (const int64_t *)( pMvaMax - 2 );
4594
4595 switch ( eFunc )
4596 {
4597 case SPH_AGGR_MIN: return *L;
4598 case SPH_AGGR_MAX: return *R;
4599 default: return 0;
4600 }
4601 }
4602
4603
4604 /// IN() evaluator, JSON array vs. constant values
4605 class Expr_JsonFieldIn_c : public Expr_ArgVsConstSet_c<int64_t>
4606 {
4607 protected:
4608 UservarIntSet_c * m_pUservar;
4609 const BYTE * m_pStrings;
4610 ISphExpr * m_pArg;
4611 CSphVector<int64_t> m_dHashes;
4612
4613 public:
Expr_JsonFieldIn_c(ConstList_c * pConsts,UservarIntSet_c * pUservar,ISphExpr * pArg)4614 Expr_JsonFieldIn_c ( ConstList_c * pConsts, UservarIntSet_c * pUservar, ISphExpr * pArg )
4615 : Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts )
4616 , m_pUservar ( pUservar )
4617 , m_pStrings ( NULL )
4618 , m_pArg ( pArg )
4619 {
4620 assert ( !pConsts || !pUservar );
4621
4622 const char * sExpr = pConsts->m_sExpr.cstr();
4623 int iExprLen = pConsts->m_sExpr.Length();
4624
4625 const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4626 const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4627
4628 for ( const int64_t * pCur=pFilter; pCur<pFilterMax; pCur++ )
4629 {
4630 int64_t iVal = *pCur;
4631 int iOfs = (int)( iVal>>32 );
4632 int iLen = (int)( iVal & 0xffffffffUL );
4633 if ( iOfs>0 && iOfs+iLen<=iExprLen )
4634 {
4635 CSphString sRes;
4636 SqlUnescape ( sRes, sExpr + iOfs, iLen );
4637 m_dHashes.Add ( sphFNV64 ( sRes.cstr(), sRes.Length() ) );
4638 }
4639 }
4640
4641 m_dHashes.Sort();
4642 }
4643
~Expr_JsonFieldIn_c()4644 ~Expr_JsonFieldIn_c()
4645 {
4646 SafeRelease ( m_pUservar );
4647 SafeRelease ( m_pArg );
4648 }
4649
Command(ESphExprCommand eCmd,void * pArg)4650 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4651 {
4652 if ( eCmd==SPH_EXPR_SET_STRING_POOL )
4653 m_pStrings = (const BYTE*)pArg;
4654 m_pArg->Command ( eCmd, pArg );
4655 }
4656
4657 /// evaluate arg, check if any values are within set
IntEval(const CSphMatch & tMatch) const4658 virtual int IntEval ( const CSphMatch & tMatch ) const
4659 {
4660 const BYTE * pVal = NULL;
4661 ESphJsonType eJson = GetKey ( &pVal, tMatch );
4662 switch ( eJson )
4663 {
4664 case JSON_INT32_VECTOR: return ArrayEval<int> ( pVal );
4665 case JSON_INT64_VECTOR: return ArrayEval<int64_t> ( pVal );
4666 case JSON_STRING_VECTOR: return StringArrayEval ( pVal, false );
4667 case JSON_STRING: return StringArrayEval ( pVal, true );
4668 case JSON_INT32: return ValueEval ( (int64_t) sphJsonLoadInt ( &pVal ) );
4669 case JSON_INT64: return ValueEval ( sphJsonLoadBigint ( &pVal ) );
4670 case JSON_MIXED_VECTOR:
4671 {
4672 const BYTE * p = pVal;
4673 sphJsonUnpackInt ( &p ); // skip node length
4674 int iLen = sphJsonUnpackInt ( &p );
4675 for ( int i=0; i<iLen; i++ )
4676 {
4677 ESphJsonType eType = (ESphJsonType)*p++;
4678 pVal = p;
4679 int iRes = 0;
4680 switch (eType)
4681 {
4682 case JSON_STRING: iRes = StringArrayEval ( pVal, true ); break;
4683 case JSON_INT32: iRes = ValueEval ( (int64_t) sphJsonLoadInt ( &pVal ) ); break;
4684 case JSON_INT64: iRes = ValueEval ( sphJsonLoadBigint ( &pVal ) ); break;
4685 case JSON_DOUBLE: iRes = ValueEval ( (int64_t)sphQW2D ( sphJsonLoadBigint ( &pVal ) ) ); break;
4686 default: break; // for weird subobjects, just let IN() return false
4687 }
4688 if ( iRes )
4689 return 1;
4690 sphJsonSkipNode ( eType, &p );
4691 }
4692 return 0;
4693 }
4694 default: return 0;
4695 }
4696 }
4697
4698 protected:
GetKey(const BYTE ** ppKey,const CSphMatch & tMatch) const4699 ESphJsonType GetKey ( const BYTE ** ppKey, const CSphMatch & tMatch ) const
4700 {
4701 assert ( ppKey );
4702 if ( !m_pStrings )
4703 return JSON_EOF;
4704 uint64_t uValue = m_pArg->Int64Eval ( tMatch );
4705 *ppKey = m_pStrings + ( uValue & 0xffffffff );
4706 return (ESphJsonType)( uValue >> 32 );
4707 }
4708
ValueEval(const int64_t iVal) const4709 int ValueEval ( const int64_t iVal ) const
4710 {
4711 const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4712 const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4713 for ( ; pFilter<pFilterMax; pFilter++ )
4714 if ( iVal==*pFilter )
4715 return 1;
4716 return 0;
4717 }
4718
4719 // cannot apply MvaEval() on unordered JSON arrays, using linear search
4720 template <typename T>
ArrayEval(const BYTE * pVal) const4721 int ArrayEval ( const BYTE * pVal ) const
4722 {
4723 const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4724 const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4725
4726 int iLen = sphJsonUnpackInt ( &pVal );
4727 const T * pArray = (const T *)pVal;
4728 const T * pArrayMax = pArray+iLen;
4729 for ( ; pFilter<pFilterMax; pFilter++ )
4730 {
4731 T iVal = (T)*pFilter;
4732 for ( const T * m = pArray; m<pArrayMax; m++ )
4733 if ( iVal==*m )
4734 return 1;
4735 }
4736 return 0;
4737 }
4738
StringArrayEval(const BYTE * pVal,bool bValueEval) const4739 int StringArrayEval ( const BYTE * pVal, bool bValueEval ) const
4740 {
4741 if ( !bValueEval )
4742 sphJsonUnpackInt ( &pVal );
4743 int iCount = bValueEval ? 1 : sphJsonUnpackInt ( &pVal );
4744
4745 while ( iCount-- )
4746 {
4747 int iLen = sphJsonUnpackInt ( &pVal );
4748 if ( m_dHashes.BinarySearch ( sphFNV64 ( pVal, iLen ) ) )
4749 return 1;
4750 pVal += iLen;
4751 }
4752 return 0;
4753 }
4754 };
4755
4756
4757 class Expr_StrIn_c : public Expr_ArgVsConstSet_c<int64_t>
4758 {
4759 protected:
4760 CSphAttrLocator m_tLocator;
4761 int m_iLocator;
4762 const BYTE * m_pStrings;
4763 UservarIntSet_c * m_pUservar;
4764 CSphVector<CSphString> m_dStringValues;
4765 SphStringCmp_fn m_fnStrCmp;
4766
4767 public:
Expr_StrIn_c(const CSphAttrLocator & tLoc,int iLocator,ConstList_c * pConsts,UservarIntSet_c * pUservar,ESphCollation eCollation)4768 Expr_StrIn_c ( const CSphAttrLocator & tLoc, int iLocator, ConstList_c * pConsts, UservarIntSet_c * pUservar, ESphCollation eCollation )
4769 : Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts )
4770 , m_tLocator ( tLoc )
4771 , m_iLocator ( iLocator )
4772 , m_pStrings ( NULL )
4773 , m_pUservar ( pUservar )
4774 {
4775 assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
4776 assert ( !pConsts || !pUservar );
4777
4778 m_fnStrCmp = GetCollationFn ( eCollation );
4779
4780 const char * sExpr = pConsts->m_sExpr.cstr();
4781 int iExprLen = pConsts->m_sExpr.Length();
4782
4783 const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
4784 const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
4785
4786 for ( const int64_t * pCur=pFilter; pCur<pFilterMax; pCur++ )
4787 {
4788 int64_t iVal = *pCur;
4789 int iOfs = (int)( iVal>>32 );
4790 int iLen = (int)( iVal & 0xffffffffUL );
4791 if ( iOfs>0 && iOfs+iLen<=iExprLen )
4792 {
4793 CSphString sRes;
4794 SqlUnescape ( sRes, sExpr + iOfs, iLen );
4795 m_dStringValues.Add ( sRes );
4796 }
4797 }
4798 }
4799
~Expr_StrIn_c()4800 ~Expr_StrIn_c()
4801 {
4802 SafeRelease ( m_pUservar );
4803 }
4804
IntEval(const CSphMatch & tMatch) const4805 virtual int IntEval ( const CSphMatch & tMatch ) const
4806 {
4807 const BYTE * pVal;
4808 SphAttr_t iOfs = tMatch.GetAttr ( m_tLocator );
4809 if ( iOfs<=0 )
4810 return 0;
4811 int iLen = sphUnpackStr ( m_pStrings + iOfs, &pVal );
4812
4813 CSphString sValue ( (const char*)pVal, iLen );
4814 const BYTE * pStr = (const BYTE*)sValue.cstr();
4815
4816 ARRAY_FOREACH ( i, m_dStringValues )
4817 if ( m_fnStrCmp ( pStr, (const BYTE*)m_dStringValues[i].cstr(), false )==0 )
4818 return 1;
4819
4820 return 0;
4821 }
4822
Command(ESphExprCommand eCmd,void * pArg)4823 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4824 {
4825 if ( eCmd==SPH_EXPR_SET_STRING_POOL )
4826 m_pStrings = (const BYTE*)pArg;
4827 if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4828 static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
4829 }
4830 };
4831
4832 //////////////////////////////////////////////////////////////////////////
4833
4834 /// generic BITDOT() evaluator
4835 /// first argument is a bit mask and the rest ones are bit weights
4836 /// function returns sum of bits multiplied by their weights
4837 /// BITDOT(5, 11, 33, 55) => 1*11 + 0*33 + 1*55 = 66
4838 /// BITDOT(4, 11, 33, 55) => 0*11 + 0*33 + 1*55 = 55
4839 template < typename T >
4840 class Expr_Bitdot_c : public Expr_ArgVsSet_c<T>
4841 {
4842 protected:
4843 CSphVector<ISphExpr *> m_dBitWeights;
4844
4845 public:
4846 /// take ownership of arg and turn points
Expr_Bitdot_c(const CSphVector<ISphExpr * > & dArgs)4847 explicit Expr_Bitdot_c ( const CSphVector<ISphExpr *> & dArgs )
4848 : Expr_ArgVsSet_c<T> ( dArgs[0] )
4849 {
4850 for ( int i=1; i<dArgs.GetLength(); i++ )
4851 m_dBitWeights.Add ( dArgs[i] );
4852 }
4853
4854 protected:
4855 /// generic evaluate
DoEval(const CSphMatch & tMatch) const4856 virtual T DoEval ( const CSphMatch & tMatch ) const
4857 {
4858 int64_t uArg = this->m_pArg->Int64Eval ( tMatch ); // 'this' fixes gcc braindamage
4859 T tRes = 0;
4860
4861 int iBit = 0;
4862 while ( uArg && iBit<m_dBitWeights.GetLength() )
4863 {
4864 if ( uArg & 1 )
4865 tRes += Expr_ArgVsSet_c<T>::ExprEval ( m_dBitWeights[iBit], tMatch );
4866 uArg >>= 1;
4867 iBit++;
4868 }
4869
4870 return tRes;
4871 }
4872
4873 public:
Eval(const CSphMatch & tMatch) const4874 virtual float Eval ( const CSphMatch & tMatch ) const
4875 {
4876 return (float) DoEval ( tMatch );
4877 }
4878
IntEval(const CSphMatch & tMatch) const4879 virtual int IntEval ( const CSphMatch & tMatch ) const
4880 {
4881 return (int) DoEval ( tMatch );
4882 }
4883
Int64Eval(const CSphMatch & tMatch) const4884 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
4885 {
4886 return (int64_t) DoEval ( tMatch );
4887 }
4888
Command(ESphExprCommand eCmd,void * pArg)4889 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4890 {
4891 this->m_pArg->Command ( eCmd, pArg );
4892 ARRAY_FOREACH ( i, m_dBitWeights )
4893 m_dBitWeights[i]->Command ( eCmd, pArg );
4894 }
4895 };
4896
4897 //////////////////////////////////////////////////////////////////////////
4898
/// distance calculation methods that GeodistFn() can dispatch to
enum GeoFunc_e
{
	GEO_HAVERSINE,	///< maps to GeodistSphereRad / GeodistSphereDeg
	GEO_ADAPTIVE	///< maps to GeodistAdaptiveRad / GeodistAdaptiveDeg
};

/// distance function signature, takes four floats and returns a float
/// (callers in this file pass lat1, lon1, lat2, lon2, in that order)
typedef float (*Geofunc_fn)( float, float, float, float );
4906
GeodistFn(GeoFunc_e eFunc,bool bDeg)4907 static Geofunc_fn GeodistFn ( GeoFunc_e eFunc, bool bDeg )
4908 {
4909 switch ( 2*eFunc+bDeg )
4910 {
4911 case 2*GEO_HAVERSINE: return &GeodistSphereRad;
4912 case 2*GEO_HAVERSINE+1: return &GeodistSphereDeg;
4913 case 2*GEO_ADAPTIVE: return &GeodistAdaptiveRad;
4914 case 2*GEO_ADAPTIVE+1: return &GeodistAdaptiveDeg;
4915 }
4916 return NULL;
4917 }
4918
Geodist(GeoFunc_e eFunc,bool bDeg,float lat1,float lon1,float lat2,float lon2)4919 static float Geodist ( GeoFunc_e eFunc, bool bDeg, float lat1, float lon1, float lat2, float lon2 )
4920 {
4921 return GeodistFn ( eFunc, bDeg ) ( lat1, lon1, lat2, lon2 );
4922 }
4923
4924 /// geodist() - attr point, constant anchor
4925 class Expr_GeodistAttrConst_c : public ISphExpr
4926 {
4927 public:
Expr_GeodistAttrConst_c(Geofunc_fn pFunc,float fOut,CSphAttrLocator tLat,CSphAttrLocator tLon,float fAnchorLat,float fAnchorLon,int iLat,int iLon)4928 Expr_GeodistAttrConst_c ( Geofunc_fn pFunc, float fOut, CSphAttrLocator tLat, CSphAttrLocator tLon, float fAnchorLat, float fAnchorLon, int iLat, int iLon )
4929 : m_pFunc ( pFunc )
4930 , m_fOut ( fOut )
4931 , m_tLat ( tLat )
4932 , m_tLon ( tLon )
4933 , m_fAnchorLat ( fAnchorLat )
4934 , m_fAnchorLon ( fAnchorLon )
4935 , m_iLat ( iLat )
4936 , m_iLon ( iLon )
4937 {}
4938
Eval(const CSphMatch & tMatch) const4939 virtual float Eval ( const CSphMatch & tMatch ) const
4940 {
4941 return m_fOut*m_pFunc ( tMatch.GetAttrFloat ( m_tLat ), tMatch.GetAttrFloat ( m_tLon ), m_fAnchorLat, m_fAnchorLon );
4942 }
4943
Command(ESphExprCommand eCmd,void * pArg)4944 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4945 {
4946 if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
4947 {
4948 static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLat );
4949 static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLon );
4950 }
4951 }
4952
4953 private:
4954 Geofunc_fn m_pFunc;
4955 float m_fOut;
4956 CSphAttrLocator m_tLat;
4957 CSphAttrLocator m_tLon;
4958 float m_fAnchorLat;
4959 float m_fAnchorLon;
4960 int m_iLat;
4961 int m_iLon;
4962 };
4963
4964 /// geodist() - expr point, constant anchor
4965 class Expr_GeodistConst_c: public ISphExpr
4966 {
4967 public:
Expr_GeodistConst_c(Geofunc_fn pFunc,float fOut,ISphExpr * pLat,ISphExpr * pLon,float fAnchorLat,float fAnchorLon)4968 Expr_GeodistConst_c ( Geofunc_fn pFunc, float fOut, ISphExpr * pLat, ISphExpr * pLon, float fAnchorLat, float fAnchorLon )
4969 : m_pFunc ( pFunc )
4970 , m_fOut ( fOut )
4971 , m_pLat ( pLat )
4972 , m_pLon ( pLon )
4973 , m_fAnchorLat ( fAnchorLat )
4974 , m_fAnchorLon ( fAnchorLon )
4975 {}
4976
~Expr_GeodistConst_c()4977 ~Expr_GeodistConst_c ()
4978 {
4979 SafeRelease ( m_pLon );
4980 SafeRelease ( m_pLat );
4981 }
4982
Eval(const CSphMatch & tMatch) const4983 virtual float Eval ( const CSphMatch & tMatch ) const
4984 {
4985 return m_fOut*m_pFunc ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_fAnchorLat, m_fAnchorLon );
4986 }
4987
Command(ESphExprCommand eCmd,void * pArg)4988 virtual void Command ( ESphExprCommand eCmd, void * pArg )
4989 {
4990 m_pLat->Command ( eCmd, pArg );
4991 m_pLon->Command ( eCmd, pArg );
4992 }
4993
4994 private:
4995 Geofunc_fn m_pFunc;
4996 float m_fOut;
4997 ISphExpr * m_pLat;
4998 ISphExpr * m_pLon;
4999 float m_fAnchorLat;
5000 float m_fAnchorLon;
5001 };
5002
5003 /// geodist() - expr point, expr anchor
5004 class Expr_Geodist_c: public ISphExpr
5005 {
5006 public:
Expr_Geodist_c(Geofunc_fn pFunc,float fOut,ISphExpr * pLat,ISphExpr * pLon,ISphExpr * pAnchorLat,ISphExpr * pAnchorLon)5007 Expr_Geodist_c ( Geofunc_fn pFunc, float fOut, ISphExpr * pLat, ISphExpr * pLon, ISphExpr * pAnchorLat, ISphExpr * pAnchorLon )
5008 : m_pFunc ( pFunc )
5009 , m_fOut ( fOut )
5010 , m_pLat ( pLat )
5011 , m_pLon ( pLon )
5012 , m_pAnchorLat ( pAnchorLat )
5013 , m_pAnchorLon ( pAnchorLon )
5014 {}
5015
~Expr_Geodist_c()5016 ~Expr_Geodist_c ()
5017 {
5018 SafeRelease ( m_pAnchorLon );
5019 SafeRelease ( m_pAnchorLat );
5020 SafeRelease ( m_pLon );
5021 SafeRelease ( m_pLat );
5022 }
5023
Eval(const CSphMatch & tMatch) const5024 virtual float Eval ( const CSphMatch & tMatch ) const
5025 {
5026 return m_fOut*m_pFunc ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_pAnchorLat->Eval(tMatch), m_pAnchorLon->Eval(tMatch) );
5027 }
5028
Command(ESphExprCommand eCmd,void * pArg)5029 virtual void Command ( ESphExprCommand eCmd, void * pArg )
5030 {
5031 m_pLat->Command ( eCmd, pArg );
5032 m_pLon->Command ( eCmd, pArg );
5033 m_pAnchorLat->Command ( eCmd, pArg );
5034 m_pAnchorLon->Command ( eCmd, pArg );
5035 }
5036
5037 private:
5038 Geofunc_fn m_pFunc;
5039 float m_fOut;
5040 ISphExpr * m_pLat;
5041 ISphExpr * m_pLon;
5042 ISphExpr * m_pAnchorLat;
5043 ISphExpr * m_pAnchorLon;
5044 };
5045
5046 //////////////////////////////////////////////////////////////////////////
5047
5048 struct GatherArgTypes_t : ISphNoncopyable
5049 {
5050 CSphVector<int> & m_dTypes;
GatherArgTypes_tGatherArgTypes_t5051 explicit GatherArgTypes_t ( CSphVector<int> & dTypes )
5052 : m_dTypes ( dTypes )
5053 {}
CollectGatherArgTypes_t5054 void Collect ( int , const ExprNode_t & tNode )
5055 {
5056 m_dTypes.Add ( tNode.m_iToken );
5057 }
5058 };
5059
GatherArgTypes(int iNode,CSphVector<int> & dTypes)5060 void ExprParser_t::GatherArgTypes ( int iNode, CSphVector<int> & dTypes )
5061 {
5062 GatherArgTypes_t tCollector ( dTypes );
5063 GatherArgT ( iNode, tCollector );
5064 }
5065
5066 struct GatherArgNodes_t : ISphNoncopyable
5067 {
5068 CSphVector<int> & m_dNodes;
GatherArgNodes_tGatherArgNodes_t5069 explicit GatherArgNodes_t ( CSphVector<int> & dNodes )
5070 : m_dNodes ( dNodes )
5071 {}
CollectGatherArgNodes_t5072 void Collect ( int iNode, const ExprNode_t & )
5073 {
5074 m_dNodes.Add ( iNode );
5075 }
5076 };
5077
GatherArgNodes(int iNode,CSphVector<int> & dNodes)5078 void ExprParser_t::GatherArgNodes ( int iNode, CSphVector<int> & dNodes )
5079 {
5080 GatherArgNodes_t tCollector ( dNodes );
5081 GatherArgT ( iNode, tCollector );
5082 }
5083
5084 struct GatherArgReturnTypes_t : ISphNoncopyable
5085 {
5086 CSphVector<ESphAttr> & m_dTypes;
GatherArgReturnTypes_tGatherArgReturnTypes_t5087 explicit GatherArgReturnTypes_t ( CSphVector<ESphAttr> & dTypes )
5088 : m_dTypes ( dTypes )
5089 {}
CollectGatherArgReturnTypes_t5090 void Collect ( int , const ExprNode_t & tNode )
5091 {
5092 m_dTypes.Add ( tNode.m_eRetType );
5093 }
5094 };
5095
GatherArgRetTypes(int iNode,CSphVector<ESphAttr> & dTypes)5096 void ExprParser_t::GatherArgRetTypes ( int iNode, CSphVector<ESphAttr> & dTypes )
5097 {
5098 GatherArgReturnTypes_t tCollector ( dTypes );
5099 GatherArgT ( iNode, tCollector );
5100 }
5101
5102 template < typename T >
GatherArgT(int iNode,T & FUNCTOR)5103 void ExprParser_t::GatherArgT ( int iNode, T & FUNCTOR )
5104 {
5105 if ( iNode<0 )
5106 return;
5107
5108 m_dGatherStack.Resize ( 0 );
5109 StackNode_t & tInitial = m_dGatherStack.Add();
5110 const ExprNode_t & tNode = m_dNodes[iNode];
5111 tInitial.m_iNode = iNode;
5112 tInitial.m_iLeft = tNode.m_iLeft;
5113 tInitial.m_iRight = tNode.m_iRight;
5114
5115 while ( m_dGatherStack.GetLength()>0 )
5116 {
5117 StackNode_t & tCur = m_dGatherStack.Last();
5118 const ExprNode_t & tNode = m_dNodes[tCur.m_iNode];
5119 if ( tNode.m_iToken!=',' )
5120 {
5121 FUNCTOR.Collect ( tCur.m_iNode, tNode );
5122 m_dGatherStack.Pop();
5123 continue;
5124 }
5125 if ( tCur.m_iLeft==-1 && tCur.m_iRight==-1 )
5126 {
5127 m_dGatherStack.Pop();
5128 continue;
5129 }
5130
5131 int iChild = -1;
5132 if ( tCur.m_iLeft>=0 )
5133 {
5134 iChild = tCur.m_iLeft;
5135 tCur.m_iLeft = -1;
5136 } else if ( tCur.m_iRight>=0 )
5137 {
5138 iChild = tCur.m_iRight;
5139 tCur.m_iRight = -1;
5140 }
5141
5142 assert ( iChild>=0 );
5143 const ExprNode_t & tChild = m_dNodes[iChild];
5144 StackNode_t & tNext = m_dGatherStack.Add();
5145 tNext.m_iNode = iChild;
5146 tNext.m_iLeft = tChild.m_iLeft;
5147 tNext.m_iRight = tChild.m_iRight;
5148 }
5149 }
5150
CheckForConstSet(int iArgsNode,int iSkip)5151 bool ExprParser_t::CheckForConstSet ( int iArgsNode, int iSkip )
5152 {
5153 CSphVector<int> dTypes;
5154 GatherArgTypes ( iArgsNode, dTypes );
5155
5156 for ( int i=iSkip; i<dTypes.GetLength(); i++ )
5157 if ( dTypes[i]!=TOK_CONST_INT && dTypes[i]!=TOK_CONST_FLOAT && dTypes[i]!=TOK_MAP_ARG )
5158 return false;
5159 return true;
5160 }
5161
5162
5163 template < typename T >
WalkTree(int iRoot,T & FUNCTOR)5164 void ExprParser_t::WalkTree ( int iRoot, T & FUNCTOR )
5165 {
5166 if ( iRoot>=0 )
5167 {
5168 const ExprNode_t & tNode = m_dNodes[iRoot];
5169 FUNCTOR.Enter ( tNode, m_dNodes );
5170 WalkTree ( tNode.m_iLeft, FUNCTOR );
5171 WalkTree ( tNode.m_iRight, FUNCTOR );
5172 FUNCTOR.Exit ( tNode );
5173 }
5174 }
5175
5176
CreateIntervalNode(int iArgsNode,CSphVector<ISphExpr * > & dArgs)5177 ISphExpr * ExprParser_t::CreateIntervalNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs )
5178 {
5179 assert ( dArgs.GetLength()>=2 );
5180
5181 CSphVector<ESphAttr> dTypes;
5182 GatherArgRetTypes ( iArgsNode, dTypes );
5183
5184 // force type conversion, where possible
5185 if ( dTypes[0]==SPH_ATTR_JSON_FIELD )
5186 dArgs[0] = new Expr_JsonFieldConv_c ( dArgs[0] );
5187
5188 bool bConst = CheckForConstSet ( iArgsNode, 1 );
5189 ESphAttr eAttrType = m_dNodes[iArgsNode].m_eArgType;
5190 if ( bConst )
5191 {
5192 switch ( eAttrType )
5193 {
5194 case SPH_ATTR_INTEGER: return new Expr_IntervalConst_c<int> ( dArgs ); break;
5195 case SPH_ATTR_BIGINT: return new Expr_IntervalConst_c<int64_t> ( dArgs ); break;
5196 default: return new Expr_IntervalConst_c<float> ( dArgs ); break;
5197 }
5198 } else
5199 {
5200 switch ( eAttrType )
5201 {
5202 case SPH_ATTR_INTEGER: return new Expr_Interval_c<int> ( dArgs ); break;
5203 case SPH_ATTR_BIGINT: return new Expr_Interval_c<int64_t> ( dArgs ); break;
5204 default: return new Expr_Interval_c<float> ( dArgs ); break;
5205 }
5206 }
5207 #if !USE_WINDOWS
5208 return NULL;
5209 #endif
5210 }
5211
5212
CreateInNode(int iNode)5213 ISphExpr * ExprParser_t::CreateInNode ( int iNode )
5214 {
5215 const ExprNode_t & tLeft = m_dNodes[m_dNodes[iNode].m_iLeft];
5216 const ExprNode_t & tRight = m_dNodes[m_dNodes[iNode].m_iRight];
5217
5218 switch ( tRight.m_iToken )
5219 {
5220 // create IN(arg,constlist)
5221 case TOK_CONST_LIST:
5222 switch ( tLeft.m_iToken )
5223 {
5224 case TOK_ATTR_MVA32:
5225 return new Expr_MVAIn_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL );
5226 case TOK_ATTR_MVA64:
5227 return new Expr_MVAIn_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL );
5228 case TOK_ATTR_STRING:
5229 return new Expr_StrIn_c ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL, m_eCollation );
5230 case TOK_ATTR_JSON:
5231 return new Expr_JsonFieldIn_c ( tRight.m_pConsts, NULL, CreateTree ( m_dNodes [ iNode ].m_iLeft ) );
5232 default:
5233 {
5234 ISphExpr * pArg = CreateTree ( m_dNodes[iNode].m_iLeft );
5235 switch ( WidestType ( tLeft.m_eRetType, tRight.m_pConsts->m_eRetType ) )
5236 {
5237 case SPH_ATTR_INTEGER: return new Expr_In_c<int> ( pArg, tRight.m_pConsts ); break;
5238 case SPH_ATTR_BIGINT: return new Expr_In_c<int64_t> ( pArg, tRight.m_pConsts ); break;
5239 default: return new Expr_In_c<float> ( pArg, tRight.m_pConsts ); break;
5240 }
5241 }
5242 }
5243 break;
5244
5245 // create IN(arg,uservar)
5246 case TOK_USERVAR:
5247 {
5248 if ( !g_pUservarsHook )
5249 {
5250 m_sCreateError.SetSprintf ( "internal error: no uservars hook" );
5251 return NULL;
5252 }
5253
5254 UservarIntSet_c * pUservar = g_pUservarsHook ( m_dUservars[(int)tRight.m_iConst] );
5255 if ( !pUservar )
5256 {
5257 m_sCreateError.SetSprintf ( "undefined user variable '%s'", m_dUservars[(int)tRight.m_iConst].cstr() );
5258 return NULL;
5259 }
5260
5261 switch ( tLeft.m_iToken )
5262 {
5263 case TOK_ATTR_MVA32:
5264 return new Expr_MVAIn_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar );
5265 case TOK_ATTR_MVA64:
5266 return new Expr_MVAIn_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar );
5267 case TOK_ATTR_STRING:
5268 return new Expr_StrIn_c ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar, m_eCollation );
5269 case TOK_ATTR_JSON:
5270 return new Expr_JsonFieldIn_c ( NULL, pUservar, CreateTree ( m_dNodes[iNode].m_iLeft ) );
5271 default:
5272 return new Expr_InUservar_c ( CreateTree ( m_dNodes[iNode].m_iLeft ), pUservar );
5273 }
5274 break;
5275 }
5276
5277 // oops, unhandled case
5278 default:
5279 m_sCreateError = "IN() arguments must be constants (except the 1st one)";
5280 return NULL;
5281 }
5282 }
5283
5284
CreateLengthNode(const ExprNode_t & tNode,ISphExpr * pLeft)5285 ISphExpr * ExprParser_t::CreateLengthNode ( const ExprNode_t & tNode, ISphExpr * pLeft )
5286 {
5287 const ExprNode_t & tLeft = m_dNodes [ tNode.m_iLeft ];
5288 switch ( tLeft.m_iToken )
5289 {
5290 case TOK_ATTR_MVA32:
5291 case TOK_ATTR_MVA64:
5292 return new Expr_MVALength_c ( tLeft.m_tLocator, tLeft.m_iLocator, tLeft.m_iToken==TOK_ATTR_MVA64 );
5293 case TOK_ATTR_JSON:
5294 return new Expr_JsonFieldLength_c ( pLeft );
5295 default:
5296 m_sCreateError = "LENGTH() argument must be MVA or JSON field";
5297 return NULL;
5298 }
5299 }
5300
5301
/// create the GEODIST() evaluator
/// iArgs is the arguments list node; it must hold 4 coordinates (the first pair
/// and the second pair are two points), optionally followed by a map of options
/// recognized options are in (deg/degrees, rad/radians), out (km, mi, ft, m and
/// their long names), and method (haversine, adaptive); unknown ones are ignored
/// picks the cheapest evaluator: a constant when both points are constant,
/// a constant-anchor flavor when one point is, and the generic one otherwise
ISphExpr * ExprParser_t::CreateGeodistNode ( int iArgs )
{
	CSphVector<int> dArgs;
	GatherArgNodes ( iArgs, dArgs );
	assert ( dArgs.GetLength()==4 || dArgs.GetLength()==5 );

	float fOut = 1.0f; // result scale, defaults to out=meters
	bool bDeg = false; // arg units, defaults to in=radians
	GeoFunc_e eMethod = GEO_ADAPTIVE; // geodist function to use, defaults to adaptive

	// 5th argument, if any, is the options map
	if ( dArgs.GetLength()==5 )
	{
		assert ( m_dNodes [ dArgs[4] ].m_eRetType==SPH_ATTR_MAPARG );
		CSphVector<CSphNamedVariant> & dOpts = m_dNodes [ dArgs[4] ].m_pMapArg->m_dPairs;

		// FIXME! handle errors in options somehow?
		ARRAY_FOREACH ( i, dOpts )
		{
			const CSphNamedVariant & t = dOpts[i];
			if ( t.m_sKey=="in" )
			{
				if ( t.m_sValue=="deg" || t.m_sValue=="degrees" )
					bDeg = true;
				else if ( t.m_sValue=="rad" || t.m_sValue=="radians" )
					bDeg = false;

			} else if ( t.m_sKey=="out" )
			{
				// the result gets multiplied by fOut, so these are per-meter factors
				if ( t.m_sValue=="km" || t.m_sValue=="kilometers" )
					fOut = 1.0f / 1000.0f;
				else if ( t.m_sValue=="mi" || t.m_sValue=="miles" )
					fOut = 1.0f / 1609.34f;
				else if ( t.m_sValue=="ft" || t.m_sValue=="feet" )
					fOut = 1.0f / 0.3048f;
				else if ( t.m_sValue=="m" || t.m_sValue=="meters" )
					fOut = 1.0f;
			} else if ( t.m_sKey=="method" )
			{
				if ( t.m_sValue=="haversine" )
					eMethod = GEO_HAVERSINE;
				else if ( t.m_sValue=="adaptive" )
					eMethod = GEO_ADAPTIVE;
			}
		}
	}

	// check which of the two points are fully constant
	bool bConst1 = ( IsConst ( &m_dNodes[dArgs[0]] ) && IsConst ( &m_dNodes[dArgs[1]] ) );
	bool bConst2 = ( IsConst ( &m_dNodes[dArgs[2]] ) && IsConst ( &m_dNodes[dArgs[3]] ) );

	// both points are constant, so compute the distance right now
	if ( bConst1 && bConst2 )
	{
		float t[4];
		for ( int i=0; i<4; i++ )
			t[i] = FloatVal ( &m_dNodes[dArgs[i]] );
		return new Expr_GetConst_c ( fOut*Geodist ( eMethod, bDeg, t[0], t[1], t[2], t[3] ) );
	}

	// only the 1st point is constant; swap the points so that
	// the constant one is always the 2nd (the anchor) from here on
	if ( bConst1 )
	{
		Swap ( dArgs[0], dArgs[2] );
		Swap ( dArgs[1], dArgs[3] );
		Swap ( bConst1, bConst2 );
	}

	if ( bConst2 )
	{
		// constant anchor
		if ( m_dNodes[dArgs[0]].m_iToken==TOK_ATTR_FLOAT && m_dNodes[dArgs[1]].m_iToken==TOK_ATTR_FLOAT )
		{
			// attr point
			return new Expr_GeodistAttrConst_c ( GeodistFn ( eMethod, bDeg ), fOut,
				m_dNodes[dArgs[0]].m_tLocator, m_dNodes[dArgs[1]].m_tLocator,
				FloatVal ( &m_dNodes[dArgs[2]] ), FloatVal ( &m_dNodes[dArgs[3]] ),
				m_dNodes[dArgs[0]].m_iLocator, m_dNodes[dArgs[1]].m_iLocator );
		} else
		{
			// expr point
			return new Expr_GeodistConst_c ( GeodistFn ( eMethod, bDeg ), fOut,
				CreateTree ( dArgs[0] ), CreateTree ( dArgs[1] ),
				FloatVal ( &m_dNodes[dArgs[2]] ), FloatVal ( &m_dNodes[dArgs[3]] ) );
		}
	}

	// four expressions
	// NOTE(review): the assert below expects exactly 4 folded expressions; how the
	// optional options map argument is folded is not visible here, confirm
	CSphVector<ISphExpr *> dExpr;
	FoldArglist ( CreateTree ( iArgs ), dExpr );
	assert ( dExpr.GetLength()==4 );
	return new Expr_Geodist_c ( GeodistFn ( eMethod, bDeg ), fOut, dExpr[0], dExpr[1], dExpr[2], dExpr[3] );
}
5391
5392
CreatePFNode(int iArg)5393 ISphExpr * ExprParser_t::CreatePFNode ( int iArg )
5394 {
5395 m_eEvalStage = SPH_EVAL_FINAL;
5396
5397 DWORD uNodeFactorFlags = SPH_FACTOR_ENABLE | SPH_FACTOR_CALC_ATC;
5398
5399 CSphVector<int> dArgs;
5400 GatherArgNodes ( iArg, dArgs );
5401 assert ( dArgs.GetLength()==0 || dArgs.GetLength()==1 );
5402
5403 bool bNoATC = false;
5404 bool bJsonOut = false;
5405
5406 if ( dArgs.GetLength()==1 )
5407 {
5408 assert ( m_dNodes[dArgs[0]].m_eRetType==SPH_ATTR_MAPARG );
5409 CSphVector<CSphNamedVariant> & dOpts = m_dNodes[dArgs[0]].m_pMapArg->m_dPairs;
5410
5411 ARRAY_FOREACH ( i, dOpts )
5412 {
5413 if ( dOpts[i].m_sKey=="no_atc" && dOpts[i].m_iValue>0)
5414 bNoATC = true;
5415 else if ( dOpts[i].m_sKey=="json" && dOpts[i].m_iValue>0 )
5416 bJsonOut = true;
5417 }
5418 }
5419
5420 if ( bNoATC )
5421 uNodeFactorFlags &= ~SPH_FACTOR_CALC_ATC;
5422 if ( bJsonOut )
5423 uNodeFactorFlags |= SPH_FACTOR_JSON_OUT;
5424
5425 m_uPackedFactorFlags |= uNodeFactorFlags;
5426
5427 return new Expr_GetPackedFactors_c();
5428 }
5429
5430
5431
CreateBitdotNode(int iArgsNode,CSphVector<ISphExpr * > & dArgs)5432 ISphExpr * ExprParser_t::CreateBitdotNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs )
5433 {
5434 assert ( dArgs.GetLength()>=1 );
5435
5436 ESphAttr eAttrType = m_dNodes[iArgsNode].m_eRetType;
5437 switch ( eAttrType )
5438 {
5439 case SPH_ATTR_INTEGER: return new Expr_Bitdot_c<int> ( dArgs ); break;
5440 case SPH_ATTR_BIGINT: return new Expr_Bitdot_c<int64_t> ( dArgs ); break;
5441 default: return new Expr_Bitdot_c<float> ( dArgs ); break;
5442 }
5443 }
5444
5445
CreateAggregateNode(const ExprNode_t & tNode,ESphAggrFunc eFunc,ISphExpr * pLeft)5446 ISphExpr * ExprParser_t::CreateAggregateNode ( const ExprNode_t & tNode, ESphAggrFunc eFunc, ISphExpr * pLeft )
5447 {
5448 const ExprNode_t & tLeft = m_dNodes [ tNode.m_iLeft ];
5449 switch ( tLeft.m_iToken )
5450 {
5451 case TOK_ATTR_JSON: return new Expr_JsonFieldAggr_c ( pLeft, eFunc );
5452 case TOK_ATTR_MVA32: return new Expr_MVAAggr_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
5453 case TOK_ATTR_MVA64: return new Expr_MVAAggr_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
5454 default: return NULL;
5455 }
5456 }
5457
5458
FixupIterators(int iNode,const char * sKey,SphAttr_t * pAttr)5459 void ExprParser_t::FixupIterators ( int iNode, const char * sKey, SphAttr_t * pAttr )
5460 {
5461 if ( iNode==-1 )
5462 return;
5463
5464 ExprNode_t & tNode = m_dNodes[iNode];
5465
5466 if ( tNode.m_iToken==TOK_IDENT && !strcmp ( sKey, tNode.m_sIdent ) )
5467 {
5468 tNode.m_iToken = TOK_ITERATOR;
5469 tNode.m_pAttr = pAttr;
5470 }
5471
5472 FixupIterators ( tNode.m_iLeft, sKey, pAttr );
5473 FixupIterators ( tNode.m_iRight, sKey, pAttr );
5474 }
5475
5476
CreateForInNode(int iNode)5477 ISphExpr * ExprParser_t::CreateForInNode ( int iNode )
5478 {
5479 ExprNode_t & tNode = m_dNodes[iNode];
5480
5481 int iFunc = tNode.m_iFunc;
5482 int iExprNode = tNode.m_iLeft;
5483 int iNameNode = tNode.m_iRight;
5484 int iDataNode = m_dNodes[iNameNode].m_iLeft;
5485
5486 Expr_ForIn_c * pFunc = new Expr_ForIn_c ( CreateTree ( iDataNode ), iFunc==FUNC_ALL, iFunc==FUNC_INDEXOF );
5487
5488 FixupIterators ( iExprNode, m_dNodes[iNameNode].m_sIdent, pFunc->GetRef() );
5489 pFunc->SetExpr ( CreateTree ( iExprNode ) );
5490
5491 return pFunc;
5492 }
5493
5494 //////////////////////////////////////////////////////////////////////////
5495
yylex(YYSTYPE * lvalp,ExprParser_t * pParser)5496 int yylex ( YYSTYPE * lvalp, ExprParser_t * pParser )
5497 {
5498 return pParser->GetToken ( lvalp );
5499 }
5500
yyerror(ExprParser_t * pParser,const char * sMessage)5501 void yyerror ( ExprParser_t * pParser, const char * sMessage )
5502 {
5503 pParser->m_sParserError.SetSprintf ( "Sphinx expr: %s near '%s'", sMessage, pParser->m_pLastTokenStart );
5504 }
5505
5506 #if USE_WINDOWS
5507 #pragma warning(push,1)
5508 #endif
5509
5510 #ifdef CMAKE_GENERATED_GRAMMAR
5511 #include "bissphinxexpr.c"
5512 #else
5513 #include "yysphinxexpr.c"
5514 #endif
5515
5516 #if USE_WINDOWS
5517 #pragma warning(pop)
5518 #endif
5519
5520 //////////////////////////////////////////////////////////////////////////
5521
~ExprParser_t()5522 ExprParser_t::~ExprParser_t ()
5523 {
5524 // i kinda own those things
5525 ARRAY_FOREACH ( i, m_dNodes )
5526 {
5527 if ( m_dNodes[i].m_iToken==TOK_CONST_LIST )
5528 SafeDelete ( m_dNodes[i].m_pConsts );
5529 if ( m_dNodes[i].m_iToken==TOK_MAP_ARG )
5530 SafeDelete ( m_dNodes[i].m_pMapArg );
5531 }
5532
5533 // free any UDF calls that weren't taken over
5534 ARRAY_FOREACH ( i, m_dUdfCalls )
5535 SafeDelete ( m_dUdfCalls[i] );
5536
5537 // free temp map arguments storage
5538 ARRAY_FOREACH ( i, m_dIdents )
5539 SafeDeleteArray ( m_dIdents[i] );
5540 }
5541
GetWidestRet(int iLeft,int iRight)5542 ESphAttr ExprParser_t::GetWidestRet ( int iLeft, int iRight )
5543 {
5544 ESphAttr uLeftType = ( iLeft<0 ) ? SPH_ATTR_INTEGER : m_dNodes[iLeft].m_eRetType;
5545 ESphAttr uRightType = ( iRight<0 ) ? SPH_ATTR_INTEGER : m_dNodes[iRight].m_eRetType;
5546
5547 ESphAttr uRes = SPH_ATTR_FLOAT; // default is float
5548 if ( ( uLeftType==SPH_ATTR_INTEGER || uLeftType==SPH_ATTR_BIGINT ) &&
5549 ( uRightType==SPH_ATTR_INTEGER || uRightType==SPH_ATTR_BIGINT ) )
5550 {
5551 // both types are integer (int32 or int64), compute in integers
5552 uRes = ( uLeftType==SPH_ATTR_INTEGER && uRightType==SPH_ATTR_INTEGER )
5553 ? SPH_ATTR_INTEGER
5554 : SPH_ATTR_BIGINT;
5555 }
5556
5557 // if json vs numeric then return numeric type (for the autoconversion)
5558 if ( uLeftType==SPH_ATTR_JSON_FIELD && IsNumeric ( uRightType ) )
5559 uRes = uRightType;
5560 else if ( uRightType==SPH_ATTR_JSON_FIELD && IsNumeric ( uLeftType ) )
5561 uRes = uLeftType;
5562
5563 return uRes;
5564 }
5565
AddNodeInt(int64_t iValue)5566 int ExprParser_t::AddNodeInt ( int64_t iValue )
5567 {
5568 ExprNode_t & tNode = m_dNodes.Add ();
5569 tNode.m_iToken = TOK_CONST_INT;
5570 tNode.m_eRetType = GetIntType ( iValue );
5571 tNode.m_iConst = iValue;
5572 return m_dNodes.GetLength()-1;
5573 }
5574
AddNodeFloat(float fValue)5575 int ExprParser_t::AddNodeFloat ( float fValue )
5576 {
5577 ExprNode_t & tNode = m_dNodes.Add ();
5578 tNode.m_iToken = TOK_CONST_FLOAT;
5579 tNode.m_eRetType = SPH_ATTR_FLOAT;
5580 tNode.m_fConst = fValue;
5581 return m_dNodes.GetLength()-1;
5582 }
5583
AddNodeString(int64_t iValue)5584 int ExprParser_t::AddNodeString ( int64_t iValue )
5585 {
5586 ExprNode_t & tNode = m_dNodes.Add ();
5587 tNode.m_iToken = TOK_CONST_STRING;
5588 tNode.m_eRetType = SPH_ATTR_STRING;
5589 tNode.m_iConst = iValue;
5590 return m_dNodes.GetLength()-1;
5591 }
5592
AddNodeAttr(int iTokenType,uint64_t uAttrLocator)5593 int ExprParser_t::AddNodeAttr ( int iTokenType, uint64_t uAttrLocator )
5594 {
5595 assert ( iTokenType==TOK_ATTR_INT || iTokenType==TOK_ATTR_BITS || iTokenType==TOK_ATTR_FLOAT
5596 || iTokenType==TOK_ATTR_MVA32 || iTokenType==TOK_ATTR_MVA64 || iTokenType==TOK_ATTR_STRING
5597 || iTokenType==TOK_ATTR_FACTORS || iTokenType==TOK_ATTR_JSON );
5598 ExprNode_t & tNode = m_dNodes.Add ();
5599 tNode.m_iToken = iTokenType;
5600 sphUnpackAttrLocator ( uAttrLocator, &tNode );
5601
5602 if ( iTokenType==TOK_ATTR_FLOAT ) tNode.m_eRetType = SPH_ATTR_FLOAT;
5603 else if ( iTokenType==TOK_ATTR_MVA32 ) tNode.m_eRetType = SPH_ATTR_UINT32SET;
5604 else if ( iTokenType==TOK_ATTR_MVA64 ) tNode.m_eRetType = SPH_ATTR_INT64SET;
5605 else if ( iTokenType==TOK_ATTR_STRING ) tNode.m_eRetType = SPH_ATTR_STRING;
5606 else if ( iTokenType==TOK_ATTR_FACTORS ) tNode.m_eRetType = SPH_ATTR_FACTORS;
5607 else if ( iTokenType==TOK_ATTR_JSON ) tNode.m_eRetType = SPH_ATTR_JSON_FIELD;
5608 else if ( tNode.m_tLocator.m_iBitCount>32 ) tNode.m_eRetType = SPH_ATTR_BIGINT;
5609 else tNode.m_eRetType = SPH_ATTR_INTEGER;
5610 return m_dNodes.GetLength()-1;
5611 }
5612
AddNodeID()5613 int ExprParser_t::AddNodeID ()
5614 {
5615 ExprNode_t & tNode = m_dNodes.Add ();
5616 tNode.m_iToken = TOK_ID;
5617 tNode.m_eRetType = USE_64BIT ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER;
5618 return m_dNodes.GetLength()-1;
5619 }
5620
AddNodeWeight()5621 int ExprParser_t::AddNodeWeight ()
5622 {
5623 ExprNode_t & tNode = m_dNodes.Add ();
5624 tNode.m_iToken = TOK_WEIGHT;
5625 tNode.m_eRetType = SPH_ATTR_INTEGER;
5626 return m_dNodes.GetLength()-1;
5627 }
5628
AddNodeOp(int iOp,int iLeft,int iRight)5629 int ExprParser_t::AddNodeOp ( int iOp, int iLeft, int iRight )
5630 {
5631 ExprNode_t & tNode = m_dNodes.Add ();
5632 tNode.m_iToken = iOp;
5633
5634 // deduce type
5635 tNode.m_eRetType = SPH_ATTR_FLOAT; // default to float
5636 if ( iOp==TOK_NEG )
5637 {
5638 // NEG just inherits the type
5639 tNode.m_eArgType = m_dNodes[iLeft].m_eRetType;
5640 tNode.m_eRetType = tNode.m_eArgType;
5641
5642 } else if ( iOp==TOK_NOT )
5643 {
5644 // NOT result is integer, and its argument must be integer
5645 tNode.m_eArgType = m_dNodes[iLeft].m_eRetType;
5646 tNode.m_eRetType = SPH_ATTR_INTEGER;
5647 if (!( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
5648 {
5649 m_sParserError.SetSprintf ( "NOT argument must be integer" );
5650 return -1;
5651 }
5652
5653 } else if ( iOp==TOK_LTE || iOp==TOK_GTE || iOp==TOK_EQ || iOp==TOK_NE
5654 || iOp=='<' || iOp=='>' || iOp==TOK_AND || iOp==TOK_OR
5655 || iOp=='+' || iOp=='-' || iOp=='*' || iOp==','
5656 || iOp=='&' || iOp=='|' || iOp=='%'
5657 || iOp==TOK_IS_NULL || iOp==TOK_IS_NOT_NULL )
5658 {
5659 tNode.m_eArgType = GetWidestRet ( iLeft, iRight );
5660
5661 // arithmetical operations return arg type, logical return int
5662 tNode.m_eRetType = ( iOp=='+' || iOp=='-' || iOp=='*' || iOp==',' || iOp=='&' || iOp=='|' || iOp=='%' )
5663 ? tNode.m_eArgType
5664 : SPH_ATTR_INTEGER;
5665
5666 // both logical and bitwise AND/OR can only be over ints
5667 if ( ( iOp==TOK_AND || iOp==TOK_OR || iOp=='&' || iOp=='|' )
5668 && !( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
5669 {
5670 m_sParserError.SetSprintf ( "%s arguments must be integer", ( iOp==TOK_AND || iOp=='&' ) ? "AND" : "OR" );
5671 return -1;
5672 }
5673
5674 // MOD can only be over ints
5675 if ( iOp=='%'
5676 && !( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
5677 {
5678 m_sParserError.SetSprintf ( "MOD arguments must be integer" );
5679 return -1;
5680 }
5681
5682 } else
5683 {
5684 // check for unknown op
5685 assert ( iOp=='/' && "unknown op in AddNodeOp() type deducer" );
5686 }
5687
5688 tNode.m_iArgs = 0;
5689 if ( iOp==',' )
5690 {
5691 if ( iLeft>=0 ) tNode.m_iArgs += ( m_dNodes[iLeft].m_iToken==',' ) ? m_dNodes[iLeft].m_iArgs : 1;
5692 if ( iRight>=0 ) tNode.m_iArgs += ( m_dNodes[iRight].m_iToken==',' ) ? m_dNodes[iRight].m_iArgs : 1;
5693 }
5694
5695 // argument type conversion for functions like INDEXOF(), ALL() and ANY()
5696 // we need no conversion for operands of comma!
5697 if ( iOp!=',' && iLeft>=0 && iRight>=0 )
5698 {
5699 if ( m_dNodes[iRight].m_eRetType==SPH_ATTR_STRING && m_dNodes[iLeft].m_iToken==TOK_IDENT )
5700 m_dNodes[iLeft].m_eRetType = SPH_ATTR_STRING;
5701 else if ( m_dNodes[iLeft].m_eRetType==SPH_ATTR_STRING && m_dNodes[iRight].m_iToken==TOK_IDENT )
5702 m_dNodes[iRight].m_eRetType = SPH_ATTR_STRING;
5703 }
5704
5705 tNode.m_iLeft = iLeft;
5706 tNode.m_iRight = iRight;
5707 return m_dNodes.GetLength()-1;
5708 }
5709
5710
AddNodeFunc(int iFunc,int iFirst,int iSecond,int iThird,int iFourth)5711 int ExprParser_t::AddNodeFunc ( int iFunc, int iFirst, int iSecond, int iThird, int iFourth )
5712 {
5713 // regular case, iFirst is entire arglist, iSecond is -1
5714 // special case for IN(), iFirst is arg, iSecond is constlist
5715 // special case for REMAP(), iFirst and iSecond are expressions, iThird and iFourth are constlists
5716 assert ( iFunc>=0 && iFunc< int ( sizeof ( g_dFuncs )/sizeof ( g_dFuncs[0]) ) );
5717 Func_e eFunc = (Func_e)iFunc;
5718 assert ( g_dFuncs [ iFunc ].m_eFunc==eFunc );
5719 const char * sFuncName = g_dFuncs [ iFunc ].m_sName;
5720
5721 // check args count
5722 if ( iSecond<0 || eFunc==FUNC_IN )
5723 {
5724 int iExpectedArgc = g_dFuncs [ iFunc ].m_iArgs;
5725 int iArgc = 0;
5726 if ( iFirst>=0 )
5727 iArgc = ( m_dNodes [ iFirst ].m_iToken==',' ) ? m_dNodes [ iFirst ].m_iArgs : 1;
5728 if ( iExpectedArgc<0 )
5729 {
5730 if ( iArgc<-iExpectedArgc )
5731 {
5732 m_sParserError.SetSprintf ( "%s() called with %d args, at least %d args expected", sFuncName, iArgc, -iExpectedArgc );
5733 return -1;
5734 }
5735 } else if ( iArgc!=iExpectedArgc )
5736 {
5737 m_sParserError.SetSprintf ( "%s() called with %d args, %d args expected", sFuncName, iArgc, iExpectedArgc );
5738 return -1;
5739 }
5740 }
5741
5742 // check arg types
5743 //
5744 // check for string args
5745 // most builtin functions take numeric args only
5746 bool bGotString = false, bGotMva = false;
5747 CSphVector<ESphAttr> dRetTypes;
5748 if ( iSecond<0 )
5749 {
5750 GatherArgRetTypes ( iFirst, dRetTypes );
5751 ARRAY_FOREACH ( i, dRetTypes )
5752 {
5753 bGotString |= ( dRetTypes[i]==SPH_ATTR_STRING );
5754 bGotMva |= ( dRetTypes[i]==SPH_ATTR_UINT32SET || dRetTypes[i]==SPH_ATTR_INT64SET );
5755 }
5756 }
5757 if ( bGotString && !( eFunc==FUNC_CRC32 || eFunc==FUNC_EXIST || eFunc==FUNC_POLY2D || eFunc==FUNC_GEOPOLY2D ) )
5758 {
5759 m_sParserError.SetSprintf ( "%s() arguments can not be string", sFuncName );
5760 return -1;
5761 }
5762 if ( bGotMva && !( eFunc==FUNC_IN || eFunc==FUNC_TO_STRING || eFunc==FUNC_LENGTH || eFunc==FUNC_LEAST || eFunc==FUNC_GREATEST ) )
5763 {
5764 m_sParserError.SetSprintf ( "%s() arguments can not be MVA", sFuncName );
5765 return -1;
5766 }
5767
5768 // check that first BITDOT arg is integer or bigint
5769 if ( eFunc==FUNC_BITDOT )
5770 {
5771 int iLeftmost = iFirst;
5772 while ( m_dNodes [ iLeftmost ].m_iToken==',' )
5773 iLeftmost = m_dNodes [ iLeftmost ].m_iLeft;
5774
5775 ESphAttr eArg = m_dNodes [ iLeftmost ].m_eRetType;
5776 if ( eArg!=SPH_ATTR_INTEGER && eArg!=SPH_ATTR_BIGINT )
5777 {
5778 m_sParserError.SetSprintf ( "first BITDOT() argument must be integer" );
5779 return -1;
5780 }
5781 }
5782
5783 if ( eFunc==FUNC_EXIST )
5784 {
5785 int iExistLeft = m_dNodes [ iFirst ].m_iLeft;
5786 int iExistRight = m_dNodes [ iFirst ].m_iRight;
5787 bool bIsLeftGood = ( m_dNodes [ iExistLeft ].m_eRetType==SPH_ATTR_STRING );
5788 ESphAttr eRight = m_dNodes [ iExistRight ].m_eRetType;
5789 bool bIsRightGood = ( eRight==SPH_ATTR_INTEGER || eRight==SPH_ATTR_TIMESTAMP || eRight==SPH_ATTR_BOOL
5790 || eRight==SPH_ATTR_FLOAT || eRight==SPH_ATTR_BIGINT );
5791
5792 if ( !bIsLeftGood || !bIsRightGood )
5793 {
5794 if ( bIsRightGood )
5795 m_sParserError.SetSprintf ( "first EXIST() argument must be string" );
5796 else
5797 m_sParserError.SetSprintf ( "ill-formed EXIST" );
5798 return -1;
5799 }
5800 }
5801
5802
5803 // check that first SINT or timestamp family arg is integer
5804 if ( eFunc==FUNC_SINT || eFunc==FUNC_DAY || eFunc==FUNC_MONTH || eFunc==FUNC_YEAR || eFunc==FUNC_YEARMONTH || eFunc==FUNC_YEARMONTHDAY
5805 || eFunc==FUNC_FIBONACCI )
5806 {
5807 assert ( iFirst>=0 );
5808 if ( m_dNodes [ iFirst ].m_eRetType!=SPH_ATTR_INTEGER )
5809 {
5810 m_sParserError.SetSprintf ( "%s() argument must be integer", sFuncName );
5811 return -1;
5812 }
5813 }
5814
5815 // check that CONTAINS args are poly, float, float
5816 if ( eFunc==FUNC_CONTAINS )
5817 {
5818 assert ( dRetTypes.GetLength()==3 );
5819 if ( dRetTypes[0]!=SPH_ATTR_POLY2D )
5820 {
5821 m_sParserError.SetSprintf ( "1st CONTAINS() argument must be a 2D polygon (see POLY2D)" );
5822 return -1;
5823 }
5824 if ( !IsNumeric ( dRetTypes[1] ) || !IsNumeric ( dRetTypes[2] ) )
5825 {
5826 m_sParserError.SetSprintf ( "2nd and 3rd CONTAINS() arguments must be numeric" );
5827 return -1;
5828 }
5829 }
5830
5831 // check POLY2D args
5832 if ( eFunc==FUNC_POLY2D || eFunc==FUNC_GEOPOLY2D )
5833 {
5834 if ( dRetTypes.GetLength()==1 )
5835 {
5836 // handle 1 arg version, POLY2D(string-attr)
5837 if ( dRetTypes[0]!=SPH_ATTR_STRING )
5838 {
5839 m_sParserError.SetSprintf ( "%s() argument must be a string attribute", sFuncName );
5840 return -1;
5841 }
5842 } else if ( dRetTypes.GetLength()<6 )
5843 {
5844 // handle 2..5 arg versions, invalid
5845 m_sParserError.SetSprintf ( "bad %s() argument count, must be either 1 (string) or 6+ (x/y pairs list)", sFuncName );
5846 return -1;
5847
5848 } else
5849 {
5850 // handle 6+ arg version, POLY2D(xy-list)
5851 if ( dRetTypes.GetLength() & 1 )
5852 {
5853 m_sParserError.SetSprintf ( "bad %s() argument count, must be even", sFuncName );
5854 return -1;
5855 }
5856 ARRAY_FOREACH ( i, dRetTypes )
5857 if ( !IsNumeric ( dRetTypes[i] ) )
5858 {
5859 m_sParserError.SetSprintf ( "%s() argument %d must be numeric", sFuncName, 1+i );
5860 return -1;
5861 }
5862 }
5863 }
5864
5865 // check that BM25F args are float, float [, {file_name=weight}]
5866 if ( eFunc==FUNC_BM25F )
5867 {
5868 if ( dRetTypes.GetLength()>3 )
5869 {
5870 m_sParserError.SetSprintf ( "%s() called with %d args, at most 3 args expected", sFuncName, dRetTypes.GetLength() );
5871 return -1;
5872 }
5873
5874 if ( dRetTypes[0]!=SPH_ATTR_FLOAT || dRetTypes[1]!=SPH_ATTR_FLOAT )
5875 {
5876 m_sParserError.SetSprintf ( "%s() arguments 1,2 must be numeric", sFuncName );
5877 return -1;
5878 }
5879
5880 if ( dRetTypes.GetLength()==3 && dRetTypes[2]!=SPH_ATTR_MAPARG )
5881 {
5882 m_sParserError.SetSprintf ( "%s() argument 3 must be map", sFuncName );
5883 return -1;
5884 }
5885 }
5886
5887 // check GEODIST args count, and that optional arg 5 is a map argument
5888 if ( eFunc==FUNC_GEODIST )
5889 {
5890 if ( dRetTypes.GetLength()>5 )
5891 {
5892 m_sParserError.SetSprintf ( "%s() called with %d args, at most 5 args expected", sFuncName, dRetTypes.GetLength() );
5893 return -1;
5894 }
5895
5896 if ( dRetTypes.GetLength()==5 && dRetTypes[4]!=SPH_ATTR_MAPARG )
5897 {
5898 m_sParserError.SetSprintf ( "%s() argument 5 must be map", sFuncName );
5899 return -1;
5900 }
5901 }
5902
5903 // check REMAP(expr, expr, (constlist), (constlist)) args
5904 if ( eFunc==FUNC_REMAP )
5905 {
5906 if ( m_dNodes [ iFirst ].m_iToken==TOK_IDENT )
5907 {
5908 m_sParserError.SetSprintf ( "%s() incorrect first argument (not integer?)", sFuncName );
5909 return 1;
5910 }
5911 if ( m_dNodes [ iSecond ].m_iToken==TOK_IDENT )
5912 {
5913 m_sParserError.SetSprintf ( "%s() incorrect second argument (not integer/float?)", sFuncName );
5914 return 1;
5915 }
5916
5917 ESphAttr eFirstRet = m_dNodes [ iFirst ].m_eRetType;
5918 ESphAttr eSecondRet = m_dNodes [ iSecond ].m_eRetType;
5919 if ( eFirstRet!=SPH_ATTR_INTEGER && eFirstRet!=SPH_ATTR_BIGINT )
5920 {
5921 m_sParserError.SetSprintf ( "%s() first argument should result in integer value", sFuncName );
5922 return -1;
5923 }
5924 if ( eSecondRet!=SPH_ATTR_INTEGER && eSecondRet!=SPH_ATTR_BIGINT && eSecondRet!=SPH_ATTR_FLOAT )
5925 {
5926 m_sParserError.SetSprintf ( "%s() second argument should result in integer or float value", sFuncName );
5927 return -1;
5928 }
5929
5930 ConstList_c & tThirdList = *m_dNodes [ iThird ].m_pConsts;
5931 ConstList_c & tFourthList = *m_dNodes [ iFourth ].m_pConsts;
5932 if ( tThirdList.m_dInts.GetLength()==0 )
5933 {
5934 m_sParserError.SetSprintf ( "%s() first constlist should consist of integer values", sFuncName );
5935 return -1;
5936 }
5937 if ( tThirdList.m_dInts.GetLength()!=tFourthList.m_dInts.GetLength() &&
5938 tThirdList.m_dInts.GetLength()!=tFourthList.m_dFloats.GetLength() )
5939 {
5940 m_sParserError.SetSprintf ( "%s() both constlists should have the same length", sFuncName );
5941 return -1;
5942 }
5943
5944 if ( eSecondRet==SPH_ATTR_FLOAT && tFourthList.m_dFloats.GetLength()==0 )
5945 {
5946 m_sParserError.SetSprintf ( "%s() second argument results in float value and thus fourth argument should be a list of floats", sFuncName );
5947 return -1;
5948 }
5949 if ( eSecondRet!=SPH_ATTR_FLOAT && tFourthList.m_dInts.GetLength()==0 )
5950 {
5951 m_sParserError.SetSprintf ( "%s() second argument results in integer value and thus fourth argument should be a list of integers", sFuncName );
5952 return -1;
5953 }
5954 }
5955
5956 // do add
5957 ExprNode_t & tNode = m_dNodes.Add ();
5958 tNode.m_iToken = TOK_FUNC;
5959 tNode.m_iFunc = iFunc;
5960 tNode.m_iLeft = iFirst;
5961 tNode.m_iRight = iSecond;
5962 tNode.m_eArgType = ( iFirst>=0 ) ? m_dNodes [ iFirst ].m_eRetType : SPH_ATTR_INTEGER;
5963 tNode.m_eRetType = g_dFuncs [ iFunc ].m_eRet;
5964
5965 // fixup return type in a few special cases
5966 if ( eFunc==FUNC_MIN || eFunc==FUNC_MAX || eFunc==FUNC_MADD || eFunc==FUNC_MUL3 || eFunc==FUNC_ABS || eFunc==FUNC_IDIV )
5967 tNode.m_eRetType = tNode.m_eArgType;
5968
5969 if ( eFunc==FUNC_EXIST )
5970 {
5971 int iExistRight = m_dNodes [ iFirst ].m_iRight;
5972 ESphAttr eType = m_dNodes [ iExistRight ].m_eRetType;
5973 tNode.m_eArgType = eType;
5974 tNode.m_eRetType = eType;
5975 }
5976
5977 if ( eFunc==FUNC_BIGINT && tNode.m_eRetType==SPH_ATTR_FLOAT )
5978 tNode.m_eRetType = SPH_ATTR_FLOAT; // enforce if we can; FIXME! silently ignores BIGINT() on floats; should warn or raise an error
5979
5980 if ( eFunc==FUNC_IF || eFunc==FUNC_BITDOT )
5981 tNode.m_eRetType = GetWidestRet ( iFirst, iSecond );
5982
5983 // fixup MVA return type according to the leftmost argument
5984 if ( eFunc==FUNC_GREATEST || eFunc==FUNC_LEAST )
5985 {
5986 int iLeftmost = iFirst;
5987 while ( m_dNodes [ iLeftmost ].m_iToken==',' )
5988 iLeftmost = m_dNodes [ iLeftmost ].m_iLeft;
5989 ESphAttr eArg = m_dNodes [ iLeftmost ].m_eRetType;
5990 if ( eArg==SPH_ATTR_INT64SET )
5991 tNode.m_eRetType = SPH_ATTR_BIGINT;
5992 if ( eArg==SPH_ATTR_UINT32SET )
5993 tNode.m_eRetType = SPH_ATTR_INTEGER;
5994 }
5995
5996 if ( eFunc==FUNC_REMAP )
5997 {
5998 // function return type depends on second expression
5999 tNode.m_eRetType = m_dNodes [ iSecond ].m_eRetType;
6000 }
6001
6002 // all ok
6003 assert ( tNode.m_eRetType!=SPH_ATTR_NONE );
6004 return m_dNodes.GetLength()-1;
6005 }
6006
AddNodeUdf(int iCall,int iArg)6007 int ExprParser_t::AddNodeUdf ( int iCall, int iArg )
6008 {
6009 UdfCall_t * pCall = m_dUdfCalls[iCall];
6010 SPH_UDF_INIT & tInit = pCall->m_tInit;
6011 SPH_UDF_ARGS & tArgs = pCall->m_tArgs;
6012
6013 // initialize UDF right here, at AST creation stage
6014 // just because it's easy to gather arg types here
6015 if ( iArg>=0 )
6016 {
6017 // gather arg types
6018 CSphVector<DWORD> dArgTypes;
6019
6020 int iCur = iArg;
6021 while ( iCur>=0 )
6022 {
6023 if ( m_dNodes[iCur].m_iToken!=',' )
6024 {
6025 const ExprNode_t & tNode = m_dNodes[iCur];
6026 if ( tNode.m_iToken==TOK_FUNC && ( tNode.m_iFunc==FUNC_PACKEDFACTORS || tNode.m_iFunc==FUNC_RANKFACTORS || tNode.m_iFunc==FUNC_FACTORS ) )
6027 pCall->m_dArgs2Free.Add ( dArgTypes.GetLength() );
6028 dArgTypes.Add ( tNode.m_eRetType );
6029 break;
6030 }
6031
6032 int iRight = m_dNodes[iCur].m_iRight;
6033 if ( iRight>=0 )
6034 {
6035 const ExprNode_t & tNode = m_dNodes[iRight];
6036 assert ( tNode.m_iToken!=',' );
6037 if ( tNode.m_iToken==TOK_FUNC && ( tNode.m_iFunc==FUNC_PACKEDFACTORS || tNode.m_iFunc==FUNC_RANKFACTORS || tNode.m_iFunc==FUNC_FACTORS) )
6038 pCall->m_dArgs2Free.Add ( dArgTypes.GetLength() );
6039 dArgTypes.Add ( tNode.m_eRetType );
6040 }
6041
6042 iCur = m_dNodes[iCur].m_iLeft;
6043 }
6044
6045 assert ( dArgTypes.GetLength() );
6046 tArgs.arg_count = dArgTypes.GetLength();
6047 tArgs.arg_types = new sphinx_udf_argtype [ tArgs.arg_count ];
6048
6049 // we gathered internal type ids in right-to-left order
6050 // reverse and remap
6051 // FIXME! eliminate remap, maybe?
6052 ARRAY_FOREACH ( i, dArgTypes )
6053 {
6054 sphinx_udf_argtype & eRes = tArgs.arg_types [ tArgs.arg_count-1-i ];
6055 switch ( dArgTypes[i] )
6056 {
6057 case SPH_ATTR_INTEGER:
6058 case SPH_ATTR_TIMESTAMP:
6059 case SPH_ATTR_BOOL:
6060 eRes = SPH_UDF_TYPE_UINT32;
6061 break;
6062 case SPH_ATTR_FLOAT:
6063 eRes = SPH_UDF_TYPE_FLOAT;
6064 break;
6065 case SPH_ATTR_BIGINT:
6066 eRes = SPH_UDF_TYPE_INT64;
6067 break;
6068 case SPH_ATTR_STRING:
6069 eRes = SPH_UDF_TYPE_STRING;
6070 break;
6071 case SPH_ATTR_UINT32SET:
6072 eRes = SPH_UDF_TYPE_UINT32SET;
6073 break;
6074 case SPH_ATTR_INT64SET:
6075 eRes = SPH_UDF_TYPE_UINT64SET;
6076 break;
6077 case SPH_ATTR_FACTORS:
6078 eRes = SPH_UDF_TYPE_FACTORS;
6079 break;
6080 default:
6081 m_sParserError.SetSprintf ( "internal error: unmapped UDF argument type (arg=%d, type=%d)", i, dArgTypes[i] );
6082 return -1;
6083 }
6084 }
6085
6086 ARRAY_FOREACH ( i, pCall->m_dArgs2Free )
6087 pCall->m_dArgs2Free[i] = tArgs.arg_count - 1 - pCall->m_dArgs2Free[i];
6088 }
6089
6090 // init
6091 if ( pCall->m_pUdf->m_fnInit )
6092 {
6093 char sError [ SPH_UDF_ERROR_LEN ];
6094 if ( pCall->m_pUdf->m_fnInit ( &tInit, &tArgs, sError ) )
6095 {
6096 m_sParserError = sError;
6097 return -1;
6098 }
6099 }
6100
6101 // do add
6102 ExprNode_t & tNode = m_dNodes.Add ();
6103 tNode.m_iToken = TOK_UDF;
6104 tNode.m_iFunc = iCall;
6105 tNode.m_iLeft = iArg;
6106 tNode.m_iRight = -1;
6107
6108 // deduce type
6109 tNode.m_eArgType = ( iArg>=0 ) ? m_dNodes[iArg].m_eRetType : SPH_ATTR_INTEGER;
6110 tNode.m_eRetType = pCall->m_pUdf->m_eRetType;
6111 return m_dNodes.GetLength()-1;
6112 }
6113
AddNodePF(int iFunc,int iArg)6114 int ExprParser_t::AddNodePF ( int iFunc, int iArg )
6115 {
6116 assert ( iFunc>=0 && iFunc< int ( sizeof ( g_dFuncs )/sizeof ( g_dFuncs[0]) ) );
6117 const char * sFuncName = g_dFuncs [ iFunc ].m_sName;
6118
6119 CSphVector<ESphAttr> dRetTypes;
6120 GatherArgRetTypes ( iArg, dRetTypes );
6121
6122 assert ( dRetTypes.GetLength()==0 || dRetTypes.GetLength()==1 );
6123
6124 if ( dRetTypes.GetLength()==1 && dRetTypes[0]!=SPH_ATTR_MAPARG )
6125 {
6126 m_sParserError.SetSprintf ( "%s() argument must be a map", sFuncName );
6127 return -1;
6128 }
6129
6130 ExprNode_t & tNode = m_dNodes.Add ();
6131 tNode.m_iToken = TOK_FUNC;
6132 tNode.m_iFunc = iFunc;
6133 tNode.m_iLeft = iArg;
6134 tNode.m_iRight = -1;
6135 tNode.m_eArgType = SPH_ATTR_MAPARG;
6136 tNode.m_eRetType = g_dFuncs[iFunc].m_eRet;
6137
6138 return m_dNodes.GetLength()-1;
6139 }
6140
AddNodeConstlist(int64_t iValue)6141 int ExprParser_t::AddNodeConstlist ( int64_t iValue )
6142 {
6143 ExprNode_t & tNode = m_dNodes.Add();
6144 tNode.m_iToken = TOK_CONST_LIST;
6145 tNode.m_pConsts = new ConstList_c();
6146 tNode.m_pConsts->Add ( iValue );
6147 tNode.m_pConsts->m_sExpr = m_sExpr;
6148 return m_dNodes.GetLength()-1;
6149 }
6150
AddNodeConstlist(float iValue)6151 int ExprParser_t::AddNodeConstlist ( float iValue )
6152 {
6153 ExprNode_t & tNode = m_dNodes.Add();
6154 tNode.m_iToken = TOK_CONST_LIST;
6155 tNode.m_pConsts = new ConstList_c();
6156 tNode.m_pConsts->Add ( iValue );
6157 return m_dNodes.GetLength()-1;
6158 }
6159
AppendToConstlist(int iNode,int64_t iValue)6160 void ExprParser_t::AppendToConstlist ( int iNode, int64_t iValue )
6161 {
6162 m_dNodes[iNode].m_pConsts->Add ( iValue );
6163 }
6164
AppendToConstlist(int iNode,float iValue)6165 void ExprParser_t::AppendToConstlist ( int iNode, float iValue )
6166 {
6167 m_dNodes[iNode].m_pConsts->Add ( iValue );
6168 }
6169
AddNodeUservar(int iUservar)6170 int ExprParser_t::AddNodeUservar ( int iUservar )
6171 {
6172 ExprNode_t & tNode = m_dNodes.Add();
6173 tNode.m_iToken = TOK_USERVAR;
6174 tNode.m_iConst = iUservar;
6175 return m_dNodes.GetLength()-1;
6176 }
6177
AddNodeHookIdent(int iID)6178 int ExprParser_t::AddNodeHookIdent ( int iID )
6179 {
6180 ExprNode_t & tNode = m_dNodes.Add();
6181 tNode.m_iToken = TOK_HOOK_IDENT;
6182 tNode.m_iFunc = iID;
6183 tNode.m_eRetType = m_pHook->GetIdentType ( iID );
6184 return m_dNodes.GetLength()-1;
6185 }
6186
AddNodeHookFunc(int iID,int iLeft)6187 int ExprParser_t::AddNodeHookFunc ( int iID, int iLeft )
6188 {
6189 CSphVector<ESphAttr> dArgTypes;
6190 GatherArgRetTypes ( iLeft, dArgTypes );
6191
6192 ESphAttr eRet = m_pHook->GetReturnType ( iID, dArgTypes, CheckForConstSet ( iLeft, 0 ), m_sParserError );
6193 if ( eRet==SPH_ATTR_NONE )
6194 return -1;
6195
6196 ExprNode_t & tNode = m_dNodes.Add();
6197 tNode.m_iToken = TOK_HOOK_FUNC;
6198 tNode.m_iFunc = iID;
6199 tNode.m_iLeft = iLeft;
6200 tNode.m_iRight = -1;
6201
6202 // deduce type
6203 tNode.m_eArgType = ( iLeft>=0 ) ? m_dNodes[iLeft].m_eRetType : SPH_ATTR_INTEGER;
6204 tNode.m_eRetType = eRet;
6205
6206 return m_dNodes.GetLength()-1;
6207 }
6208
AddNodeMapArg(const char * sKey,const char * sValue,int64_t iValue)6209 int ExprParser_t::AddNodeMapArg ( const char * sKey, const char * sValue, int64_t iValue )
6210 {
6211 ExprNode_t & tNode = m_dNodes.Add();
6212 tNode.m_iToken = TOK_MAP_ARG;
6213 tNode.m_pMapArg = new MapArg_c();
6214 tNode.m_pMapArg->Add ( sKey, sValue, iValue );
6215 tNode.m_eRetType = SPH_ATTR_MAPARG;
6216 return m_dNodes.GetLength()-1;
6217 }
6218
AppendToMapArg(int iNode,const char * sKey,const char * sValue,int64_t iValue)6219 void ExprParser_t::AppendToMapArg ( int iNode, const char * sKey, const char * sValue, int64_t iValue )
6220 {
6221 m_dNodes[iNode].m_pMapArg->Add ( sKey, sValue, iValue );
6222 }
6223
Attr2Ident(uint64_t uAttrLoc)6224 const char * ExprParser_t::Attr2Ident ( uint64_t uAttrLoc )
6225 {
6226 ExprNode_t tAttr;
6227 sphUnpackAttrLocator ( uAttrLoc, &tAttr );
6228
6229 CSphString sIdent;
6230 sIdent = m_pSchema->GetAttr ( tAttr.m_iLocator ).m_sName;
6231 m_dIdents.Add ( sIdent.Leak() );
6232 return m_dIdents.Last();
6233 }
6234
6235
AddNodeJsonField(uint64_t uAttrLocator,int iLeft)6236 int ExprParser_t::AddNodeJsonField ( uint64_t uAttrLocator, int iLeft )
6237 {
6238 int iNode = AddNodeAttr ( TOK_ATTR_JSON, uAttrLocator );
6239 m_dNodes[iNode].m_iLeft = iLeft;
6240 return m_dNodes.GetLength()-1;
6241 }
6242
6243
AddNodeJsonSubkey(int64_t iValue)6244 int ExprParser_t::AddNodeJsonSubkey ( int64_t iValue )
6245 {
6246 ExprNode_t & tNode = m_dNodes.Add ();
6247 tNode.m_iToken = TOK_SUBKEY;
6248 tNode.m_eRetType = SPH_ATTR_STRING;
6249 tNode.m_iConst = iValue;
6250 return m_dNodes.GetLength()-1;
6251 }
6252
6253
AddNodeDotNumber(int64_t iValue)6254 int ExprParser_t::AddNodeDotNumber ( int64_t iValue )
6255 {
6256 ExprNode_t & tNode = m_dNodes.Add ();
6257 tNode.m_iToken = TOK_CONST_FLOAT;
6258 tNode.m_eRetType = SPH_ATTR_FLOAT;
6259 const char * pCur = m_sExpr + (int)( iValue>>32 );
6260 tNode.m_fConst = (float) strtod ( pCur-1, NULL );
6261 return m_dNodes.GetLength()-1;
6262 }
6263
6264
AddNodeIdent(const char * sKey,int iLeft)6265 int ExprParser_t::AddNodeIdent ( const char * sKey, int iLeft )
6266 {
6267 ExprNode_t & tNode = m_dNodes.Add ();
6268 tNode.m_sIdent = sKey;
6269 tNode.m_iLeft = iLeft;
6270 tNode.m_iToken = TOK_IDENT;
6271 tNode.m_eRetType = SPH_ATTR_JSON_FIELD;
6272 return m_dNodes.GetLength()-1;
6273 }
6274
6275 //////////////////////////////////////////////////////////////////////////
6276
6277 // performs simple semantic analysis
6278 // checks operand types for some arithmetic operators
6279 struct TypeCheck_fn
6280 {
6281 CSphString m_sError;
6282
EnterTypeCheck_fn6283 void Enter ( const ExprNode_t & tNode, const CSphVector<ExprNode_t> & dNodes )
6284 {
6285 if ( !m_sError.IsEmpty() )
6286 return;
6287
6288 bool bNumberOp = tNode.m_iToken=='+' || tNode.m_iToken=='-' || tNode.m_iToken=='*' || tNode.m_iToken=='/';
6289 if ( bNumberOp )
6290 {
6291 bool bLeftNumeric = tNode.m_iLeft==-1 ? false : IsNumericNode ( dNodes[tNode.m_iLeft] );
6292 bool bRightNumeric = tNode.m_iRight==-1 ? false : IsNumericNode ( dNodes[tNode.m_iRight] );
6293
6294 // if json vs numeric then let it pass (for the autoconversion)
6295 if ( ( bLeftNumeric && dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_JSON_FIELD )
6296 || ( bRightNumeric && dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD ) )
6297 return;
6298
6299 if ( !bLeftNumeric || !bRightNumeric )
6300 {
6301 m_sError = "numeric operation applied to non-numeric operands";
6302 return;
6303 }
6304 }
6305
6306 if ( tNode.m_iToken==TOK_EQ )
6307 {
6308 // string equal must work with string columns only
6309 ESphAttr eLeftRet = tNode.m_iLeft==-1 ? SPH_ATTR_NONE : dNodes[tNode.m_iLeft].m_eRetType;
6310 ESphAttr eRightRet = tNode.m_iRight==-1 ? SPH_ATTR_NONE : dNodes[tNode.m_iRight].m_eRetType;
6311 bool bLeftStr = ( eLeftRet==SPH_ATTR_STRING || eLeftRet==SPH_ATTR_STRINGPTR || eLeftRet==SPH_ATTR_JSON_FIELD );
6312 bool bRightStr = ( eRightRet==SPH_ATTR_STRING || eRightRet==SPH_ATTR_STRINGPTR || eRightRet==SPH_ATTR_JSON_FIELD );
6313 if ( bLeftStr!=bRightStr && eLeftRet!=SPH_ATTR_JSON_FIELD && eRightRet!=SPH_ATTR_JSON_FIELD )
6314 {
6315 m_sError = "equal operation applied to part string operands";
6316 return;
6317 }
6318 }
6319 }
6320
ExitTypeCheck_fn6321 void Exit ( const ExprNode_t & )
6322 {}
6323
IsNumericNodeTypeCheck_fn6324 bool IsNumericNode ( const ExprNode_t & tNode )
6325 {
6326 return tNode.m_eRetType==SPH_ATTR_INTEGER || tNode.m_eRetType==SPH_ATTR_BOOL || tNode.m_eRetType==SPH_ATTR_FLOAT ||
6327 tNode.m_eRetType==SPH_ATTR_BIGINT || tNode.m_eRetType==SPH_ATTR_TOKENCOUNT || tNode.m_eRetType==SPH_ATTR_TIMESTAMP;
6328 }
6329 };
6330
6331
6332 // checks whether we have a WEIGHT() in expression
6333 struct WeightCheck_fn
6334 {
6335 bool * m_pRes;
6336
WeightCheck_fnWeightCheck_fn6337 explicit WeightCheck_fn ( bool * pRes )
6338 : m_pRes ( pRes )
6339 {
6340 assert ( m_pRes );
6341 *m_pRes = false;
6342 }
6343
EnterWeightCheck_fn6344 void Enter ( const ExprNode_t & tNode, const CSphVector<ExprNode_t> & )
6345 {
6346 if ( tNode.m_iToken==TOK_WEIGHT )
6347 *m_pRes = true;
6348 }
6349
ExitWeightCheck_fn6350 void Exit ( const ExprNode_t & )
6351 {}
6352 };
6353
6354 // checks whether expression has functions defined not in this file like
6355 // searchd-level function or ranker-level functions
6356 struct HookCheck_fn
6357 {
6358 ISphExprHook * m_pHook;
6359
HookCheck_fnHookCheck_fn6360 explicit HookCheck_fn ( ISphExprHook * pHook )
6361 : m_pHook ( pHook )
6362 {}
6363
EnterHookCheck_fn6364 void Enter ( const ExprNode_t & tNode, const CSphVector<ExprNode_t> & )
6365 {
6366 if ( tNode.m_iToken==TOK_HOOK_IDENT || tNode.m_iToken==TOK_HOOK_FUNC )
6367 m_pHook->CheckEnter ( tNode.m_iFunc );
6368 }
6369
ExitHookCheck_fn6370 void Exit ( const ExprNode_t & tNode )
6371 {
6372 if ( tNode.m_iToken==TOK_HOOK_IDENT || tNode.m_iToken==TOK_HOOK_FUNC )
6373 m_pHook->CheckExit ( tNode.m_iFunc );
6374 }
6375 };
6376
6377
Parse(const char * sExpr,const ISphSchema & tSchema,ESphAttr * pAttrType,bool * pUsesWeight,CSphString & sError)6378 ISphExpr * ExprParser_t::Parse ( const char * sExpr, const ISphSchema & tSchema,
6379 ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError )
6380 {
6381 m_sLexerError = "";
6382 m_sParserError = "";
6383 m_sCreateError = "";
6384
6385 // setup lexer
6386 m_sExpr = sExpr;
6387 m_pCur = sExpr;
6388 m_pSchema = &tSchema;
6389
6390 // setup constant functions
6391 m_iConstNow = (int) time ( NULL );
6392
6393 // build abstract syntax tree
6394 m_iParsed = -1;
6395 yyparse ( this );
6396
6397 // handle errors
6398 if ( m_iParsed<0 || !m_sLexerError.IsEmpty() || !m_sParserError.IsEmpty() )
6399 {
6400 sError = !m_sLexerError.IsEmpty() ? m_sLexerError : m_sParserError;
6401 if ( sError.IsEmpty() ) sError = "general parsing error";
6402 return NULL;
6403 }
6404
6405 // deduce return type
6406 ESphAttr eAttrType = m_dNodes[m_iParsed].m_eRetType;
6407
6408 // Check expression stack to fit for mutual recursive function calls.
6409 // This check is an approximation, because different compilers with
6410 // different settings produce code which requires different stack size.
6411 if ( m_dNodes.GetLength()>100 )
6412 {
6413 CSphVector<int> dNodes;
6414 dNodes.Reserve ( m_dNodes.GetLength()/2 );
6415 int iMaxHeight = 1;
6416 int iHeight = 1;
6417 dNodes.Add ( m_iParsed );
6418 while ( dNodes.GetLength() )
6419 {
6420 const ExprNode_t & tExpr = m_dNodes[dNodes.Pop()];
6421 iHeight += ( tExpr.m_iLeft>=0 || tExpr.m_iRight>=0 ? 1 : -1 );
6422 iMaxHeight = Max ( iMaxHeight, iHeight );
6423 if ( tExpr.m_iRight>=0 )
6424 dNodes.Add ( tExpr.m_iRight );
6425 if ( tExpr.m_iLeft>=0 )
6426 dNodes.Add ( tExpr.m_iLeft );
6427 }
6428
6429 #define SPH_EXPRNODE_STACK_SIZE 160
6430 int64_t iExprStack = sphGetStackUsed() + iMaxHeight*SPH_EXPRNODE_STACK_SIZE;
6431 if ( g_iThreadStackSize<=iExprStack )
6432 {
6433 sError.SetSprintf ( "query too complex, not enough stack (thread_stack=%dK or higher required)",
6434 (int)( ( iExprStack + 1024 - ( iExprStack%1024 ) ) / 1024 ) );
6435 return NULL;
6436 }
6437 }
6438
6439 // perform optimizations (tree transformations)
6440 Optimize ( m_iParsed );
6441 #if 0
6442 Dump ( m_iParsed );
6443 fflush ( stdout );
6444 #endif
6445
6446 // simple semantic analysis
6447 TypeCheck_fn tTypeChecker;
6448 WalkTree ( m_iParsed, tTypeChecker );
6449 if ( !tTypeChecker.m_sError.IsEmpty() )
6450 {
6451 sError.Swap ( tTypeChecker.m_sError );
6452 return NULL;
6453 }
6454
6455 // create evaluator
6456 ISphExpr * pRes = CreateTree ( m_iParsed );
6457 if ( !m_sCreateError.IsEmpty() )
6458 {
6459 sError = m_sCreateError;
6460 SafeRelease ( pRes );
6461 } else if ( !pRes )
6462 {
6463 sError.SetSprintf ( "empty expression" );
6464 }
6465
6466 if ( pAttrType )
6467 *pAttrType = eAttrType;
6468
6469 if ( pUsesWeight )
6470 {
6471 WeightCheck_fn tWeightFunctor ( pUsesWeight );
6472 WalkTree ( m_iParsed, tWeightFunctor );
6473 }
6474
6475 if ( m_pHook )
6476 {
6477 HookCheck_fn tHookFunctor ( m_pHook );
6478 WalkTree ( m_iParsed, tHookFunctor );
6479 }
6480
6481 return pRes;
6482 }
6483
6484 //////////////////////////////////////////////////////////////////////////
6485 // PUBLIC STUFF
6486 //////////////////////////////////////////////////////////////////////////
6487
6488 /// parser entry point
sphExprParse(const char * sExpr,const ISphSchema & tSchema,ESphAttr * pAttrType,bool * pUsesWeight,CSphString & sError,CSphQueryProfile * pProfiler,ESphCollation eCollation,ISphExprHook * pHook,bool * pZonespanlist,DWORD * pPackedFactorsFlags,ESphEvalStage * pEvalStage)6489 ISphExpr * sphExprParse ( const char * sExpr, const ISphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight,
6490 CSphString & sError, CSphQueryProfile * pProfiler, ESphCollation eCollation, ISphExprHook * pHook, bool * pZonespanlist, DWORD * pPackedFactorsFlags, ESphEvalStage * pEvalStage )
6491 {
6492 // parse into opcodes
6493 ExprParser_t tParser ( pHook, pProfiler, eCollation );
6494 ISphExpr * pRes = tParser.Parse ( sExpr, tSchema, pAttrType, pUsesWeight, sError );
6495 if ( pZonespanlist )
6496 *pZonespanlist = tParser.m_bHasZonespanlist;
6497 if ( pEvalStage )
6498 *pEvalStage = tParser.m_eEvalStage;
6499 if ( pPackedFactorsFlags )
6500 *pPackedFactorsFlags = tParser.m_uPackedFactorFlags;
6501 return pRes;
6502 }
6503
6504 //
6505 // $Id$
6506 //
6507