//
// $Id$
//

//
// Copyright (c) 2001-2016, Andrew Aksyonoff
// Copyright (c) 2008-2016, Sphinx Technologies Inc
// All rights reserved
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License. You should have
// received a copy of the GPL license along with this program; if you
// did not, you can find it at http://www.gnu.org/
//

16 #ifndef _sphinxexpr_
17 #define _sphinxexpr_
18 
19 #include "sphinxstd.h"
20 
21 /// forward decls
22 class CSphMatch;
23 class ISphSchema;
24 class CSphSchema;
25 struct CSphString;
26 struct CSphColumnInfo;
27 
/// known attribute types
enum ESphAttr
{
	// these types are full types
	// their typecodes are saved in the index schema, and thus,
	// TYPECODES MUST NOT CHANGE ONCE INTRODUCED
	SPH_ATTR_NONE		= 0,			///< not an attribute at all
	SPH_ATTR_INTEGER	= 1,			///< unsigned 32-bit integer
	SPH_ATTR_TIMESTAMP	= 2,			///< this attr is a timestamp
	// there was SPH_ATTR_ORDINAL=3 once (the typecode is not reused)
	SPH_ATTR_BOOL		= 4,			///< this attr is a boolean bit field
	SPH_ATTR_FLOAT		= 5,			///< floating point number (IEEE 32-bit)
	SPH_ATTR_BIGINT		= 6,			///< signed 64-bit integer
	SPH_ATTR_STRING		= 7,			///< string (binary; in-memory)
	// there was SPH_ATTR_WORDCOUNT=8 once (the typecode is not reused)
	SPH_ATTR_POLY2D		= 9,			///< vector of floats, 2D polygon (see POLY2D)
	SPH_ATTR_STRINGPTR	= 10,			///< string (binary, in-memory, stored as pointer to the zero-terminated string)
	SPH_ATTR_TOKENCOUNT	= 11,			///< field token count, 32-bit integer
	SPH_ATTR_JSON		= 12,			///< JSON subset; converted, packed, and stored as string

	// multi-valued attributes (MVA)
	SPH_ATTR_UINT32SET	= 0x40000001UL,	///< MVA, set of unsigned 32-bit integers
	SPH_ATTR_INT64SET	= 0x40000002UL,	///< MVA, set of signed 64-bit integers

	// these types are runtime only
	// used as intermediate types in the expression engine
	SPH_ATTR_MAPARG		= 1000,			///< map argument (presumably the type of an Expr_MapArg_c node; confirm)
	SPH_ATTR_FACTORS	= 1001,			///< packed search factors (binary, in-memory, pooled)
	SPH_ATTR_JSON_FIELD	= 1002,			///< points to particular field in JSON column subset
	SPH_ATTR_FACTORS_JSON	= 1003		///< packed search factors (binary, in-memory, pooled, provided to client json encoded)
};

/// column evaluation stage
/// enumerators are numbered implicitly, starting from SPH_EVAL_STATIC = 0,
/// in the order they are listed
enum ESphEvalStage
{
	SPH_EVAL_STATIC = 0,		///< static data, no real evaluation needed
	SPH_EVAL_OVERRIDE,			///< static but possibly overridden
	SPH_EVAL_PREFILTER,			///< expression needed for candidate matches filtering
	SPH_EVAL_PRESORT,			///< expression needed for final matches sorting
	SPH_EVAL_SORTER,			///< expression evaluated by sorter object
	SPH_EVAL_FINAL,				///< expression not (!) used in filters/sorting; can be postponed until final result set cooking
	SPH_EVAL_POSTLIMIT			///< expression needs to be postponed until we apply all the LIMIT clauses (say, too expensive)
};

/// expression tree wide commands
/// passed as the 1st argument of ISphExpr::Command()
/// FIXME? maybe merge with ExtraData_e?
enum ESphExprCommand
{
	SPH_EXPR_SET_MVA_POOL,			///< presumably hands the MVA pool to the tree; confirm the argument type with implementations
	SPH_EXPR_SET_STRING_POOL,		///< presumably hands the string pool to the tree; confirm the argument type with implementations
	SPH_EXPR_SET_EXTRA_DATA,		///< presumably hands extra data to the tree; confirm the argument type with implementations
	SPH_EXPR_GET_DEPENDENT_COLS,	///< used to determine proper evaluating stage
	SPH_EXPR_GET_UDF				///< presumably queries the tree for UDF usage; confirm with implementations
};

82 /// expression evaluator
83 /// can always be evaluated in floats using Eval()
84 /// can sometimes be evaluated in integers using IntEval(), depending on type as returned from sphExprParse()
85 struct ISphExpr : public ISphRefcounted
86 {
87 public:
88 	/// evaluate this expression for that match
89 	virtual float Eval ( const CSphMatch & tMatch ) const = 0;
90 
91 	/// evaluate this expression for that match, using int math
IntEvalISphExpr92 	virtual int IntEval ( const CSphMatch & tMatch ) const { assert ( 0 ); return (int) Eval ( tMatch ); }
93 
94 	/// evaluate this expression for that match, using int64 math
Int64EvalISphExpr95 	virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { assert ( 0 ); return (int64_t) Eval ( tMatch ); }
96 
97 	/// Evaluate string attr.
98 	/// Note, that sometimes this method returns pointer to a static buffer
99 	/// and sometimes it allocates a new buffer, so aware of memory leaks.
100 	/// IsStringPtr() returns true if this method allocates a new buffer and false otherwise.
StringEvalISphExpr101 	virtual int StringEval ( const CSphMatch &, const BYTE ** ppStr ) const { *ppStr = NULL; return 0; }
102 
103 	/// evaluate MVA attr
MvaEvalISphExpr104 	virtual const DWORD * MvaEval ( const CSphMatch & ) const { assert ( 0 ); return NULL; }
105 
106 	/// evaluate Packed factors
FactorEvalISphExpr107 	virtual const DWORD * FactorEval ( const CSphMatch & ) const { assert ( 0 ); return NULL; }
108 
109 	/// check for arglist subtype
110 	/// FIXME? replace with a single GetType() call?
IsArglistISphExpr111 	virtual bool IsArglist () const { return false; }
112 
113 	/// check for stringptr subtype
114 	/// FIXME? replace with a single GetType() call?
IsStringPtrISphExpr115 	virtual bool IsStringPtr () const { return false; }
116 
117 	/// get Nth arg of an arglist
GetArgISphExpr118 	virtual ISphExpr * GetArg ( int ) const { return NULL; }
119 
120 	/// get the number of args in an arglist
GetNumArgsISphExpr121 	virtual int GetNumArgs() const { return 0; }
122 
123 	/// run a tree wide action (1st arg is an action, 2nd is its parameter)
124 	/// usually sets something into ISphExpr like string pool or gets something from it like dependent columns
CommandISphExpr125 	virtual void Command ( ESphExprCommand, void * ) {}
126 
127 	/// check for const type
IsConstISphExpr128 	virtual bool IsConst () const { return false; }
129 };
130 
131 /// string expression traits
132 /// can never be evaluated in floats or integers, only StringEval() is allowed
133 struct ISphStringExpr : public ISphExpr
134 {
EvalISphStringExpr135 	virtual float Eval ( const CSphMatch & ) const { assert ( 0 && "one just does not simply evaluate a string as float" ); return 0; }
IntEvalISphStringExpr136 	virtual int IntEval ( const CSphMatch & ) const { assert ( 0 && "one just does not simply evaluate a string as int" ); return 0; }
Int64EvalISphStringExpr137 	virtual int64_t Int64Eval ( const CSphMatch & ) const { assert ( 0 && "one just does not simply evaluate a string as bigint" ); return 0; }
138 };
139 
140 /// hook to extend expressions
141 /// lets one to add her own identifier and function handlers
142 struct ISphExprHook
143 {
~ISphExprHookISphExprHook144 	virtual ~ISphExprHook () {}
145 	/// checks for an identifier known to the hook
146 	/// returns -1 on failure, a non-negative OID on success
147 	virtual int IsKnownIdent ( const char * sIdent ) = 0;
148 
149 	/// checks for a valid function call
150 	/// returns -1 on failure, a non-negative OID on success (possibly adjusted)
151 	virtual int IsKnownFunc ( const char * sFunc ) = 0;
152 
153 	/// create node by OID
154 	/// pEvalStage is an optional out-parameter
155 	/// hook may fill it, but that is *not* required
156 	virtual ISphExpr * CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, CSphString & sError ) = 0;
157 
158 	/// get identifier return type by OID
159 	virtual ESphAttr GetIdentType ( int iID ) = 0;
160 
161 	/// get function return type by OID and argument types vector
162 	/// must return SPH_ATTR_NONE and fill the message on failure
163 	virtual ESphAttr GetReturnType ( int iID, const CSphVector<ESphAttr> & dArgs, bool bAllConst, CSphString & sError ) = 0;
164 
165 	/// recursive scope check
166 	virtual void CheckEnter ( int iID ) = 0;
167 
168 	/// recursive scope check
169 	virtual void CheckExit ( int iID ) = 0;
170 };
171 
172 
173 /// named int/string variant
174 /// used for named expression function arguments block
175 /// ie. {..} part in, for example, BM25F(1.2, 0.8, {title=3}) call
176 struct CSphNamedVariant
177 {
178 	CSphString		m_sKey;		///< key
179 	CSphString		m_sValue;	///< value for strings, empty for ints
180 	int				m_iValue;	///< value for ints
181 };
182 
183 
184 /// a container used to pass maps of constants/variables around the evaluation tree
185 struct Expr_MapArg_c : public ISphExpr
186 {
187 	CSphVector<CSphNamedVariant> m_dValues;
188 
Expr_MapArg_cExpr_MapArg_c189 	explicit Expr_MapArg_c ( CSphVector<CSphNamedVariant> & dValues )
190 	{
191 		m_dValues.SwapData ( dValues );
192 	}
193 
EvalExpr_MapArg_c194 	virtual float Eval ( const CSphMatch & ) const
195 	{
196 		assert ( 0 && "one just does not simply evaluate a const hash" );
197 		return 0.0f;
198 	}
199 };
200 
201 
/// packed factors flags; the non-zero values are distinct bits and can be OR-ed together
/// NOTE(review): presumably these are the bits reported via pPackedFactorsFlags of sphExprParse(); confirm
enum
{
	SPH_FACTOR_DISABLE		= 0,		///< no flag set
	SPH_FACTOR_ENABLE		= 1,		///< bit 0
	SPH_FACTOR_CALC_ATC		= 1 << 1,	///< bit 1
	SPH_FACTOR_JSON_OUT		= 1 << 2	///< bit 2
};


/// known collations
/// enumerators are numbered implicitly from 0 in the order listed
enum ESphCollation
{
	SPH_COLLATION_LIBC_CI,
	SPH_COLLATION_LIBC_CS,
	SPH_COLLATION_UTF8_GENERAL_CI,
	SPH_COLLATION_BINARY,

	SPH_COLLATION_DEFAULT = SPH_COLLATION_LIBC_CI	///< alias of SPH_COLLATION_LIBC_CI, not a separate collation
};

222 /// parses given expression, builds evaluator
223 /// returns NULL and fills sError on failure
224 /// returns pointer to evaluator on success
225 /// fills pAttrType with result type (for now, can be SPH_ATTR_SINT or SPH_ATTR_FLOAT)
226 /// fills pUsesWeight with a flag whether match relevance is referenced in expression AST
227 /// fills pEvalStage with a required (!) evaluation stage
228 class CSphQueryProfile;
229 ISphExpr * sphExprParse ( const char * sExpr, const ISphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight,
230 	CSphString & sError, CSphQueryProfile * pProfiler, ESphCollation eCollation=SPH_COLLATION_DEFAULT, ISphExprHook * pHook=NULL,
231 	bool * pZonespanlist=NULL, DWORD * pPackedFactorsFlags=NULL, ESphEvalStage * pEvalStage=NULL );
232 
//////////////////////////////////////////////////////////////////////////

/// init tables used by our geodistance functions
void GeodistInit();

/// haversine sphere distance, radians
float GeodistSphereRad ( float fLat1, float fLon1, float fLat2, float fLon2 );

/// haversine sphere distance, degrees
float GeodistSphereDeg ( float fLat1, float fLon1, float fLat2, float fLon2 );

/// flat ellipsoid distance, degrees
float GeodistFlatDeg ( float fLat1, float fLon1, float fLat2, float fLon2 );

/// adaptive flat/haversine distance, degrees
float GeodistAdaptiveDeg ( float fLat1, float fLon1, float fLat2, float fLon2 );

/// adaptive flat/haversine distance, radians
float GeodistAdaptiveRad ( float fLat1, float fLon1, float fLat2, float fLon2 );

253 #endif // _sphinxexpr_
254 
255 //
256 // $Id$
257 //
258