1 // 2 // $Id$ 3 // 4 5 // 6 // Copyright (c) 2001-2016, Andrew Aksyonoff 7 // Copyright (c) 2008-2016, Sphinx Technologies Inc 8 // All rights reserved 9 // 10 // This program is free software; you can redistribute it and/or modify 11 // it under the terms of the GNU General Public License. You should have 12 // received a copy of the GPL license along with this program; if you 13 // did not, you can find it at http://www.gnu.org/ 14 // 15 16 #ifndef _sphinxexpr_ 17 #define _sphinxexpr_ 18 19 #include "sphinxstd.h" 20 21 /// forward decls 22 class CSphMatch; 23 class ISphSchema; 24 class CSphSchema; 25 struct CSphString; 26 struct CSphColumnInfo; 27 28 /// known attribute types 29 enum ESphAttr 30 { 31 // these types are full types 32 // their typecodes are saved in the index schema, and thus, 33 // TYPECODES MUST NOT CHANGE ONCE INTRODUCED 34 SPH_ATTR_NONE = 0, ///< not an attribute at all 35 SPH_ATTR_INTEGER = 1, ///< unsigned 32-bit integer 36 SPH_ATTR_TIMESTAMP = 2, ///< this attr is a timestamp 37 // there was SPH_ATTR_ORDINAL=3 once 38 SPH_ATTR_BOOL = 4, ///< this attr is a boolean bit field 39 SPH_ATTR_FLOAT = 5, ///< floating point number (IEEE 32-bit) 40 SPH_ATTR_BIGINT = 6, ///< signed 64-bit integer 41 SPH_ATTR_STRING = 7, ///< string (binary; in-memory) 42 // there was SPH_ATTR_WORDCOUNT=8 once 43 SPH_ATTR_POLY2D = 9, ///< vector of floats, 2D polygon (see POLY2D) 44 SPH_ATTR_STRINGPTR = 10, ///< string (binary, in-memory, stored as pointer to the zero-terminated string) 45 SPH_ATTR_TOKENCOUNT = 11, ///< field token count, 32-bit integer 46 SPH_ATTR_JSON = 12, ///< JSON subset; converted, packed, and stored as string 47 48 SPH_ATTR_UINT32SET = 0x40000001UL, ///< MVA, set of unsigned 32-bit integers 49 SPH_ATTR_INT64SET = 0x40000002UL, ///< MVA, set of signed 64-bit integers 50 51 // these types are runtime only 52 // used as intermediate types in the expression engine 53 SPH_ATTR_MAPARG = 1000, 54 SPH_ATTR_FACTORS = 1001, ///< packed search factors (binary, in-memory, pooled) 55 SPH_ATTR_JSON_FIELD = 1002, ///< points to particular field in JSON column subset 56 SPH_ATTR_FACTORS_JSON = 1003 ///< packed search factors (binary, in-memory, pooled, provided to client json encoded) 57 }; 58 59 /// column evaluation stage 60 enum ESphEvalStage 61 { 62 SPH_EVAL_STATIC = 0, ///< static data, no real evaluation needed 63 SPH_EVAL_OVERRIDE, ///< static but possibly overridden 64 SPH_EVAL_PREFILTER, ///< expression needed for candidate matches filtering 65 SPH_EVAL_PRESORT, ///< expression needed for final matches sorting 66 SPH_EVAL_SORTER, ///< expression evaluated by sorter object 67 SPH_EVAL_FINAL, ///< expression not (!) used in filters/sorting; can be postponed until final result set cooking 68 SPH_EVAL_POSTLIMIT ///< expression needs to be postponed until we apply all the LIMIT clauses (say, too expensive) 69 }; 70 71 /// expression tree wide commands 72 /// FIXME? maybe merge with ExtraData_e? 73 enum ESphExprCommand 74 { 75 SPH_EXPR_SET_MVA_POOL, 76 SPH_EXPR_SET_STRING_POOL, 77 SPH_EXPR_SET_EXTRA_DATA, 78 SPH_EXPR_GET_DEPENDENT_COLS, ///< used to determine proper evaluating stage 79 SPH_EXPR_GET_UDF 80 }; 81 82 /// expression evaluator 83 /// can always be evaluated in floats using Eval() 84 /// can sometimes be evaluated in integers using IntEval(), depending on type as returned from sphExprParse() 85 struct ISphExpr : public ISphRefcounted 86 { 87 public: 88 /// evaluate this expression for that match 89 virtual float Eval ( const CSphMatch & tMatch ) const = 0; 90 91 /// evaluate this expression for that match, using int math IntEvalISphExpr92 virtual int IntEval ( const CSphMatch & tMatch ) const { assert ( 0 ); return (int) Eval ( tMatch ); } 93 94 /// evaluate this expression for that match, using int64 math Int64EvalISphExpr95 virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { assert ( 0 ); return (int64_t) Eval ( tMatch ); } 96 97 /// Evaluate string attr. 98 /// Note, that sometimes this method returns pointer to a static buffer 99 /// and sometimes it allocates a new buffer, so aware of memory leaks. 100 /// IsStringPtr() returns true if this method allocates a new buffer and false otherwise. StringEvalISphExpr101 virtual int StringEval ( const CSphMatch &, const BYTE ** ppStr ) const { *ppStr = NULL; return 0; } 102 103 /// evaluate MVA attr MvaEvalISphExpr104 virtual const DWORD * MvaEval ( const CSphMatch & ) const { assert ( 0 ); return NULL; } 105 106 /// evaluate Packed factors FactorEvalISphExpr107 virtual const DWORD * FactorEval ( const CSphMatch & ) const { assert ( 0 ); return NULL; } 108 109 /// check for arglist subtype 110 /// FIXME? replace with a single GetType() call? IsArglistISphExpr111 virtual bool IsArglist () const { return false; } 112 113 /// check for stringptr subtype 114 /// FIXME? replace with a single GetType() call? IsStringPtrISphExpr115 virtual bool IsStringPtr () const { return false; } 116 117 /// get Nth arg of an arglist GetArgISphExpr118 virtual ISphExpr * GetArg ( int ) const { return NULL; } 119 120 /// get the number of args in an arglist GetNumArgsISphExpr121 virtual int GetNumArgs() const { return 0; } 122 123 /// run a tree wide action (1st arg is an action, 2nd is its parameter) 124 /// usually sets something into ISphExpr like string pool or gets something from it like dependent columns CommandISphExpr125 virtual void Command ( ESphExprCommand, void * ) {} 126 127 /// check for const type IsConstISphExpr128 virtual bool IsConst () const { return false; } 129 }; 130 131 /// string expression traits 132 /// can never be evaluated in floats or integers, only StringEval() is allowed 133 struct ISphStringExpr : public ISphExpr 134 { EvalISphStringExpr135 virtual float Eval ( const CSphMatch & ) const { assert ( 0 && "one just does not simply evaluate a string as float" ); return 0; } IntEvalISphStringExpr136 virtual int IntEval ( const CSphMatch & ) const { assert ( 0 && "one just does not simply evaluate a string as int" ); return 0; } Int64EvalISphStringExpr137 virtual int64_t Int64Eval ( const CSphMatch & ) const { assert ( 0 && "one just does not simply evaluate a string as bigint" ); return 0; } 138 }; 139 140 /// hook to extend expressions 141 /// lets one to add her own identifier and function handlers 142 struct ISphExprHook 143 { ~ISphExprHookISphExprHook144 virtual ~ISphExprHook () {} 145 /// checks for an identifier known to the hook 146 /// returns -1 on failure, a non-negative OID on success 147 virtual int IsKnownIdent ( const char * sIdent ) = 0; 148 149 /// checks for a valid function call 150 /// returns -1 on failure, a non-negative OID on success (possibly adjusted) 151 virtual int IsKnownFunc ( const char * sFunc ) = 0; 152 153 /// create node by OID 154 /// pEvalStage is an optional out-parameter 155 /// hook may fill it, but that is *not* required 156 virtual ISphExpr * CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, CSphString & sError ) = 0; 157 158 /// get identifier return type by OID 159 virtual ESphAttr GetIdentType ( int iID ) = 0; 160 161 /// get function return type by OID and argument types vector 162 /// must return SPH_ATTR_NONE and fill the message on failure 163 virtual ESphAttr GetReturnType ( int iID, const CSphVector<ESphAttr> & dArgs, bool bAllConst, CSphString & sError ) = 0; 164 165 /// recursive scope check 166 virtual void CheckEnter ( int iID ) = 0; 167 168 /// recursive scope check 169 virtual void CheckExit ( int iID ) = 0; 170 }; 171 172 173 /// named int/string variant 174 /// used for named expression function arguments block 175 /// ie. {..} part in, for example, BM25F(1.2, 0.8, {title=3}) call 176 struct CSphNamedVariant 177 { 178 CSphString m_sKey; ///< key 179 CSphString m_sValue; ///< value for strings, empty for ints 180 int m_iValue; ///< value for ints 181 }; 182 183 184 /// a container used to pass maps of constants/variables around the evaluation tree 185 struct Expr_MapArg_c : public ISphExpr 186 { 187 CSphVector<CSphNamedVariant> m_dValues; 188 Expr_MapArg_cExpr_MapArg_c189 explicit Expr_MapArg_c ( CSphVector<CSphNamedVariant> & dValues ) 190 { 191 m_dValues.SwapData ( dValues ); 192 } 193 EvalExpr_MapArg_c194 virtual float Eval ( const CSphMatch & ) const 195 { 196 assert ( 0 && "one just does not simply evaluate a const hash" ); 197 return 0.0f; 198 } 199 }; 200 201 202 enum 203 { 204 SPH_FACTOR_DISABLE = 0, 205 SPH_FACTOR_ENABLE = 1, 206 SPH_FACTOR_CALC_ATC = 1 << 1, 207 SPH_FACTOR_JSON_OUT = 1 << 2 208 }; 209 210 211 /// known collations 212 enum ESphCollation 213 { 214 SPH_COLLATION_LIBC_CI, 215 SPH_COLLATION_LIBC_CS, 216 SPH_COLLATION_UTF8_GENERAL_CI, 217 SPH_COLLATION_BINARY, 218 219 SPH_COLLATION_DEFAULT = SPH_COLLATION_LIBC_CI 220 }; 221 222 /// parses given expression, builds evaluator 223 /// returns NULL and fills sError on failure 224 /// returns pointer to evaluator on success 225 /// fills pAttrType with result type (for now, can be SPH_ATTR_SINT or SPH_ATTR_FLOAT) 226 /// fills pUsesWeight with a flag whether match relevance is referenced in expression AST 227 /// fills pEvalStage with a required (!) evaluation stage 228 class CSphQueryProfile; 229 ISphExpr * sphExprParse ( const char * sExpr, const ISphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight, 230 CSphString & sError, CSphQueryProfile * pProfiler, ESphCollation eCollation=SPH_COLLATION_DEFAULT, ISphExprHook * pHook=NULL, 231 bool * pZonespanlist=NULL, DWORD * pPackedFactorsFlags=NULL, ESphEvalStage * pEvalStage=NULL ); 232 233 ////////////////////////////////////////////////////////////////////////// 234 235 /// init tables used by our geodistance functions 236 void GeodistInit(); 237 238 /// haversine sphere distance, radians 239 float GeodistSphereRad ( float lat1, float lon1, float lat2, float lon2 ); 240 241 /// haversine sphere distance, degrees 242 float GeodistSphereDeg ( float lat1, float lon1, float lat2, float lon2 ); 243 244 /// flat ellipsoid distance, degrees 245 float GeodistFlatDeg ( float fLat1, float fLon1, float fLat2, float fLon2 ); 246 247 /// adaptive flat/haversine distance, degrees 248 float GeodistAdaptiveDeg ( float lat1, float lon1, float lat2, float lon2 ); 249 250 /// adaptive flat/haversine distance, radians 251 float GeodistAdaptiveRad ( float lat1, float lon1, float lat2, float lon2 ); 252 253 #endif // _sphinxexpr_ 254 255 // 256 // $Id$ 257 // 258