1 #ifndef UTIL__QUERY_PARSER_HPP__ 2 #define UTIL__QUERY_PARSER_HPP__ 3 4 /* $Id: query_parse.hpp 575325 2018-11-27 18:22:00Z ucko $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Authors: Anatoliy Kuznetsov, Mike DiCuccio, Maxim Didenko 30 * 31 * File Description: Query parser implementation 32 * 33 */ 34 35 /// @file query_parse.hpp 36 /// Query string parsing components 37 38 #include <corelib/ncbi_tree.hpp> 39 #include <corelib/ncbiobj.hpp> 40 41 BEGIN_NCBI_SCOPE 42 43 class CQueryParseTree; 44 45 46 47 /** @addtogroup QParser 48 * 49 * @{ 50 */ 51 52 /// Base class for query node user defined object 53 /// 54 /// User object used to carry field dependent data, metainformation, 55 /// execution time data, etc. It can be a bridge between parser and a query 56 /// execution engine. 57 /// 58 59 class NCBI_XUTIL_EXPORT IQueryParseUserObject : public CObject 60 { 61 public: 62 63 /// Reset user object (for reuse without reallocation) 64 virtual void Reset() = 0; 65 66 /// String value for debuging GetVisibleValue() const67 virtual string GetVisibleValue() const { return ""; }; 68 }; 69 70 71 /// Query node class 72 /// 73 /// Query node describes element of the recursive parsing tree 74 /// for the query language. 75 /// (The tree is then interpreted by the execution machine) 76 /// 77 78 class NCBI_XUTIL_EXPORT CQueryParseNode 79 { 80 public: 81 82 /// Query node type 83 /// 84 enum EType { 85 eNotSet = 0, ///< Produced by the (private) default constructor 86 eIdentifier, ///< Identifier like db.field (Org, Fld12, etc.) 87 eIntConst, ///< Integer const 88 eFloatConst, ///< Floating point const 89 eBoolConst, ///< Boolean (TRUE or FALSE) 90 eString, ///< String ("free text") 91 eFunction, ///< Function 92 93 // Operation codes: 94 eNot, 95 eFieldSearch, 96 eLike, 97 eBetween, 98 eIn, 99 eAnd, 100 eOr, 101 eSub, 102 eXor, 103 eRange, 104 eEQ, 105 eGT, 106 eGE, 107 eLT, 108 eLE, 109 110 // SQL specific components 111 eSelect, 112 eFrom, 113 eWhere, 114 115 eList, 116 117 eMaxType 118 }; 119 120 /// Source location (points to the position in the original src) 121 /// All positions are 0 based 122 /// 123 struct SSrcLoc 124 { 125 unsigned line; ///< Src line number 126 unsigned pos; ///< Position in the src line 127 unsigned length; ///< Token length (optional) 128 SSrcLocCQueryParseNode::SSrcLoc129 SSrcLoc(unsigned src_line = 0, unsigned src_pos = 0, unsigned len = 0) 130 : line(src_line), pos(src_pos), length(len) 131 {} 132 }; 133 134 public: 135 /// Construct the query node 136 /// @param value Node value 137 /// @param orig_text Value as it appears in the original program 138 /// @param isIdent true whe the string is identifier (no quoting) 139 /// 140 CQueryParseNode(const string& value, const string& orig_text, bool isIdent); 141 142 explicit CQueryParseNode(Int8 val, const string& orig_text); 143 explicit CQueryParseNode(bool val, const string& orig_text); 144 explicit CQueryParseNode(double val, const string& orig_text); 145 explicit CQueryParseNode(EType op_type, const string& orig_text); 146 147 /// @name Source reference accessors 148 /// @{ 149 150 /// Set node location in the query text (for error diagnostics) SetLoc(const SSrcLoc & loc)151 void SetLoc(const SSrcLoc& loc) { m_Location = loc; } SetLoc(unsigned line,unsigned pos)152 void SetLoc(unsigned line, unsigned pos) 153 { 154 m_Location.line = line; 155 m_Location.pos = pos; 156 } GetLoc() const157 const SSrcLoc& GetLoc() const { return m_Location; } 158 159 /// @} 160 161 162 /// @name Value accessors 163 /// @{ 164 GetType() const165 EType GetType() const { return m_Type; } 166 const string& GetStrValue() const; 167 const string& GetIdent() const; GetOriginalText() const168 const string& GetOriginalText() const { return m_OrigText; } 169 Int8 GetInt() const; 170 bool GetBool() const; 171 double GetDouble() const; 172 173 int GetIdentIdx() const; GetOrig() const174 const string& GetOrig() const { return m_OrigText; } 175 176 /// @} 177 178 /// TRUE if node was created as explicitly 179 /// FALSE - node was created as a result of a default and the interpreter has 180 /// a degree of freedom in execution IsExplicit() const181 bool IsExplicit() const { return m_Explicit; } SetExplicit(bool expl=true)182 void SetExplicit(bool expl=true) { m_Explicit = expl; } 183 184 /// Check if node is marked with NOT flag (like != ) IsNot() const185 bool IsNot() const { return m_Not; } SetNot(bool n=true)186 void SetNot(bool n=true) { m_Not = n; } 187 188 /// Returns TRUE if node describes logical operation (AND, OR, etc.) IsLogic() const189 bool IsLogic() const 190 { 191 return m_Type == eNot || m_Type == eAnd || m_Type == eOr || 192 m_Type == eSub || m_Type == eXor; 193 } 194 195 /// Returns TRUE if node is value (INT, String, etc.) IsValue() const196 bool IsValue() const 197 { 198 return m_Type == eIdentifier || m_Type == eIntConst || 199 m_Type == eString || m_Type == eFloatConst || 200 m_Type == eBoolConst; 201 } 202 203 /// Elapsed time in seconds Elapsed() const204 double Elapsed() const { return m_Elapsed; } 205 /// Elapsed time in seconds GetElapsed() const206 double GetElapsed() const { return Elapsed(); } 207 208 /// Set node timing SetElapsed(double e)209 void SetElapsed(double e) { m_Elapsed = e; } 210 211 212 /// @name User object operations 213 /// 214 /// Methods to associate application specific data with 215 /// parsing tree node. 216 /// Data should be encapsulated into a user object derived 217 /// from CQueryParseBaseUserObject. 218 /// 219 /// @{ 220 221 /// Get user object GetUserObject() const222 const IQueryParseUserObject* GetUserObject() const 223 { return m_UsrObj.GetPointer(); } GetUserObject()224 IQueryParseUserObject* GetUserObject() 225 { return m_UsrObj.GetPointer(); } 226 227 /// Set user object. Query node takes ownership. 228 void AttachUserObject(IQueryParseUserObject* obj); SetUserObject(IQueryParseUserObject * obj)229 void SetUserObject(IQueryParseUserObject* obj) 230 { AttachUserObject(obj); } 231 232 /// Reset the associated user object 233 /// (see IQueryParseUserObject::Reset()) 234 /// 235 void ResetUserObject(); 236 237 /// @} 238 239 /// Return query node type as a string (for debugging output) 240 string GetNodeTypeAsString() const; 241 static string GetNodeTypeAsString(EType node_type); 242 243 private: 244 // required for use with CTreeNode<> 245 CQueryParseNode(); 246 friend class CTreeNode<CQueryParseNode>; 247 248 private: 249 EType m_Type; 250 union { 251 Int8 m_IntConst; 252 bool m_BoolConst; 253 double m_DoubleConst; 254 }; 255 string m_Value; 256 string m_OrigText; 257 bool m_Explicit; 258 bool m_Not; 259 SSrcLoc m_Location; ///< Reference to original location in query 260 double m_Elapsed; ///< Execution timing 261 262 CRef<IQueryParseUserObject> m_UsrObj; 263 }; 264 265 266 /// Query tree and associated utility methods 267 /// 268 class NCBI_XUTIL_EXPORT CQueryParseTree 269 { 270 public: 271 typedef CTreeNode<CQueryParseNode> TNode; 272 public: 273 /// Contruct the query. Takes the ownership of the clause. 274 explicit CQueryParseTree(TNode *clause=0); 275 virtual ~CQueryParseTree(); 276 277 278 /// Case sensitive parsing 279 /// 280 enum ECase { 281 eCaseSensitiveUpper, ///< Operators must come in upper case (AND) 282 eCaseInsensitive ///< Case insensitive parsing (AnD) 283 }; 284 285 /// Level of tolerance to syntax errors and problems 286 /// 287 enum ESyntaxCheck { 288 eSyntaxCheck, ///< Best possible check for errors 289 eSyntaxRelax ///< Relaxed parsing rules 290 }; 291 292 /// List of keywords recognised as functions 293 typedef vector<string> TFunctionNames; 294 295 /// Query parser front-end function 296 /// 297 /// @param query_str 298 /// Query string subject of parsing 299 /// @param case_sense 300 /// Case sensitivity (AND, AnD, etc.) 301 /// @param syntax_check 302 /// Sensitivity to syntax errors 303 /// @param verbose 304 /// Debug print switch 305 /// @param functions 306 /// List of names recognised as functions 307 /// 308 void Parse(const char* query_str, 309 ECase case_sense = eCaseInsensitive, 310 ESyntaxCheck syntax_check = eSyntaxCheck, 311 bool verbose = false, 312 const TFunctionNames& functions = TFunctionNames(0), 313 unsigned line = 0, 314 unsigned linePos = 0); 315 316 317 /// Replace current query tree with the new one. 318 /// CQueryParseTree takes ownership on the passed argument. 319 /// 320 void SetQueryTree(TNode* qtree); GetQueryTree() const321 const TNode* GetQueryTree() const { return m_Tree.get(); } GetQueryTree()322 TNode* GetQueryTree() { return m_Tree.get(); } 323 324 /// Reset all user objects attached to the parsing tree 325 void ResetUserObjects(); 326 327 328 /// @name Static node creation functions - 329 /// class factories working as virtual constructors 330 /// @{ 331 332 /// Create Identifier node or string node 333 virtual 334 TNode* CreateNode(const string& value, 335 const string& orig_text, 336 bool isIdent); 337 virtual TNode* CreateNode(Int8 value, const string& orig_text); 338 virtual TNode* CreateNode(bool value, const string& orig_text); 339 virtual TNode* CreateNode(double value, const string& orig_text); 340 virtual 341 TNode* CreateNode(CQueryParseNode::EType op, 342 TNode* arg1, 343 TNode* arg2, 344 const string& orig_text=""); 345 /// Create function node 346 virtual 347 TNode* CreateFuncNode(const string& func_name); 348 349 /// @} 350 351 /// Print the query tree (debugging) 352 void Print(CNcbiOstream& os) const; 353 354 private: 355 CQueryParseTree(const CQueryParseTree&); 356 CQueryParseTree& operator=(const CQueryParseTree&); 357 private: 358 unique_ptr<TNode> m_Tree; 359 }; 360 361 /// Query parser exceptions 362 /// 363 class NCBI_XUTIL_EXPORT CQueryParseException : public CException 364 { 365 public: 366 enum EErrCode { 367 eIncorrectNodeType, 368 eParserError, 369 eCompileError, 370 eUnknownFunction 371 }; 372 GetErrCodeString(void) const373 virtual const char* GetErrCodeString(void) const override 374 { 375 switch (GetErrCode()) 376 { 377 case eIncorrectNodeType: return "eIncorrectNodeType"; 378 case eParserError: return "eParserError"; 379 case eCompileError: return "eCompileError"; 380 case eUnknownFunction: return "eUnknownFunction"; 381 382 default: return CException::GetErrCodeString(); 383 } 384 } 385 386 NCBI_EXCEPTION_DEFAULT(CQueryParseException, CException); 387 }; 388 389 390 /* @} */ 391 392 END_NCBI_SCOPE 393 394 395 #endif // UTIL__QUERY_PARSER_HPP__ 396 397 398