1 #ifndef UTIL__QUERY_PARSER_HPP__
2 #define UTIL__QUERY_PARSER_HPP__
3 
4 /*  $Id: query_parse.hpp 575325 2018-11-27 18:22:00Z ucko $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Anatoliy Kuznetsov, Mike DiCuccio, Maxim Didenko
30  *
31  * File Description: Query parser implementation
32  *
33  */
34 
35 /// @file query_parse.hpp
36 /// Query string parsing components
37 
38 #include <corelib/ncbi_tree.hpp>
39 #include <corelib/ncbiobj.hpp>
40 
41 BEGIN_NCBI_SCOPE
42 
43 class CQueryParseTree;
44 
45 
46 
47 /** @addtogroup QParser
48  *
49  * @{
50  */
51 
/// Base class for user-defined objects attached to query nodes
///
/// A user object carries field-dependent data, meta-information,
/// execution-time data, etc. It can serve as a bridge between the parser
/// and a query execution engine.
///
58 
59 class NCBI_XUTIL_EXPORT IQueryParseUserObject : public CObject
60 {
61 public:
62 
63     /// Reset user object (for reuse without reallocation)
64     virtual void Reset() = 0;
65 
66     /// String value for debuging
GetVisibleValue() const67     virtual string GetVisibleValue() const { return ""; };
68 };
69 
70 
/// Query node class
///
/// A query node describes one element of the recursive parse tree
/// built for the query language.
/// (The tree is then interpreted by the execution machine.)
///
77 
78 class NCBI_XUTIL_EXPORT CQueryParseNode
79 {
80 public:
81 
82    /// Query node type
83    ///
84     enum EType {
85         eNotSet     = 0,   ///< Produced by the (private) default constructor
86         eIdentifier,       ///< Identifier like db.field (Org, Fld12, etc.)
87         eIntConst,         ///< Integer const
88         eFloatConst,       ///< Floating point const
89         eBoolConst,        ///< Boolean (TRUE or FALSE)
90         eString,           ///< String ("free text")
91         eFunction,         ///< Function
92 
93         // Operation codes:
94         eNot,
95         eFieldSearch,
96         eLike,
97         eBetween,
98         eIn,
99         eAnd,
100         eOr,
101         eSub,
102         eXor,
103         eRange,
104         eEQ,
105         eGT,
106         eGE,
107         eLT,
108         eLE,
109 
110         // SQL specific components
111         eSelect,
112         eFrom,
113         eWhere,
114 
115         eList,
116 
117         eMaxType
118     };
119 
120     /// Source location (points to the position in the original src)
121     /// All positions are 0 based
122     ///
123     struct SSrcLoc
124     {
125         unsigned   line;     ///< Src line number
126         unsigned   pos;      ///< Position in the src line
127         unsigned   length;   ///< Token length (optional)
128 
SSrcLocCQueryParseNode::SSrcLoc129         SSrcLoc(unsigned src_line = 0, unsigned src_pos = 0, unsigned len = 0)
130         : line(src_line), pos(src_pos), length(len)
131         {}
132     };
133 
134 public:
135     /// Construct the query node
136     /// @param value      Node value
137     /// @param orig_text  Value as it appears in the original program
138     /// @param isIdent    true whe the string is identifier (no quoting)
139     ///
140     CQueryParseNode(const string& value, const string& orig_text, bool isIdent);
141 
142     explicit CQueryParseNode(Int8   val, const string& orig_text);
143     explicit CQueryParseNode(bool   val, const string& orig_text);
144     explicit CQueryParseNode(double val, const string& orig_text);
145     explicit CQueryParseNode(EType op_type, const string& orig_text);
146 
147     /// @name Source reference accessors
148     /// @{
149 
150     /// Set node location in the query text (for error diagnostics)
SetLoc(const SSrcLoc & loc)151     void SetLoc(const SSrcLoc& loc) { m_Location = loc; }
SetLoc(unsigned line,unsigned pos)152     void SetLoc(unsigned line, unsigned pos)
153     {
154         m_Location.line = line;
155         m_Location.pos = pos;
156     }
GetLoc() const157     const SSrcLoc& GetLoc() const   { return m_Location; }
158 
159     /// @}
160 
161 
162     /// @name Value accessors
163     /// @{
164 
GetType() const165     EType     GetType() const { return m_Type; }
166     const string& GetStrValue() const;
167     const string& GetIdent() const;
GetOriginalText() const168     const string& GetOriginalText() const { return m_OrigText; }
169     Int8 GetInt() const;
170     bool GetBool() const;
171     double GetDouble() const;
172 
173     int GetIdentIdx() const;
GetOrig() const174     const string& GetOrig() const { return m_OrigText; }
175 
176     /// @}
177 
178     /// TRUE if node was created as explicitly
179     /// FALSE - node was created as a result of a default and the interpreter has
180     ///         a degree of freedom in execution
IsExplicit() const181     bool IsExplicit() const { return m_Explicit; }
SetExplicit(bool expl=true)182     void SetExplicit(bool expl=true) { m_Explicit = expl; }
183 
184     /// Check if node is marked with NOT flag (like != )
IsNot() const185     bool IsNot() const { return m_Not; }
SetNot(bool n=true)186     void SetNot(bool n=true) { m_Not = n; }
187 
188     /// Returns TRUE if node describes logical operation (AND, OR, etc.)
IsLogic() const189     bool IsLogic() const
190     {
191         return m_Type == eNot || m_Type == eAnd || m_Type == eOr ||
192                m_Type == eSub || m_Type == eXor;
193     }
194 
195     /// Returns TRUE if node is value (INT, String, etc.)
IsValue() const196     bool IsValue() const
197     {
198         return m_Type == eIdentifier || m_Type == eIntConst   ||
199                m_Type == eString     || m_Type == eFloatConst ||
200                m_Type == eBoolConst;
201     }
202 
203     /// Elapsed time in seconds
Elapsed() const204     double Elapsed() const { return m_Elapsed; }
205     /// Elapsed time in seconds
GetElapsed() const206     double GetElapsed() const { return Elapsed(); }
207 
208     /// Set node timing
SetElapsed(double e)209     void SetElapsed(double e) { m_Elapsed = e; }
210 
211 
212     /// @name User object operations
213     ///
214     /// Methods to associate application specific data with
215     /// parsing tree node.
216     /// Data should be encapsulated into a user object derived
217     /// from CQueryParseBaseUserObject.
218     ///
219     /// @{
220 
221     /// Get user object
GetUserObject() const222     const IQueryParseUserObject* GetUserObject() const
223                                     { return m_UsrObj.GetPointer(); }
GetUserObject()224     IQueryParseUserObject* GetUserObject()
225                                     { return m_UsrObj.GetPointer(); }
226 
227     /// Set user object. Query node takes ownership.
228     void AttachUserObject(IQueryParseUserObject* obj);
SetUserObject(IQueryParseUserObject * obj)229     void SetUserObject(IQueryParseUserObject* obj)
230                                     { AttachUserObject(obj); }
231 
232     /// Reset the associated user object
233     ///   (see IQueryParseUserObject::Reset())
234     ///
235     void ResetUserObject();
236 
237     /// @}
238 
239     /// Return query node type as a string (for debugging output)
240     string GetNodeTypeAsString() const;
241     static string GetNodeTypeAsString(EType node_type);
242 
243 private:
244     // required for use with CTreeNode<>
245     CQueryParseNode();
246     friend class CTreeNode<CQueryParseNode>;
247 
248 private:
249     EType         m_Type;
250     union {
251         Int8         m_IntConst;
252         bool         m_BoolConst;
253         double       m_DoubleConst;
254     };
255     string        m_Value;
256     string        m_OrigText;
257     bool          m_Explicit;
258     bool          m_Not;
259     SSrcLoc       m_Location;   ///< Reference to original location in query
260     double        m_Elapsed;    ///< Execution timing
261 
262     CRef<IQueryParseUserObject>  m_UsrObj;
263 };
264 
265 
266 /// Query tree and associated utility methods
267 ///
268 class NCBI_XUTIL_EXPORT CQueryParseTree
269 {
270 public:
271     typedef CTreeNode<CQueryParseNode> TNode;
272 public:
273     /// Contruct the query. Takes the ownership of the clause.
274     explicit CQueryParseTree(TNode *clause=0);
275     virtual ~CQueryParseTree();
276 
277 
278     /// Case sensitive parsing
279     ///
280     enum ECase {
281         eCaseSensitiveUpper, ///< Operators must come in upper case (AND)
282         eCaseInsensitive     ///< Case insensitive parsing (AnD)
283     };
284 
285     /// Level of tolerance to syntax errors and problems
286     ///
287     enum ESyntaxCheck {
288         eSyntaxCheck,      ///< Best possible check for errors
289         eSyntaxRelax       ///< Relaxed parsing rules
290     };
291 
292     /// List of keywords recognised as functions
293     typedef vector<string> TFunctionNames;
294 
295     /// Query parser front-end function
296     ///
297     /// @param query_str
298     ///    Query string subject of parsing
299     /// @param case_sense
300     ///    Case sensitivity (AND, AnD, etc.)
301     /// @param syntax_check
302     ///    Sensitivity to syntax errors
303     /// @param verbose
304     ///    Debug print switch
305     /// @param functions
306     ///    List of names recognised as functions
307     ///
308     void Parse(const char*   query_str,
309                ECase         case_sense        = eCaseInsensitive,
310                ESyntaxCheck  syntax_check      = eSyntaxCheck,
311                bool          verbose           = false,
312                const TFunctionNames& functions =  TFunctionNames(0),
313                unsigned     line               = 0,
314                unsigned     linePos            = 0);
315 
316 
317     /// Replace current query tree with the new one.
318     /// CQueryParseTree takes ownership on the passed argument.
319     ///
320     void SetQueryTree(TNode* qtree);
GetQueryTree() const321     const TNode* GetQueryTree() const { return m_Tree.get(); }
GetQueryTree()322     TNode* GetQueryTree() { return m_Tree.get(); }
323 
324     /// Reset all user objects attached to the parsing tree
325     void ResetUserObjects();
326 
327 
328     /// @name Static node creation functions -
329     ///       class factories working as virtual constructors
330     /// @{
331 
332     /// Create Identifier node or string node
333     virtual
334     TNode* CreateNode(const string&  value,
335                       const string&  orig_text,
336                       bool           isIdent);
337     virtual TNode* CreateNode(Int8   value, const string&  orig_text);
338     virtual TNode* CreateNode(bool   value, const string&  orig_text);
339     virtual TNode* CreateNode(double value, const string&  orig_text);
340     virtual
341     TNode* CreateNode(CQueryParseNode::EType op,
342                       TNode*                 arg1,
343                       TNode*                 arg2,
344                       const string&          orig_text="");
345     /// Create function node
346     virtual
347     TNode* CreateFuncNode(const string&  func_name);
348 
349     /// @}
350 
351     /// Print the query tree (debugging)
352     void Print(CNcbiOstream& os) const;
353 
354 private:
355     CQueryParseTree(const CQueryParseTree&);
356     CQueryParseTree& operator=(const CQueryParseTree&);
357 private:
358     unique_ptr<TNode> m_Tree;
359 };
360 
361 /// Query parser exceptions
362 ///
363 class NCBI_XUTIL_EXPORT CQueryParseException : public CException
364 {
365 public:
366     enum EErrCode {
367         eIncorrectNodeType,
368         eParserError,
369         eCompileError,
370         eUnknownFunction
371     };
372 
GetErrCodeString(void) const373     virtual const char* GetErrCodeString(void) const override
374     {
375         switch (GetErrCode())
376         {
377         case eIncorrectNodeType:       return "eIncorrectNodeType";
378         case eParserError:             return "eParserError";
379         case eCompileError:            return "eCompileError";
380         case eUnknownFunction:         return "eUnknownFunction";
381 
382         default: return CException::GetErrCodeString();
383         }
384     }
385 
386     NCBI_EXCEPTION_DEFAULT(CQueryParseException, CException);
387 };
388 
389 
390 /* @} */
391 
392 END_NCBI_SCOPE
393 
394 
395 #endif  // UTIL__QUERY_PARSER_HPP__
396 
397 
398