1 /*
2  * tokenizer.h - Copyright 2005 Maksim Orlovich <maksim@kde.org>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #ifndef TOKENIZER_H
26 #define TOKENIZER_H
27 
28 #include <QSet>
29 #include <QString>
30 #include <QHash>
31 
32 #include "step.h"
33 #include "path.h"
34 #include "predicate.h"
35 #include "expression.h"
36 #include "util.h"
37 #include "parser.h"
38 
39 namespace khtml
40 {
41 namespace XPath
42 {
43 
44 struct Token {
45     int     type;
46     QString value;
47     int     intValue; //0 if not set
48     bool    hasString;
49 
TokenToken50     Token(int _type): type(_type), intValue(0), hasString(false) {}
TokenToken51     Token(QString _value): type(ERROR + 1), value(_value), intValue(0), hasString(true) {}
TokenToken52     Token(int _type, QString _value): type(_type), value(_value), intValue(0), hasString(true) {}
TokenToken53     Token(int _type, int     _value): type(_type), intValue(_value), hasString(false) {}
54 };
55 
56 class Tokenizer
57 {
58 private:
59     int m_nextPos;
60     QString m_data;
61     int m_lastTokenType;
62 
63     static QHash<QString, Step::AxisType> *s_axisNamesDict;
64     static QSet<QString> *s_nodeTypeNamesDict;
65 
66     enum XMLCat {
67         NameStart,
68         NameCont,
69         NotPartOfName
70     };
71 
72     XMLCat charCat(QChar aChar);
73 
74     bool isAxisName(QString name, Step::AxisType *type = nullptr);
75     bool isNodeTypeName(QString name);
76     bool isOperatorContext();
77 
78     void  skipWS();
79     Token makeTokenAndAdvance(int code, int advance = 1);
80     Token makeIntTokenAndAdvance(int code, int val, int advance = 1);
81     char  peekAheadHelper();
82     char  peekCurHelper();
83 
84     Token lexString();
85     Token lexNumber();
86     Token lexNCName();
87     Token lexQName();
88 
89     Token nextTokenInternal();
90     Tokenizer();
91     Tokenizer(const Tokenizer &rhs);                  // disabled
92     Tokenizer &operator=(const Tokenizer &rhs);       // disabled
93     ~Tokenizer();
94 public:
95     static Tokenizer &self();
96 
97     void reset(QString);
98     Token nextToken();
99 };
100 
101 // Interface to the parser
102 int khtmlxpathyylex();
103 void khtmlxpathyyerror(const char *str);
104 void initTokenizer(const DOM::DOMString &string);
105 
106 } // namespace XPath
107 
108 } // namespace khtml
109 
110 #endif
111