1 /****************************************************************************
2 **
3 ** Copyright (C) 2019 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the tools applications of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
21 ** included in the packaging of this file. Please review the following
22 ** information to ensure the GNU General Public License requirements will
23 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
24 **
25 ** $QT_END_LICENSE$
26 **
27 ****************************************************************************/
28 
29 /*
30   tokenizer.h
31 */
32 
33 #ifndef TOKENIZER_H
34 #define TOKENIZER_H
35 
36 #include "location.h"
37 
38 #include <QtCore/qfile.h>
39 #include <QtCore/qstack.h>
40 #include <QtCore/qstring.h>
41 
42 QT_BEGIN_NAMESPACE
43 
44 /*
45   Here come the C++ tokens we support.  The first part contains
46   all-purpose tokens; then come keywords.
47 
48   If you add a keyword, make sure to modify the keyword array in
49   tokenizer.cpp as well, and possibly adjust Tok_FirstKeyword and
50   Tok_LastKeyword.
51 */
enum {
    /*
      All-purpose tokens. Tok_Eoi is the first enumerator, so its
      value is 0; that is spelled out here.
    */
    Tok_Eoi = 0,
    Tok_Ampersand, Tok_Aster, Tok_Caret,
    Tok_LeftParen, Tok_RightParen, Tok_LeftParenAster,
    Tok_Equal,
    Tok_LeftBrace, Tok_RightBrace,
    Tok_Semicolon, Tok_Colon,
    Tok_LeftAngle, Tok_RightAngle,
    Tok_Comma, Tok_Ellipsis, Tok_Gulbrandsen,
    Tok_LeftBracket, Tok_RightBracket,
    Tok_Tilde, Tok_SomeOperator,
    Tok_Number, Tok_String, Tok_Doc, Tok_Comment, Tok_Ident, Tok_At,

    /*
      Keywords. Every enumerator from Tok_char up to and including
      Tok_QPrivateSignal lies in the range delimited by
      Tok_FirstKeyword and Tok_LastKeyword below. The order must stay
      in step with the keyword array in tokenizer.cpp.
    */
    Tok_char, Tok_class, Tok_const, Tok_double, Tok_enum, Tok_explicit,
    Tok_friend, Tok_inline, Tok_int, Tok_long, Tok_namespace, Tok_operator,
    Tok_private, Tok_protected, Tok_public, Tok_short, Tok_signals, Tok_signed,
    Tok_slots, Tok_static, Tok_struct, Tok_template, Tok_typedef, Tok_typename,
    Tok_union, Tok_unsigned, Tok_using, Tok_virtual, Tok_void, Tok_volatile,
    Tok_int64, Tok_default, Tok_delete, Tok_final, Tok_override,

    Tok_Q_OBJECT, Tok_Q_OVERRIDE, Tok_Q_PROPERTY, Tok_Q_PRIVATE_PROPERTY,
    Tok_Q_DECLARE_SEQUENTIAL_ITERATOR,
    Tok_Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR,
    Tok_Q_DECLARE_ASSOCIATIVE_ITERATOR,
    Tok_Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR,
    Tok_Q_DECLARE_FLAGS,
    Tok_Q_SIGNALS, Tok_Q_SLOTS,
    Tok_QT_COMPAT, Tok_QT_COMPAT_CONSTRUCTOR,
    Tok_QT_DEPRECATED, Tok_QT_MOC_COMPAT, Tok_QT_MODULE,
    Tok_QT3_SUPPORT, Tok_QT3_SUPPORT_CONSTRUCTOR, Tok_QT3_MOC_SUPPORT,
    Tok_QDOC_PROPERTY,
    Tok_QPrivateSignal,

    /*
      Inclusive bounds of the keyword range. These are aliases of
      existing enumerators, not additional token values.
    */
    Tok_FirstKeyword = Tok_char,
    Tok_LastKeyword = Tok_QPrivateSignal
};
139 
140 /*
141   The Tokenizer class implements lexical analysis of C++ source
142   files.
143 
144   Not every operator or keyword of C++ is recognized; only those
145   that are interesting to us. Some Qt keywords or macros are also
146   recognized.
147 */
148 
149 class Tokenizer
150 {
151     Q_DECLARE_TR_FUNCTIONS(QDoc::Tokenizer)
152 
153 public:
154     Tokenizer(const Location &loc, const QByteArray &in);
155     Tokenizer(const Location &loc, QFile &file);
156 
157     ~Tokenizer();
158 
159     int getToken();
setParsingFnOrMacro(bool macro)160     void setParsingFnOrMacro(bool macro) { parsingMacro = macro; }
parsingFnOrMacro()161     bool parsingFnOrMacro() const { return parsingMacro; }
162 
location()163     const Location &location() const { return yyTokLoc; }
164     QString previousLexeme() const;
165     QString lexeme() const;
version()166     QString version() const { return yyVersion; }
braceDepth()167     int braceDepth() const { return yyBraceDepth; }
parenDepth()168     int parenDepth() const { return yyParenDepth; }
bracketDepth()169     int bracketDepth() const { return yyBracketDepth; }
tokenLocation()170     Location &tokenLocation() { return yyTokLoc; }
171 
172     static void initialize();
173     static void terminate();
174     static bool isTrue(const QString &condition);
175 
176 private:
177     void init();
178     void start(const Location &loc);
179     /*
180       This limit on the length of a lexeme seems fairly high, but a
181       doc comment can be arbitrarily long. The previous 65,536 limit
182       was reached by Mark Summerfield.
183     */
184     enum { yyLexBufSize = 524288 };
185 
getch()186     int getch() { return yyPos == yyIn.size() ? EOF : yyIn[yyPos++]; }
187 
getChar()188     inline int getChar()
189     {
190         if (yyCh == EOF)
191             return EOF;
192         if (yyLexLen < yyLexBufSize - 1) {
193             yyLex[yyLexLen++] = (char)yyCh;
194             yyLex[yyLexLen] = '\0';
195         }
196         yyCurLoc.advance(yyCh);
197         int ch = getch();
198         if (ch == EOF)
199             return EOF;
200         // cast explicitly to make sure the value of ch
201         // is in range [0..255] to avoid assert messages
202         // when using debug CRT that checks its input.
203         return int(uint(uchar(ch)));
204     }
205 
206     int getTokenAfterPreprocessor();
207     void pushSkipping(bool skip);
208     bool popSkipping();
209 
210     Location yyTokLoc;
211     Location yyCurLoc;
212     char *yyLexBuf1;
213     char *yyLexBuf2;
214     char *yyPrevLex;
215     char *yyLex;
216     size_t yyLexLen;
217     QStack<bool> yyPreprocessorSkipping;
218     int yyNumPreprocessorSkipping;
219     int yyBraceDepth;
220     int yyParenDepth;
221     int yyBracketDepth;
222     int yyCh;
223 
224     QString yyVersion;
225     bool parsingMacro;
226 
227 protected:
228     QByteArray yyIn;
229     int yyPos;
230 };
231 
232 QT_END_NAMESPACE
233 
234 #endif
235