1 // Copyright (c) 2008 Roberto Raggi <roberto.raggi@gmail.com>
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #pragma once
22 
23 #include "CPlusPlusForwardDeclarations.h"
24 
25 namespace CPlusPlus {
26 
// Token kinds.
//
// Enumerators are numbered consecutively starting at zero. The T_FIRST_* and
// T_LAST_* names are not tokens of their own: each one aliases the first or
// last real token of a contiguous range, so that a kind can be classified by
// comparing it against the two ends of the range. Adding, removing or
// reordering a real token shifts the value of everything declared after it.
enum Kind {
    // ---- end of input / error ----
    T_EOF_SYMBOL = 0,
    T_ERROR,

    // ---- comments and identifiers ----
    T_CPP_COMMENT,
    T_CPP_DOXY_COMMENT,
    T_COMMENT,
    T_DOXY_COMMENT,
    T_IDENTIFIER,

    // ---- literals ----
    T_NUMERIC_LITERAL,
    T_FIRST_LITERAL = T_NUMERIC_LITERAL,

    // character literals
    T_CHAR_LITERAL,
    T_FIRST_CHAR_LITERAL = T_CHAR_LITERAL,
    T_WIDE_CHAR_LITERAL,
    T_UTF16_CHAR_LITERAL,
    T_UTF32_CHAR_LITERAL,
    T_LAST_CHAR_LITERAL = T_UTF32_CHAR_LITERAL,

    // string literals
    T_STRING_LITERAL,
    T_FIRST_STRING_LITERAL = T_STRING_LITERAL,
    T_WIDE_STRING_LITERAL,
    T_UTF8_STRING_LITERAL,
    T_UTF16_STRING_LITERAL,
    T_UTF32_STRING_LITERAL,

    // raw string literals (a sub-range of the string literals)
    T_RAW_STRING_LITERAL,
    T_FIRST_RAW_STRING_LITERAL = T_RAW_STRING_LITERAL,
    T_RAW_WIDE_STRING_LITERAL,
    T_RAW_UTF8_STRING_LITERAL,
    T_RAW_UTF16_STRING_LITERAL,
    T_RAW_UTF32_STRING_LITERAL,
    T_LAST_RAW_STRING_LITERAL = T_RAW_UTF32_STRING_LITERAL,

    T_AT_STRING_LITERAL,
    T_ANGLE_STRING_LITERAL,
    T_LAST_STRING_LITERAL = T_ANGLE_STRING_LITERAL,
    T_LAST_LITERAL = T_ANGLE_STRING_LITERAL,

    // ---- punctuation ----
    T_COLON,
    T_FIRST_PUNCTUATION_OR_OPERATOR = T_COLON,
    T_FIRST_PUNCTUATION = T_COLON,
    T_COLON_COLON,
    T_COMMA,
    T_GREATER,
    T_LESS,
    T_LBRACE,
    T_LBRACKET,
    T_LPAREN,
    T_RBRACE,
    T_RBRACKET,
    T_RPAREN,
    T_SEMICOLON,
    T_LAST_PUNCTUATION = T_SEMICOLON,

    // ---- operators ----
    T_AMPER,
    T_FIRST_OPERATOR = T_AMPER,
    T_AMPER_AMPER,
    T_AMPER_EQUAL,
    T_ARROW,
    T_ARROW_STAR,
    T_CARET,
    T_CARET_EQUAL,
    T_SLASH,
    T_SLASH_EQUAL,
    T_DOT,
    T_DOT_DOT_DOT,
    T_DOT_STAR,
    T_EQUAL,
    T_EQUAL_EQUAL,
    T_EXCLAIM,
    T_EXCLAIM_EQUAL,
    T_GREATER_EQUAL,
    T_GREATER_GREATER,
    T_GREATER_GREATER_EQUAL,
    T_LESS_EQUAL,
    T_LESS_LESS,
    T_LESS_LESS_EQUAL,
    T_MINUS,
    T_MINUS_EQUAL,
    T_MINUS_MINUS,
    T_PERCENT,
    T_PERCENT_EQUAL,
    T_PIPE,
    T_PIPE_EQUAL,
    T_PIPE_PIPE,
    T_PLUS,
    T_PLUS_EQUAL,
    T_PLUS_PLUS,
    T_POUND,
    T_POUND_POUND,
    T_QUESTION,
    T_STAR,
    T_STAR_EQUAL,
    T_TILDE,
    T_TILDE_EQUAL,
    T_LAST_OPERATOR = T_TILDE_EQUAL,
    T_LAST_PUNCTUATION_OR_OPERATOR = T_LAST_OPERATOR,

    // ---- keywords ----
    T_ALIGNAS,
    T_FIRST_KEYWORD = T_ALIGNAS,
    T_ALIGNOF,
    T_ASM,
    T_AUTO,
    T_BREAK,
    T_CASE,
    T_CATCH,
    T_CLASS,
    T_CONST,
    T_CONST_CAST,
    T_CONSTEXPR,
    T_CONTINUE,
    T_DECLTYPE,
    T_DEFAULT,
    T_DELETE,
    T_DO,
    T_DYNAMIC_CAST,
    T_ELSE,
    T_ENUM,
    T_EXPLICIT,
    T_EXPORT,
    T_EXTERN,
    T_FALSE,
    T_FOR,
    T_FRIEND,
    T_GOTO,
    T_IF,
    T_INLINE,
    T_MUTABLE,
    T_NAMESPACE,
    T_NEW,
    T_NOEXCEPT,
    T_NULLPTR,
    T_OPERATOR,
    T_PRIVATE,
    T_PROTECTED,
    T_PUBLIC,
    T_REGISTER,
    T_REINTERPRET_CAST,
    T_RETURN,
    T_SIZEOF,
    T_STATIC,
    T_STATIC_ASSERT,
    T_STATIC_CAST,
    T_STRUCT,
    T_SWITCH,
    T_TEMPLATE,
    T_THIS,
    T_THREAD_LOCAL,
    T_THROW,
    T_TRUE,
    T_TRY,
    T_TYPEDEF,
    T_TYPEID,
    T_TYPENAME,
    T_UNION,
    T_USING,
    T_VIRTUAL,
    T_VOLATILE,
    T_WHILE,

    // double-underscore keywords
    T___ATTRIBUTE__,
    T___THREAD,
    T___TYPEOF__,

    T___DECLSPEC,

    // obj c++ @ keywords
    T_AT_CATCH,
    T_FIRST_OBJC_AT_KEYWORD = T_AT_CATCH,
    T_AT_CLASS,
    T_AT_COMPATIBILITY_ALIAS,
    T_AT_DEFS,
    T_AT_DYNAMIC,
    T_AT_ENCODE,
    T_AT_END,
    T_AT_FINALLY,
    T_AT_IMPLEMENTATION,
    T_AT_INTERFACE,
    T_AT_NOT_KEYWORD,
    T_AT_OPTIONAL,
    T_AT_PACKAGE,
    T_AT_PRIVATE,
    T_AT_PROPERTY,
    T_AT_PROTECTED,
    T_AT_PROTOCOL,
    T_AT_PUBLIC,
    T_AT_REQUIRED,
    T_AT_SELECTOR,
    T_AT_SYNCHRONIZED,
    T_AT_SYNTHESIZE,
    T_AT_THROW,
    T_AT_TRY,
    T_LAST_OBJC_AT_KEYWORD = T_AT_TRY,

    // primitive types
    T_BOOL,
    T_FIRST_PRIMITIVE = T_BOOL,
    T_CHAR,
    T_CHAR16_T,
    T_CHAR32_T,
    T_DOUBLE,
    T_FLOAT,
    T_INT,
    T_LONG,
    T_SHORT,
    T_SIGNED,
    T_UNSIGNED,
    T_VOID,
    T_WCHAR_T,
    T_LAST_PRIMITIVE = T_WCHAR_T,

    // Qt keywords
    T_EMIT,
    T_FIRST_QT_KEYWORD = T_EMIT,
    T_SIGNAL,
    T_SLOT,
    T_Q_SIGNAL,
    T_Q_SLOT,
    T_Q_SIGNALS,
    T_Q_SLOTS,
    T_Q_FOREACH,
    T_Q_D,
    T_Q_Q,
    T_Q_INVOKABLE,
    T_Q_PROPERTY,
    T_Q_PRIVATE_PROPERTY,
    T_Q_INTERFACES,
    T_Q_EMIT,
    T_Q_ENUMS,
    T_Q_FLAGS,
    T_Q_PRIVATE_SLOT,
    T_Q_DECLARE_INTERFACE,
    T_Q_OBJECT,
    T_Q_GADGET,
    T_LAST_QT_KEYWORD = T_Q_GADGET,
    T_LAST_KEYWORD = T_LAST_QT_KEYWORD,

    T_LAST_TOKEN = T_LAST_KEYWORD,  // keep this before the aliases below

    // ---- aliases: additional names for kinds declared above ----

    // word spellings of operators
    T_OR = T_PIPE_PIPE,
    T_AND = T_AMPER_AMPER,
    T_NOT = T_EXCLAIM,
    T_XOR = T_CARET,
    T_BITOR = T_PIPE,
    T_COMPL = T_TILDE,
    T_OR_EQ = T_PIPE_EQUAL,
    T_AND_EQ = T_AMPER_EQUAL,
    T_BITAND = T_AMPER,
    T_NOT_EQ = T_EXCLAIM_EQUAL,
    T_XOR_EQ = T_CARET_EQUAL,

    // underscore-decorated spellings of keywords
    T___ASM = T_ASM,
    T___ASM__ = T_ASM,

    T_TYPEOF = T___TYPEOF__,
    T___TYPEOF = T___TYPEOF__,

    T___DECLTYPE = T_DECLTYPE,

    T___INLINE = T_INLINE,
    T___INLINE__ = T_INLINE,

    T___CONST = T_CONST,
    T___CONST__ = T_CONST,

    T___VOLATILE = T_VOLATILE,
    T___VOLATILE__ = T_VOLATILE,

    T___ATTRIBUTE = T___ATTRIBUTE__,
    T___ALIGNOF__ = T_ALIGNOF,

    // Qt and declspec spellings
    T_SLOTS = T_Q_SLOTS,
    T_FOREACH = T_Q_FOREACH,
    T_SIGNALS = T_Q_SIGNALS,
    T_Q_OVERRIDE = T_Q_PROPERTY,
    T__DECLSPEC = T___DECLSPEC,
};
305 
306 class CPLUSPLUS_EXPORT Token
307 {
308 public:
Token()309     Token() : flags(0), byteOffset(0), utf16charOffset(0), ptr(nullptr) {}
310 
is(unsigned k)311     inline bool is(unsigned k) const    { return f.kind == k; }
isNot(unsigned k)312     inline bool isNot(unsigned k) const { return f.kind != k; }
313     const char *spell() const;
314     void reset();
315 
kind()316     inline Kind kind() const { return static_cast<Kind>(f.kind); }
newline()317     inline bool newline() const { return f.newline; }
whitespace()318     inline bool whitespace() const { return f.whitespace; }
joined()319     inline bool joined() const { return f.joined; }
expanded()320     inline bool expanded() const { return f.expanded; }
generated()321     inline bool generated() const { return f.generated; }
userDefinedLiteral()322     inline bool userDefinedLiteral() const { return f.userDefinedLiteral; }
323 
bytes()324     inline int bytes() const { return f.bytes; }
bytesBegin()325     inline int bytesBegin() const { return byteOffset; }
bytesEnd()326     inline int bytesEnd() const { return byteOffset + f.bytes; }
327 
utf16chars()328     inline int utf16chars() const { return f.utf16chars; }
utf16charsBegin()329     inline int utf16charsBegin() const { return utf16charOffset; }
utf16charsEnd()330     inline int utf16charsEnd() const { return utf16charOffset + f.utf16chars; }
331 
isLiteral()332     inline bool isLiteral() const
333     { return f.kind >= T_FIRST_LITERAL && f.kind <= T_LAST_LITERAL; }
334 
isCharLiteral()335     inline bool isCharLiteral() const
336     { return f.kind >= T_FIRST_CHAR_LITERAL && f.kind <= T_LAST_CHAR_LITERAL; }
337 
isStringLiteral()338     inline bool isStringLiteral() const
339     { return f.kind >= T_FIRST_STRING_LITERAL && f.kind <= T_LAST_STRING_LITERAL; }
340 
isOperator()341     inline bool isOperator() const
342     { return f.kind >= T_FIRST_OPERATOR && f.kind <= T_LAST_OPERATOR; }
343 
isPunctuation()344     inline bool isPunctuation() const
345     { return f.kind >= T_FIRST_PUNCTUATION && f.kind <= T_LAST_PUNCTUATION; }
346 
isPunctuationOrOperator()347     inline bool isPunctuationOrOperator() const
348     { return f.kind >= T_FIRST_PUNCTUATION_OR_OPERATOR && f.kind <= T_LAST_PUNCTUATION_OR_OPERATOR; }
349 
isKeyword()350     inline bool isKeyword() const
351     { return f.kind >= T_FIRST_KEYWORD && f.kind < T_FIRST_PRIMITIVE; }
352 
isPrimitiveType()353     inline bool isPrimitiveType() const
354     { return f.kind >= T_FIRST_PRIMITIVE && f.kind <= T_LAST_PRIMITIVE; }
355 
isQtKeyword()356     inline bool isQtKeyword() const
357     { return f.kind >= T_FIRST_QT_KEYWORD && f.kind <= T_LAST_QT_KEYWORD; }
358 
isComment()359     inline bool isComment() const
360     { return f.kind == T_COMMENT || f.kind == T_DOXY_COMMENT ||
361       f.kind == T_CPP_COMMENT || f.kind == T_CPP_DOXY_COMMENT; }
362 
isObjCAtKeyword()363     inline bool isObjCAtKeyword() const
364     { return f.kind >= T_FIRST_OBJC_AT_KEYWORD && f.kind <= T_LAST_OBJC_AT_KEYWORD; }
365 
366     static const char *name(int kind);
367 
368 public:
369     struct Flags {
370         // The token kind.
371         unsigned kind          : 8;
372         // The token starts a new line.
373         unsigned newline       : 1;
374         // The token is preceeded by whitespace(s).
375         unsigned whitespace    : 1;
376         // The token is joined with the previous one.
377         unsigned joined        : 1;
378         // The token originates from a macro expansion.
379         unsigned expanded      : 1;
380         // The token originates from a macro expansion and does not correspond to an
381         // argument that went through substitution. Notice the example:
382         //
383         // #define FOO(a, b) a + b;
384         // FOO(1, 2)
385         //
386         // After preprocessing we would expect the following tokens: 1 + 2;
387         // Tokens '1', '+', '2', and ';' are all expanded. However only tokens '+' and ';'
388         // are generated.
389         unsigned generated     : 1;
390         // The token is C++11 user-defined literal such as:
391         // 12_km, 0.5_Pa, 'c'_X, "abd"_L, u16"xyz"_M
392         unsigned userDefinedLiteral : 1;
393         // Indicates the token is a trigraph
394         unsigned trigraph      : 1;
395         // Unused...
396         unsigned pad           : 1;
397         // The token length in bytes and UTF16 chars.
398         unsigned bytes         : 16;
399         unsigned utf16chars    : 16;
400     };
401     union {
402         unsigned long flags;
403         Flags f;
404     };
405 
406     unsigned byteOffset;
407     unsigned utf16charOffset;
408 
409     union {
410         void *ptr;
411         const Literal *literal;
412         const NumericLiteral *number;
413         const StringLiteral *string;
414         const Identifier *identifier;
415         unsigned close_brace;
416         int lineno;
417     };
418 };
419 
// A set of one-bit language feature switches, also accessible as a single
// integer through `flags`.
struct LanguageFeatures
{
    // Starts with every switch off.
    LanguageFeatures()
        : flags(0)
    {}

    // Returns a feature set whose flag word is 0xffffffff, i.e. with every
    // switch declared below turned on.
    static LanguageFeatures defaultFeatures()
    {
        LanguageFeatures everythingOn;
        everythingOn.flags = 0xffffffff; // Enable all flags
        return everythingOn;
    }

    // Two feature sets are equal when their flag words are equal.
    bool operator==(const LanguageFeatures &other) const { return flags == other.flags; }
    bool operator!=(const LanguageFeatures &other) const { return !(*this == other); }

    union {
        // All switches packed into one word.
        unsigned int flags;
        // The individual switches.
        struct {
            unsigned int qtEnabled : 1;         // If Qt is used.
            unsigned int qtMocRunEnabled : 1;
            unsigned int qtKeywordsEnabled : 1; // If Qt is used but QT_NO_KEYWORDS defined
            unsigned int cxxEnabled : 1;
            unsigned int cxx11Enabled : 1;
            unsigned int cxx14Enabled : 1;
            unsigned int objCEnabled : 1;
            unsigned int c99Enabled : 1;
        };
    };
};
454 
455 } // namespace CPlusPlus
456