1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of Qt Creator.
7 **
8 ** Commercial License Usage
9 ** Licensees holding valid commercial Qt licenses may use this file in
10 ** accordance with the commercial license agreement provided with the
11 ** Software or, alternatively, in accordance with the terms contained in
12 ** a written agreement between you and The Qt Company. For licensing terms
13 ** and conditions see https://www.qt.io/terms-conditions. For further
14 ** information use the contact form at https://www.qt.io/contact-us.
15 **
16 ** GNU General Public License Usage
17 ** Alternatively, this file may be used under the terms of the GNU
18 ** General Public License version 3 as published by the Free Software
19 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
20 ** included in the packaging of this file. Please review the following
21 ** information to ensure the GNU General Public License requirements will
22 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
23 **
24 ****************************************************************************/
25
26 /*
27 Copyright 2005 Roberto Raggi <roberto@kdevelop.org>
28
29 Permission to use, copy, modify, distribute, and sell this software and its
30 documentation for any purpose is hereby granted without fee, provided that
31 the above copyright notice appear in all copies and that both that
32 copyright notice and this permission notice appear in supporting
33 documentation.
34
35 The above copyright notice and this permission notice shall be included in
36 all copies or substantial portions of the Software.
37
38 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
39 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
40 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
41 KDEVELOP TEAM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
42 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
43 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
44 */
45
46 #include "pp.h"
47 #include "pp-cctype.h"
48
49 #include <cplusplus/Control.h>
50 #include <cplusplus/Lexer.h>
51 #include <cplusplus/Token.h>
52 #include <cplusplus/Literals.h>
53 #include <cplusplus/cppassert.h>
54
55 #include <utils/executeondestruction.h>
56 #include <utils/scopedswap.h>
57
58 #include <QDebug>
59 #include <QList>
60 #include <QDate>
61 #include <QTime>
62 #include <QPair>
63
64 #include <cctype>
65 #include <list>
66 #include <algorithm>
67
68 #define NO_DEBUG
69
70 #ifndef NO_DEBUG
71 # include <iostream>
72 #endif // NO_DEBUG
73
74 #include <deque>
75
76 using namespace Utils;
77
namespace {
// Hard limits that keep the preprocessor from running away on pathological input.
enum {
    // Upper bound on the number of arguments of a function-like macro.
    MAX_FUNCTION_LIKE_ARGUMENTS_COUNT = 100,

    // Upper bound on token expansions.
    MAX_TOKEN_EXPANSION_COUNT = 5000,

    // For when macros are using some kind of right-folding, this is the list of
    // "delayed" buffers waiting to be expanded after the current one.
    MAX_TOKEN_BUFFER_DEPTH = 16000
};
} // anonymous namespace
85
86 namespace {
/// Returns true when the first \a size bytes of \a a and \a b compare equal
/// according to strncmp().
static bool same(const char *a, const char *b, int size)
{
    const int difference = strncmp(a, b, size);
    return !difference;
}
91
/// Returns true when the \a size bytes starting at \a name spell one of the Qt
/// keywords recognized here: the Q_ prefixed forms (Q_D, Q_Q, Q_SLOT, Q_EMIT,
/// Q_SLOTS, Q_ENUMS, Q_FLAGS, Q_SIGNAL, Q_SIGNALS, Q_FOREACH, Q_PROPERTY,
/// Q_INVOKABLE, Q_INTERFACES, Q_PRIVATE_PROPERTY) as well as SIGNAL, SLOT,
/// signals, slots, foreach and emit.
static bool isQtReservedWord(const char *name, int size)
{
    // The shortest candidates ("Q_D", "Q_Q") are three bytes long.
    if (size < 3)
        return false;

    // True when the identifier has exactly wordSize bytes and they spell word.
    const auto spells = [name, size](const char *word, int wordSize) {
        return size == wordSize && strncmp(name, word, wordSize) == 0;
    };

    switch (name[0]) {
    case 'Q': {
        if (name[1] != '_')
            return false;

        // Look only at what follows the "Q_" prefix.
        const char *rest = name + 2;
        const int restSize = size - 2;
        const auto restIs = [rest, restSize](const char *word) {
            return strncmp(rest, word, restSize) == 0;
        };

        switch (restSize) {
        case 1:
            return rest[0] == 'D' || rest[0] == 'Q';
        case 4:
            return restIs("SLOT") || restIs("EMIT");
        case 5:
            return restIs("SLOTS") || restIs("ENUMS") || restIs("FLAGS");
        case 6:
            return restIs("SIGNAL");
        case 7:
            return restIs("SIGNALS") || restIs("FOREACH");
        case 8:
            return restIs("PROPERTY");
        case 9:
            return restIs("INVOKABLE");
        case 10:
            return restIs("INTERFACES");
        case 16:
            return restIs("PRIVATE_PROPERTY");
        default:
            return false;
        }
    }
    case 'S':
        return spells("SIGNAL", 6) || spells("SLOT", 4);
    case 's':
        return spells("signals", 7) || spells("slots", 5);
    case 'f':
        return spells("foreach", 7);
    case 'e':
        return spells("emit", 4);
    default:
        return false;
    }
}
141
nestingTooDeep()142 static void nestingTooDeep()
143 {
144 #ifndef NO_DEBUG
145 std::cerr << "*** WARNING #if / #ifdef nesting exceeded the max level " << MAX_LEVEL << std::endl;
146 #endif
147 }
148
149 } // anonymous namespace
150
151 namespace CPlusPlus {
152
153 namespace Internal {
154 /// Buffers tokens for the Preprocessor::lex() to read next. Do not use this
155 /// class directly, but use Preprocessor::State::pushTokenBuffer .
156 ///
157 /// New tokens are added when undoing look-ahead, or after expanding a macro.
158 /// When macro expansion happened, the macro is passed in, and blocked until
159 /// all tokens generated by it (and by subsequent expansion of those generated
160 /// tokens) are read from the buffer. See Preprocessor::lex() for details on
161 /// exactly when the buffer (and subsequently a blocking macro) is removed.
162 struct TokenBuffer
163 {
164 std::deque<PPToken> tokens;
165 std::vector<QByteArray> blockedMacroNames;
166 const Macro *macro;
167 TokenBuffer *next;
168
TokenBufferCPlusPlus::Internal::TokenBuffer169 TokenBuffer(const PPToken *start, const PPToken *end, const Macro *macro, TokenBuffer *next)
170 : tokens(start, end), macro(macro), next(next)
171 {}
172
isBlockedCPlusPlus::Internal::TokenBuffer173 bool isBlocked(const Macro *macro) const {
174 if (!macro)
175 return false;
176
177 for (const TokenBuffer *it = this; it; it = it->next) {
178 if (it->macro && (it->macro == macro || it->macro->name() == macro->name()))
179 return true;
180 }
181 for (const QByteArray &blockedMacroName : blockedMacroNames) {
182 if (macro->name() == blockedMacroName)
183 return true;
184 }
185 return false;
186 }
187 };
188
/// Integer value produced while evaluating a preprocessor condition.
///
/// The value is either a signed long or an unsigned long, as recorded in
/// \c kind. Binary operators work in unsigned arithmetic as soon as one
/// operand is unsigned, otherwise in signed arithmetic.
struct Value
{
    enum Kind {
        Kind_Long,
        Kind_ULong
    };

    Kind kind;

    // Both members share storage; which one is meaningful is told by `kind`.
    union {
        long l;
        unsigned long ul;
    };


    /// Constructs a signed zero.
    Value()
        : kind(Kind_Long), l(0)
    { }

    inline bool is_ulong () const
    { return kind == Kind_ULong; }

    inline void set_ulong (unsigned long v)
    {
        ul = v;
        kind = Kind_ULong;
    }

    inline void set_long (long v)
    {
        l = v;
        kind = Kind_Long;
    }

    inline bool is_zero () const
    { return l == 0; }

// Arithmetic, shift and bitwise operators: the result has the common kind of
// the operands.
#define PP_DEFINE_BIN_OP(name, op) \
    inline Value operator op(const Value &other) const \
    { \
        Value v = *this; \
        if (v.is_ulong () || other.is_ulong ()) \
            v.set_ulong (v.ul op other.ul); \
        else \
            v.set_long (v.l op other.l); \
        return v; \
    }

// Relational, equality and logical operators: the operands are compared in
// their common kind, but the result (0 or 1) is always a signed value.
#define PP_DEFINE_CMP_OP(name, op) \
    inline Value operator op(const Value &other) const \
    { \
        Value v; \
        if (is_ulong () || other.is_ulong ()) \
            v.set_long (ul op other.ul); \
        else \
            v.set_long (l op other.l); \
        return v; \
    }

    PP_DEFINE_BIN_OP(op_add, +)
    PP_DEFINE_BIN_OP(op_sub, -)
    PP_DEFINE_BIN_OP(op_mult, *)
    PP_DEFINE_BIN_OP(op_lhs, <<)
    PP_DEFINE_BIN_OP(op_rhs, >>)
    PP_DEFINE_CMP_OP(op_lt, <)
    PP_DEFINE_CMP_OP(op_gt, >)
    PP_DEFINE_CMP_OP(op_le, <=)
    PP_DEFINE_CMP_OP(op_ge, >=)
    PP_DEFINE_CMP_OP(op_eq, ==)
    PP_DEFINE_CMP_OP(op_ne, !=)
    PP_DEFINE_BIN_OP(op_bit_and, &)
    PP_DEFINE_BIN_OP(op_bit_or, |)
    PP_DEFINE_BIN_OP(op_bit_xor, ^)
    PP_DEFINE_CMP_OP(op_and, &&)
    PP_DEFINE_CMP_OP(op_or, ||)

#undef PP_DEFINE_CMP_OP
#undef PP_DEFINE_BIN_OP

    /// Division that never traps: a zero divisor yields a signed zero, and the
    /// signed case "most negative value / -1" wraps around instead of
    /// overflowing.
    inline Value operator /(const Value &other) const
    {
        if (other.is_zero ())
            return Value();

        Value v = *this;
        if (v.is_ulong () || other.is_ulong ())
            v.set_ulong (v.ul / other.ul);
        else if (other.l == -1)
            // x / -1 is -x; negate in unsigned arithmetic so that the most
            // negative value does not overflow.
            v.set_long (static_cast<long>(0UL - static_cast<unsigned long>(v.l)));
        else
            v.set_long (v.l / other.l);
        return v;
    }

    /// Remainder that never traps: a zero divisor yields a signed zero, and
    /// the signed remainder of a division by -1 is always 0.
    inline Value operator %(const Value &other) const
    {
        if (other.is_zero ())
            return Value();

        Value v = *this;
        if (v.is_ulong () || other.is_ulong ())
            v.set_ulong (v.ul % other.ul);
        else if (other.l == -1)
            v.set_long (0);
        else
            v.set_long (v.l % other.l);
        return v;
    }
};
258
259 } // namespace Internal
260 } // namespace CPlusPlus
261
262 using namespace CPlusPlus;
263 using namespace CPlusPlus::Internal;
264
265 namespace {
266
isContinuationToken(const PPToken & tk)267 inline bool isContinuationToken(const PPToken &tk)
268 {
269 return tk.isNot(T_EOF_SYMBOL) && (! tk.newline() || tk.joined());
270 }
271
macroDefinition(const ByteArrayRef & name,unsigned bytesOffset,unsigned utf16charsOffset,unsigned line,Environment * env,Client * client)272 Macro *macroDefinition(const ByteArrayRef &name,
273 unsigned bytesOffset,
274 unsigned utf16charsOffset,
275 unsigned line,
276 Environment *env,
277 Client *client)
278 {
279 Macro *m = env->resolve(name);
280 if (client) {
281 if (m)
282 client->passedMacroDefinitionCheck(bytesOffset, utf16charsOffset, line, *m);
283 else
284 client->failedMacroDefinitionCheck(bytesOffset, utf16charsOffset, name);
285 }
286 return m;
287 }
288
289 class RangeLexer
290 {
291 const Token *first;
292 const Token *last;
293 Token trivial;
294
295 public:
RangeLexer(const Token * first,const Token * last)296 inline RangeLexer(const Token *first, const Token *last)
297 : first(first), last(last)
298 {
299 // WARN: `last' must be a valid iterator.
300 trivial.byteOffset = last->byteOffset;
301 trivial.utf16charOffset = last->utf16charOffset;
302 }
303
operator bool() const304 inline explicit operator bool() const
305 { return first != last; }
306
isValid() const307 inline bool isValid() const
308 { return first != last; }
309
size() const310 inline int size() const
311 { return std::distance(first, last); }
312
dot() const313 inline const Token *dot() const
314 { return first; }
315
operator *() const316 inline const Token &operator*() const
317 {
318 if (first != last)
319 return *first;
320
321 return trivial;
322 }
323
operator ->() const324 inline const Token *operator->() const
325 {
326 if (first != last)
327 return first;
328
329 return &trivial;
330 }
331
operator ++()332 inline RangeLexer &operator++()
333 {
334 ++first;
335 return *this;
336 }
337 };
338
339 class ExpressionEvaluator
340 {
341 ExpressionEvaluator(const ExpressionEvaluator &other);
342 void operator = (const ExpressionEvaluator &other);
343
344 public:
ExpressionEvaluator(Client * client,Environment * env)345 ExpressionEvaluator(Client *client, Environment *env)
346 : client(client), env(env), _lex(nullptr)
347 { }
348
operator ()(const Token * firstToken,const Token * lastToken,const QByteArray & source)349 Value operator()(const Token *firstToken, const Token *lastToken,
350 const QByteArray &source)
351 {
352 this->source = source;
353 const Value previousValue = switchValue(Value());
354 RangeLexer tmp(firstToken, lastToken);
355 RangeLexer *previousLex = _lex;
356 _lex = &tmp;
357 process_expression();
358 _lex = previousLex;
359 return switchValue(previousValue);
360 }
361
362 protected:
switchValue(const Value & value)363 Value switchValue(const Value &value)
364 {
365 Value previousValue = _value;
366 _value = value;
367 return previousValue;
368 }
369
isTokenDefined() const370 bool isTokenDefined() const
371 {
372 if ((*_lex)->isNot(T_IDENTIFIER))
373 return false;
374 const ByteArrayRef spell = tokenSpell();
375 if (spell.size() != 7)
376 return false;
377 return spell == "defined";
378 }
379
tokenPosition() const380 const char *tokenPosition() const
381 {
382 return source.constData() + (*_lex)->byteOffset;
383 }
384
tokenLength() const385 int tokenLength() const
386 {
387 return (*_lex)->f.bytes;
388 }
389
tokenSpell() const390 ByteArrayRef tokenSpell() const
391 {
392 return ByteArrayRef(tokenPosition(), tokenLength());
393 }
394
process_expression()395 inline void process_expression()
396 { process_constant_expression(); }
397
process_primary()398 void process_primary()
399 {
400 if ((*_lex)->is(T_NUMERIC_LITERAL)) {
401 const char *spell = tokenPosition();
402 int len = tokenLength();
403 while (len) {
404 const char ch = spell[len - 1];
405
406 if (! (ch == 'u' || ch == 'U' || ch == 'l' || ch == 'L'))
407 break;
408 --len;
409 }
410
411 const char *end = spell + len;
412 char *vend = const_cast<char *>(end);
413 _value.set_long(strtol(spell, &vend, 0));
414 // TODO: if (vend != end) error(NaN)
415 // TODO: binary literals
416 // TODO: float literals
417 ++(*_lex);
418 } else if (isTokenDefined()) {
419 ++(*_lex);
420 if ((*_lex)->is(T_IDENTIFIER)) {
421 _value.set_long(macroDefinition(tokenSpell(),
422 (*_lex)->byteOffset,
423 (*_lex)->utf16charOffset,
424 (*_lex)->lineno, env, client)
425 != nullptr);
426 ++(*_lex);
427 } else if ((*_lex)->is(T_LPAREN)) {
428 ++(*_lex);
429 if ((*_lex)->is(T_IDENTIFIER)) {
430 _value.set_long(macroDefinition(tokenSpell(),
431 (*_lex)->byteOffset,
432 (*_lex)->utf16charOffset,
433 (*_lex)->lineno,
434 env, client)
435 != nullptr);
436 ++(*_lex);
437 if ((*_lex)->is(T_RPAREN))
438 ++(*_lex);
439 }
440 }
441 } else if ((*_lex)->is(T_IDENTIFIER)) {
442 _value.set_long(0);
443 ++(*_lex);
444 } else if ((*_lex)->is(T_MINUS)) {
445 ++(*_lex);
446 process_primary();
447 _value.set_long(- _value.l);
448 } else if ((*_lex)->is(T_PLUS)) {
449 ++(*_lex);
450 process_primary();
451 } else if ((*_lex)->is(T_TILDE)) {
452 ++(*_lex);
453 process_primary();
454 _value.set_long(~ _value.l);
455 } else if ((*_lex)->is(T_EXCLAIM)) {
456 ++(*_lex);
457 process_primary();
458 _value.set_long(_value.is_zero());
459 } else if ((*_lex)->is(T_LPAREN)) {
460 ++(*_lex);
461 process_expression();
462 if ((*_lex)->is(T_RPAREN))
463 ++(*_lex);
464 }
465 }
466
process_expression_with_operator_precedence(const Value & lhs,int minPrecedence)467 Value process_expression_with_operator_precedence(const Value &lhs, int minPrecedence)
468 {
469 Value result = lhs;
470
471 while (precedence((*_lex)->kind()) >= minPrecedence) {
472 const int oper = (*_lex)->kind();
473 const int operPrecedence = precedence(oper);
474 ++(*_lex);
475 process_primary();
476 Value rhs = _value;
477
478 for (int LA_token_kind = (*_lex)->kind(), LA_precedence = precedence(LA_token_kind);
479 LA_precedence > operPrecedence && isBinaryOperator(LA_token_kind);
480 LA_token_kind = (*_lex)->kind(), LA_precedence = precedence(LA_token_kind)) {
481 rhs = process_expression_with_operator_precedence(rhs, LA_precedence);
482 }
483
484 result = evaluate_expression(oper, result, rhs);
485 }
486
487 return result;
488 }
489
process_constant_expression()490 void process_constant_expression()
491 {
492 process_primary();
493 _value = process_expression_with_operator_precedence(_value, precedence(T_PIPE_PIPE));
494
495 if ((*_lex)->is(T_QUESTION)) {
496 const Value cond = _value;
497 ++(*_lex);
498 process_constant_expression();
499 Value left = _value, right;
500 if ((*_lex)->is(T_COLON)) {
501 ++(*_lex);
502 process_constant_expression();
503 right = _value;
504 }
505 _value = ! cond.is_zero() ? left : right;
506 }
507 }
508
509 private:
precedence(int tokenKind) const510 inline int precedence(int tokenKind) const
511 {
512 switch (tokenKind) {
513 case T_PIPE_PIPE: return 0;
514 case T_AMPER_AMPER: return 1;
515 case T_PIPE: return 2;
516 case T_CARET: return 3;
517 case T_AMPER: return 4;
518 case T_EQUAL_EQUAL:
519 case T_EXCLAIM_EQUAL: return 5;
520 case T_GREATER:
521 case T_LESS:
522 case T_LESS_EQUAL:
523 case T_GREATER_EQUAL: return 6;
524 case T_LESS_LESS:
525 case T_GREATER_GREATER: return 7;
526 case T_PLUS:
527 case T_MINUS: return 8;
528 case T_STAR:
529 case T_SLASH:
530 case T_PERCENT: return 9;
531
532 default:
533 return -1;
534 }
535 }
536
isBinaryOperator(int tokenKind)537 static inline bool isBinaryOperator(int tokenKind)
538 {
539 switch (tokenKind) {
540 case T_PIPE_PIPE:
541 case T_AMPER_AMPER:
542 case T_PIPE:
543 case T_CARET:
544 case T_AMPER:
545 case T_EQUAL_EQUAL:
546 case T_EXCLAIM_EQUAL:
547 case T_GREATER:
548 case T_LESS:
549 case T_LESS_EQUAL:
550 case T_GREATER_EQUAL:
551 case T_LESS_LESS:
552 case T_GREATER_GREATER:
553 case T_PLUS:
554 case T_MINUS:
555 case T_STAR:
556 case T_SLASH:
557 case T_PERCENT:
558 return true;
559
560 default:
561 return false;
562 }
563 }
564
evaluate_expression(int tokenKind,const Value & lhs,const Value & rhs)565 static inline Value evaluate_expression(int tokenKind, const Value &lhs, const Value &rhs)
566 {
567 switch (tokenKind) {
568 case T_PIPE_PIPE: return lhs || rhs;
569 case T_AMPER_AMPER: return lhs && rhs;
570 case T_PIPE: return lhs | rhs;
571 case T_CARET: return lhs ^ rhs;
572 case T_AMPER: return lhs & rhs;
573 case T_EQUAL_EQUAL: return lhs == rhs;
574 case T_EXCLAIM_EQUAL: return lhs != rhs;
575 case T_GREATER: return lhs > rhs;
576 case T_LESS: return lhs < rhs;
577 case T_LESS_EQUAL: return lhs <= rhs;
578 case T_GREATER_EQUAL: return lhs >= rhs;
579 case T_LESS_LESS: return lhs << rhs;
580 case T_GREATER_GREATER: return lhs >> rhs;
581 case T_PLUS: return lhs + rhs;
582 case T_MINUS: return lhs - rhs;
583 case T_STAR: return lhs * rhs;
584 case T_SLASH: return rhs.is_zero() ? Value() : lhs / rhs;
585 case T_PERCENT: return rhs.is_zero() ? Value() : lhs % rhs;
586
587 default:
588 return Value();
589 }
590 }
591
592 private:
593 Client *client;
594 Environment *env;
595 QByteArray source;
596 RangeLexer *_lex;
597 Value _value;
598 };
599
600 } // end of anonymous namespace
601
State()602 Preprocessor::State::State()
603 : m_lexer(nullptr)
604 , m_skipping(MAX_LEVEL)
605 , m_trueTest(MAX_LEVEL)
606 , m_ifLevel(0)
607 , m_tokenBufferDepth(0)
608 , m_tokenBuffer(nullptr)
609 , m_inPreprocessorDirective(false)
610 , m_markExpandedTokens(true)
611 , m_noLines(false)
612 , m_inCondition(false)
613 , m_bytesOffsetRef(0)
614 , m_utf16charsOffsetRef(0)
615 , m_result(nullptr)
616 , m_lineRef(1)
617 , m_currentExpansion(nullptr)
618 , m_includeGuardState(IncludeGuardState_BeforeIfndef)
619 {
620 m_skipping[m_ifLevel] = false;
621 m_trueTest[m_ifLevel] = false;
622
623 m_expansionResult.reserve(256);
624 setExpansionStatus(NotExpanding);
625 }
626
627 #define COMPRESS_TOKEN_BUFFER
pushTokenBuffer(const PPToken * start,const PPToken * end,const Macro * macro)628 void Preprocessor::State::pushTokenBuffer(const PPToken *start, const PPToken *end, const Macro *macro)
629 {
630 if (m_tokenBufferDepth <= MAX_TOKEN_BUFFER_DEPTH) {
631 #ifdef COMPRESS_TOKEN_BUFFER
632 if (macro || !m_tokenBuffer) {
633 // If there is a new blocking macro (or no token buffer yet), create
634 // one.
635 m_tokenBuffer = new TokenBuffer(start, end, macro, m_tokenBuffer);
636 ++m_tokenBufferDepth;
637 } else {
638 // No new blocking macro is passed in, so tokens can be prepended to
639 // the existing buffer.
640 m_tokenBuffer->tokens.insert(m_tokenBuffer->tokens.begin(), start, end);
641 }
642 #else
643 m_tokenBuffer = new TokenBuffer(start, end, macro, m_tokenBuffer);
644 ++m_tokenBufferDepth;
645 #endif
646 }
647 }
648
popTokenBuffer()649 void Preprocessor::State::popTokenBuffer()
650 {
651 TokenBuffer *r = m_tokenBuffer;
652 m_tokenBuffer = m_tokenBuffer->next;
653 delete r;
654
655 if (m_tokenBufferDepth)
656 --m_tokenBufferDepth;
657 }
658
#ifdef DEBUG_INCLUDE_GUARD_TRACKING
/// Returns the name of the include-guard state \a guardState for debug
/// output, or "UNKNOWN" for a value that is not one of the known states.
QString Preprocessor::State::guardStateToString(int guardState)
{
    const char *label = "UNKNOWN";
    switch (guardState) {
    case IncludeGuardState_NoGuard:
        label = "NoGuard";
        break;
    case IncludeGuardState_BeforeIfndef:
        label = "BeforeIfndef";
        break;
    case IncludeGuardState_AfterIfndef:
        label = "AfterIfndef";
        break;
    case IncludeGuardState_AfterDefine:
        label = "AfterDefine";
        break;
    case IncludeGuardState_AfterEndif:
        label = "AfterEndif";
        break;
    default:
        break;
    }
    return QLatin1String(label);
}
#endif // DEBUG_INCLUDE_GUARD_TRACKING
672
673 /**
674 * @brief Update the include-guard tracking state.
675 *
676 * Include guards are the #ifdef/#define/#endif sequence typically found in
677 * header files to prevent repeated definition of the contents of that header
678 * file. So, for a file to have an include guard, it must look like this:
679 * \code
680 * #ifndef SOME_ID
681 * ... all declarations/definitions/etc. go here ...
682 * #endif
683 * \endcode
684 *
685 * SOME_ID is an identifier, and is also the include guard. The only tokens
686 * allowed before the #ifndef and after the #endif are comments (in any form)
687 * or #line directives. The only other requirement is that a #define SOME_ID
688 * occurs inside the #ifndef block, but not nested inside other
689 * #if/#ifdef/#ifndef blocks.
690 *
691 * This function tracks the state, and is called from \c updateIncludeGuardState
692 * which handles the most common no-op cases.
693 *
694 * @param hint indicates what kind of token is encountered in the input
695 * @param idToken the identifier token that ought to be in the input
696 * after a #ifndef or a #define .
697 */
updateIncludeGuardState_helper(IncludeGuardStateHint hint,PPToken * idToken)698 void Preprocessor::State::updateIncludeGuardState_helper(IncludeGuardStateHint hint, PPToken *idToken)
699 {
700 #ifdef DEBUG_INCLUDE_GUARD_TRACKING
701 int oldIncludeGuardState = m_includeGuardState;
702 QByteArray oldIncludeGuardMacroName = m_includeGuardMacroName;
703 #endif // DEBUG_INCLUDE_GUARD_TRACKING
704
705 switch (m_includeGuardState) {
706 case IncludeGuardState_NoGuard:
707 break;
708 case IncludeGuardState_BeforeIfndef:
709 if (hint == IncludeGuardStateHint_Ifndef
710 && idToken && idToken->is(T_IDENTIFIER)) {
711 m_includeGuardMacroName = idToken->asByteArrayRef().toByteArray();
712 m_includeGuardState = IncludeGuardState_AfterIfndef;
713 } else {
714 m_includeGuardState = IncludeGuardState_NoGuard;
715 }
716 break;
717 case IncludeGuardState_AfterIfndef:
718 if (hint == IncludeGuardStateHint_Define
719 && idToken && idToken->is(T_IDENTIFIER)
720 && idToken->asByteArrayRef() == m_includeGuardMacroName)
721 m_includeGuardState = IncludeGuardState_AfterDefine;
722 break;
723 case IncludeGuardState_AfterDefine:
724 if (hint == IncludeGuardStateHint_Endif)
725 m_includeGuardState = IncludeGuardState_AfterEndif;
726 break;
727 case IncludeGuardState_AfterEndif:
728 m_includeGuardState = IncludeGuardState_NoGuard;
729 m_includeGuardMacroName.clear();
730 break;
731 }
732
733 #ifdef DEBUG_INCLUDE_GUARD_TRACKING
734 qDebug() << "***" << guardStateToString(oldIncludeGuardState)
735 << "->" << guardStateToString(m_includeGuardState)
736 << "hint:" << hint
737 << "guard:" << oldIncludeGuardMacroName << "->" << m_includeGuardMacroName;
738 #endif // DEBUG_INCLUDE_GUARD_TRACKING
739 }
740
configurationFileName()741 QString Preprocessor::configurationFileName() { return QStringLiteral("<configuration>"); }
742
Preprocessor(Client * client,Environment * env)743 Preprocessor::Preprocessor(Client *client, Environment *env)
744 : m_client(client)
745 , m_env(env)
746 , m_expandFunctionlikeMacros(true)
747 , m_keepComments(false)
748 {
749 }
750
run(const QString & fileName,const QString & source)751 QByteArray Preprocessor::run(const QString &fileName, const QString &source)
752 {
753 return run(fileName, source.toUtf8());
754 }
755
/// Preprocesses \a source, treated as the contents of \a fileName, and returns
/// the preprocessed text.
///
/// \a noLines and \a markGeneratedTokens are handed on to preprocess(). When
/// preprocess() reports an include-guard macro and a client is set, the client
/// is told about it through markAsIncludeGuard().
QByteArray Preprocessor::run(const QString &fileName,
                             const QByteArray &source,
                             bool noLines,
                             bool markGeneratedTokens)
{
    m_scratchBuffer.clear();

    QByteArray includeGuardMacroName;
    QByteArray preprocessed;
    // Multiply by 2 because we insert #gen lines.
    preprocessed.reserve(source.size() * 2);

    preprocess(fileName, source, &preprocessed, &includeGuardMacroName, noLines,
               markGeneratedTokens, false);

    const bool reportGuard = m_client && !includeGuardMacroName.isEmpty();
    if (reportGuard)
        m_client->markAsIncludeGuard(includeGuardMacroName);

    return preprocessed;
}
771
setCancelChecker(const Preprocessor::CancelChecker & cancelChecker)772 void Preprocessor::setCancelChecker(const Preprocessor::CancelChecker &cancelChecker)
773 {
774 m_cancelChecker = cancelChecker;
775 }
776
expandFunctionlikeMacros() const777 bool Preprocessor::expandFunctionlikeMacros() const
778 {
779 return m_expandFunctionlikeMacros;
780 }
781
setExpandFunctionlikeMacros(bool expandMacros)782 void Preprocessor::setExpandFunctionlikeMacros(bool expandMacros)
783 {
784 m_expandFunctionlikeMacros = expandMacros;
785 }
786
keepComments() const787 bool Preprocessor::keepComments() const
788 {
789 return m_keepComments;
790 }
791
setKeepComments(bool keepComments)792 void Preprocessor::setKeepComments(bool keepComments)
793 {
794 m_keepComments = keepComments;
795 }
796
generateOutputLineMarker(unsigned lineno)797 void Preprocessor::generateOutputLineMarker(unsigned lineno)
798 {
799 maybeStartOutputLine();
800 QByteArray &marker = currentOutputBuffer();
801 marker.append("# ");
802 marker.append(QByteArray::number(lineno));
803 marker.append(" \"");
804 marker.append(m_env->currentFileUtf8);
805 marker.append("\"\n");
806 }
807
handleDefined(PPToken * tk)808 void Preprocessor::handleDefined(PPToken *tk)
809 {
810 ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
811 unsigned lineno = tk->lineno;
812 lex(tk); // consume "defined" token
813 bool lparenSeen = tk->is(T_LPAREN);
814 if (lparenSeen)
815 lex(tk); // consume "(" token
816 if (tk->isNot(T_IDENTIFIER))
817 //### TODO: generate error message
818 return;
819 PPToken idToken = *tk;
820 do {
821 lex(tk);
822 if (tk->isNot(T_POUND_POUND))
823 break;
824 lex(tk);
825 if (tk->is(T_IDENTIFIER))
826 idToken = generateConcatenated(idToken, *tk);
827 else
828 break;
829 } while (isContinuationToken(*tk));
830
831
832 if (lparenSeen && tk->is(T_RPAREN))
833 lex(tk);
834
835 pushToken(tk);
836
837 QByteArray result(1, '0');
838 const ByteArrayRef macroName = idToken.asByteArrayRef();
839 if (macroDefinition(macroName,
840 idToken.byteOffset + m_state.m_bytesOffsetRef,
841 idToken.utf16charOffset + m_state.m_utf16charsOffsetRef,
842 idToken.lineno, m_env, m_client)) {
843 result[0] = '1';
844 }
845 *tk = generateToken(T_NUMERIC_LITERAL, result.constData(), result.size(), lineno, false);
846 }
847
pushToken(Preprocessor::PPToken * tk)848 void Preprocessor::pushToken(Preprocessor::PPToken *tk)
849 {
850 const PPToken currentTokenBuffer[] = {*tk};
851 m_state.pushTokenBuffer(currentTokenBuffer, currentTokenBuffer + 1, nullptr);
852 }
853
lex(PPToken * tk)854 void Preprocessor::lex(PPToken *tk)
855 {
856 again:
857 if (m_state.m_tokenBuffer) {
858 // There is a token buffer, so read from there.
859 if (m_state.m_tokenBuffer->tokens.empty()) {
860 // The token buffer is empty, so pop it, and start over.
861 m_state.popTokenBuffer();
862 goto again;
863 }
864 *tk = m_state.m_tokenBuffer->tokens.front();
865 m_state.m_tokenBuffer->tokens.pop_front();
866 // The token buffer might now be empty. We leave it in, because the
867 // token we just read might expand into new tokens, or might be a call
868 // to the macro that generated this token. In either case, the macro
869 // that generated the token still needs to be blocked (!), which is
870 // recorded in the token buffer. Removing the blocked macro and the
871 // empty token buffer happens the next time that this function is called.
872 } else {
873 // No token buffer, so have the lexer scan the next token.
874 tk->setSource(m_state.m_source);
875 m_state.m_lexer->scan(tk);
876 }
877
878 // Adjust token's line number in order to take into account the environment reference.
879 tk->lineno += m_state.m_lineRef - 1;
880
881 reclassify:
882 if (! m_state.m_inPreprocessorDirective) {
883 if (tk->newline() && tk->is(T_POUND)) {
884 handlePreprocessorDirective(tk);
885 goto reclassify;
886 } else if (tk->newline() && skipping()) {
887 ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
888 do {
889 lex(tk);
890 } while (isContinuationToken(*tk));
891 goto reclassify;
892 } else if (tk->is(T_IDENTIFIER) && !isQtReservedWord(tk->tokenStart(), tk->bytes())) {
893 m_state.updateIncludeGuardState(State::IncludeGuardStateHint_OtherToken);
894 if (m_state.m_inCondition && tk->asByteArrayRef() == "defined") {
895 handleDefined(tk);
896 } else {
897 synchronizeOutputLines(*tk);
898 if (handleIdentifier(tk))
899 goto again;
900 }
901 } else if (tk->isNot(T_COMMENT) && tk->isNot(T_EOF_SYMBOL)) {
902 m_state.updateIncludeGuardState(State::IncludeGuardStateHint_OtherToken);
903 }
904 }
905 }
906
skipPreprocesorDirective(PPToken * tk)907 void Preprocessor::skipPreprocesorDirective(PPToken *tk)
908 {
909 ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
910
911 while (isContinuationToken(*tk)) {
912 scanComment(tk);
913 lex(tk);
914 }
915 }
916
handleIdentifier(PPToken * tk)917 bool Preprocessor::handleIdentifier(PPToken *tk)
918 {
919 ScopedBoolSwap s(m_state.m_inPreprocessorDirective, !tk->f.expanded);
920
921 static const QByteArray ppLine("__LINE__");
922 static const QByteArray ppFile("__FILE__");
923 static const QByteArray ppDate("__DATE__");
924 static const QByteArray ppTime("__TIME__");
925
926 ByteArrayRef macroNameRef = tk->asByteArrayRef();
927
928 if (macroNameRef.size() == 8
929 && macroNameRef[0] == '_'
930 && macroNameRef[1] == '_') {
931 PPToken newTk;
932 if (macroNameRef == ppLine) {
933 QByteArray txt = QByteArray::number(tk->lineno);
934 newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
935 } else if (macroNameRef == ppFile) {
936 QByteArray txt;
937 txt.append('"');
938 txt.append(m_env->currentFileUtf8);
939 txt.append('"');
940 newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
941 } else if (macroNameRef == ppDate) {
942 QByteArray txt;
943 txt.append('"');
944 txt.append(QDate::currentDate().toString().toUtf8());
945 txt.append('"');
946 newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
947 } else if (macroNameRef == ppTime) {
948 QByteArray txt;
949 txt.append('"');
950 txt.append(QTime::currentTime().toString().toUtf8());
951 txt.append('"');
952 newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
953 }
954
955 if (newTk.hasSource()) {
956 newTk.f.newline = tk->newline();
957 newTk.f.whitespace = tk->whitespace();
958 *tk = newTk;
959 return false;
960 }
961 }
962
963 Macro *macro = m_env->resolve(macroNameRef);
964 if (!macro
965 || (tk->expanded() && m_state.m_tokenBuffer && m_state.m_tokenBuffer->isBlocked(macro))) {
966 return false;
967 }
968 // qDebug() << "expanding" << macro->name() << "on line" << tk->lineno;
969
970 // Keep track the of the macro identifier token.
971 PPToken idTk = *tk;
972
973 // Expanded tokens which are not generated ones preserve the original line number from
974 // their corresponding argument in macro substitution. For expanded tokens which are
975 // generated, this information must be taken from somewhere else. What we do is to keep
976 // a "reference" line initialize set to the line where expansion happens.
977 unsigned baseLine = idTk.lineno - m_state.m_lineRef + 1;
978
979 QVector<PPToken> body = macro->definitionTokens();
980
981 // Within nested expansion we might reach a previously added marker token. In this case,
982 // we need to move it from its current possition to outside the nesting.
983 PPToken oldMarkerTk;
984
985 if (macro->isFunctionLike()) {
986 if (!expandFunctionlikeMacros()
987 // Still expand if this originally started with an object-like macro.
988 && m_state.m_expansionStatus != Expanding) {
989 if (m_client) {
990 m_client->notifyMacroReference(m_state.m_bytesOffsetRef + idTk.byteOffset,
991 m_state.m_utf16charsOffsetRef + idTk.utf16charOffset,
992 idTk.lineno,
993 *macro);
994 }
995 return false;
996 }
997
998 // Collect individual tokens that form the macro arguments.
999 QVector<QVector<PPToken> > allArgTks;
1000 bool hasArgs = collectActualArguments(tk, &allArgTks, macro->name());
1001
1002 // Check whether collecting arguments failed due to a previously added marker
1003 // that goot nested in a sequence of expansions. If so, store it and try again.
1004 if (!hasArgs
1005 && !tk->hasSource()
1006 && m_state.m_markExpandedTokens
1007 && (m_state.m_expansionStatus == Expanding
1008 || m_state.m_expansionStatus == ReadyForExpansion)) {
1009 oldMarkerTk = *tk;
1010 hasArgs = collectActualArguments(tk, &allArgTks, macro->name());
1011 }
1012
1013 // Check for matching parameter/argument count.
1014 bool hasMatchingArgs = false;
1015 if (hasArgs) {
1016 const int expectedArgCount = macro->formals().size();
1017 if (macro->isVariadic() && allArgTks.size() == expectedArgCount - 1)
1018 allArgTks.push_back(QVector<PPToken>());
1019 const int actualArgCount = allArgTks.size();
1020 if (expectedArgCount == actualArgCount
1021 || (macro->isVariadic() && actualArgCount > expectedArgCount - 1)
1022 // Handle '#define foo()' when invoked as 'foo()'
1023 || (expectedArgCount == 0
1024 && actualArgCount == 1
1025 && allArgTks.at(0).isEmpty())) {
1026 hasMatchingArgs = true;
1027 }
1028 }
1029
1030 if (!hasArgs || !hasMatchingArgs) {
1031 //### TODO: error message
1032 pushToken(tk);
1033 // If a previous marker was found, make sure to put it back.
1034 if (oldMarkerTk.bytes())
1035 pushToken(&oldMarkerTk);
1036 *tk = idTk;
1037 return false;
1038 }
1039
1040 if (m_client && !idTk.generated()) {
1041 // Bundle each token sequence into a macro argument "reference" for notification.
1042 // Even empty ones, which are not necessarily important on its own, but for the matter
1043 // of couting their number - such as in foo(,)
1044 QVector<MacroArgumentReference> argRefs;
1045 for (int i = 0; i < allArgTks.size(); ++i) {
1046 const QVector<PPToken> &argTks = allArgTks.at(i);
1047 if (argTks.isEmpty()) {
1048 argRefs.push_back(MacroArgumentReference());
1049 } else {
1050
1051 argRefs.push_back(MacroArgumentReference(
1052 m_state.m_bytesOffsetRef + argTks.first().bytesBegin(),
1053 argTks.last().bytesBegin() + argTks.last().bytes()
1054 - argTks.first().bytesBegin(),
1055 m_state.m_utf16charsOffsetRef + argTks.first().utf16charsBegin(),
1056 argTks.last().utf16charsBegin() + argTks.last().utf16chars()
1057 - argTks.first().utf16charsBegin()));
1058 }
1059 }
1060
1061 m_client->startExpandingMacro(m_state.m_bytesOffsetRef + idTk.byteOffset,
1062 m_state.m_utf16charsOffsetRef + idTk.utf16charOffset,
1063 idTk.lineno,
1064 *macro,
1065 argRefs);
1066 }
1067
1068 if (allArgTks.size() > MAX_FUNCTION_LIKE_ARGUMENTS_COUNT)
1069 return false;
1070
1071 if (!handleFunctionLikeMacro(macro, body, allArgTks, baseLine)) {
1072 if (m_client && !idTk.expanded())
1073 m_client->stopExpandingMacro(idTk.byteOffset, *macro);
1074 return false;
1075 }
1076 } else if (m_client && !idTk.generated()) {
1077 m_client->startExpandingMacro(m_state.m_bytesOffsetRef + idTk.byteOffset,
1078 m_state.m_utf16charsOffsetRef + idTk.utf16charOffset,
1079 idTk.lineno, *macro);
1080 }
1081
1082 if (body.isEmpty()) {
1083 if (m_state.m_markExpandedTokens
1084 && (m_state.m_expansionStatus == NotExpanding
1085 || m_state.m_expansionStatus == JustFinishedExpansion)) {
1086 // This is not the most beautiful approach but it's quite reasonable. What we do here
1087 // is to create a fake identifier token which is only composed by whitespaces. It's
1088 // also not marked as expanded so it it can be treated as a regular token.
1089 const QByteArray content(int(idTk.bytes() + computeDistance(idTk)), ' ');
1090 PPToken fakeIdentifier = generateToken(T_IDENTIFIER,
1091 content.constData(), content.length(),
1092 idTk.lineno, false, false);
1093 fakeIdentifier.f.whitespace = true;
1094 fakeIdentifier.f.expanded = false;
1095 fakeIdentifier.f.generated = false;
1096 body.push_back(fakeIdentifier);
1097 }
1098 } else {
1099 // The first body token replaces the macro invocation so its whitespace and
1100 // newline info is replicated.
1101 PPToken &bodyTk = body[0];
1102 bodyTk.f.whitespace = idTk.whitespace();
1103 bodyTk.f.newline = idTk.newline();
1104
1105 // Expansions are tracked from a "top-level" basis. This means that each expansion
1106 // section in the output corresponds to a direct use of a macro (either object-like
1107 // or function-like) in the source code and all its recurring expansions - they are
1108 // surrounded by two marker tokens, one at the begin and the other at the end.
1109 // For instance, the following code will generate 3 expansions in total, but the
1110 // output will aggregate the tokens in only 2 expansion sections.
1111 // - The first corresponds to BAR expanding to FOO and then FOO expanding to T o;
1112 // - The second corresponds to FOO expanding to T o;
1113 //
1114 // #define FOO(T, o) T o;
1115 // #define BAR(T, o) FOO(T, o)
1116 // BAR(Test, x) FOO(Test, y)
1117 if (m_state.m_markExpandedTokens) {
1118 if (m_state.m_expansionStatus == NotExpanding
1119 || m_state.m_expansionStatus == JustFinishedExpansion) {
1120 PPToken marker;
1121 marker.f.expanded = true;
1122 marker.f.bytes = idTk.bytes();
1123 marker.byteOffset = idTk.byteOffset;
1124 marker.lineno = idTk.lineno;
1125 body.prepend(marker);
1126 body.append(marker);
1127 m_state.setExpansionStatus(ReadyForExpansion);
1128 } else if (oldMarkerTk.bytes()
1129 && (m_state.m_expansionStatus == ReadyForExpansion
1130 || m_state.m_expansionStatus == Expanding)) {
1131 body.append(oldMarkerTk);
1132 }
1133 }
1134 }
1135
1136 m_state.pushTokenBuffer(body.constBegin(), body.constEnd(), macro);
1137
1138 if (m_client && !idTk.generated())
1139 m_client->stopExpandingMacro(idTk.byteOffset, *macro);
1140
1141 return true;
1142 }
1143
handleFunctionLikeMacro(const Macro * macro,QVector<PPToken> & body,const QVector<QVector<PPToken>> & actuals,unsigned baseLine)1144 bool Preprocessor::handleFunctionLikeMacro(const Macro *macro,
1145 QVector<PPToken> &body,
1146 const QVector<QVector<PPToken> > &actuals,
1147 unsigned baseLine)
1148 {
1149 QVector<PPToken> expanded;
1150 expanded.reserve(MAX_TOKEN_EXPANSION_COUNT);
1151
1152 const size_t bodySize = body.size();
1153 for (size_t i = 0; i < bodySize && expanded.size() < MAX_TOKEN_EXPANSION_COUNT;
1154 ++i) {
1155 int expandedSize = expanded.size();
1156 PPToken bodyTk = body.at(int(i));
1157
1158 if (bodyTk.is(T_IDENTIFIER)) {
1159 const ByteArrayRef id = bodyTk.asByteArrayRef();
1160 const QVector<QByteArray> &formals = macro->formals();
1161 int j = 0;
1162 for (; j < formals.size() && expanded.size() < MAX_TOKEN_EXPANSION_COUNT; ++j) {
1163 if (formals[j] == id) {
1164 QVector<PPToken> actualsForThisParam = actuals.at(j);
1165 unsigned lineno = baseLine;
1166
1167 // Collect variadic arguments
1168 if (id == "__VA_ARGS__" || (macro->isVariadic() && j + 1 == formals.size())) {
1169 for (int k = j + 1; k < actuals.size(); ++k) {
1170 actualsForThisParam.append(generateToken(T_COMMA, ",", 1, lineno, true));
1171 actualsForThisParam += actuals.at(k);
1172 }
1173 }
1174
1175 const int actualsSize = actualsForThisParam.size();
1176
1177 if (i > 0 && body[int(i) - 1].is(T_POUND)) {
1178 QByteArray enclosedString;
1179 enclosedString.reserve(256);
1180
1181 for (int i = 0; i < actualsSize; ++i) {
1182 const PPToken &t = actualsForThisParam.at(i);
1183 if (i == 0)
1184 lineno = t.lineno;
1185 else if (t.whitespace())
1186 enclosedString.append(' ');
1187 enclosedString.append(t.tokenStart(), t.bytes());
1188 }
1189 enclosedString.replace("\\", "\\\\");
1190 enclosedString.replace("\"", "\\\"");
1191
1192 expanded.push_back(generateToken(T_STRING_LITERAL,
1193 enclosedString.constData(),
1194 enclosedString.size(),
1195 lineno, true));
1196 } else {
1197 for (int k = 0; k < actualsSize; ++k) {
1198 // Mark the actual tokens (which are the replaced version of the
1199 // body's one) as expanded. For the first token we replicate the
1200 // body's whitespace info.
1201 PPToken actual = actualsForThisParam.at(k);
1202 actual.f.expanded = true;
1203 if (k == 0)
1204 actual.f.whitespace = bodyTk.whitespace();
1205 expanded += actual;
1206 if (k == actualsSize - 1)
1207 lineno = actual.lineno;
1208 }
1209 }
1210
1211 // Get a better (more up-to-date) value for the base line.
1212 baseLine = lineno;
1213
1214 break;
1215 }
1216 }
1217
1218 if (j == formals.size()) {
1219 // No formal macro parameter for this identifier in the body.
1220 bodyTk.f.generated = true;
1221 bodyTk.lineno = baseLine;
1222 expanded.push_back(std::move(bodyTk));
1223 }
1224 } else if (bodyTk.isNot(T_POUND) && bodyTk.isNot(T_POUND_POUND)) {
1225 bodyTk.f.generated = true;
1226 bodyTk.lineno = baseLine;
1227 expanded.push_back(std::move(bodyTk));
1228 }
1229
1230 if (i > 1 && body[int(i) - 1].is(T_POUND_POUND)) {
1231 if (expandedSize < 1 || expanded.size() == expandedSize) //### TODO: [cpp.concat] placemarkers
1232 continue;
1233 const PPToken &leftTk = expanded[expandedSize - 1];
1234 const PPToken &rightTk = expanded[expandedSize];
1235 expanded[expandedSize - 1] = generateConcatenated(leftTk, rightTk);
1236 expanded.remove(expandedSize);
1237 }
1238 }
1239
1240 // The "new" body.
1241 body = expanded;
1242 body.squeeze();
1243
1244 return true;
1245 }
1246
trackExpansionCycles(PPToken * tk)1247 void Preprocessor::trackExpansionCycles(PPToken *tk)
1248 {
1249 if (m_state.m_markExpandedTokens) {
1250 // Identify a macro expansion section. The format is as follows:
1251 //
1252 // # expansion begin x,y ~g l:c
1253 // ...
1254 // # expansion end
1255 //
1256 // The x and y correspond, respectively, to the offset where the macro invocation happens
1257 // and the macro name's length. Following that there might be an unlimited number of
1258 // token marks which are directly mapped to each token that appears in the expansion.
1259 // Something like ~g indicates that the following g tokens are all generated. While
1260 // something like l:c indicates that the following token is expanded but not generated
1261 // and is positioned on line l and column c. Example:
1262 //
1263 // #define FOO(X) int f(X = 0) // line 1
1264 // FOO(int
1265 // a);
1266 //
1267 // Output would be:
1268 // # expansion begin 8,3 ~3 2:4 3:4 ~3
1269 // int f(int a = 0)
1270 // # expansion end
1271 // # 3 filename
1272 // ;
1273 if (tk->expanded() && !tk->hasSource()) {
1274 if (m_state.m_expansionStatus == ReadyForExpansion) {
1275 m_state.setExpansionStatus(Expanding);
1276 m_state.m_expansionResult.clear();
1277 m_state.m_expandedTokensInfo.clear();
1278 } else if (m_state.m_expansionStatus == Expanding) {
1279 m_state.setExpansionStatus(JustFinishedExpansion);
1280
1281 QByteArray &buffer = currentOutputBuffer();
1282 maybeStartOutputLine();
1283
1284 // Offset and length of the macro invocation
1285 char chunk[40];
1286 qsnprintf(chunk, sizeof(chunk), "# expansion begin %d,%d", tk->byteOffset,
1287 tk->bytes());
1288 buffer.append(chunk);
1289
1290 // Expanded tokens
1291 unsigned generatedCount = 0;
1292 for (int i = 0; i < m_state.m_expandedTokensInfo.size(); ++i) {
1293 const QPair<unsigned, unsigned> &p = m_state.m_expandedTokensInfo.at(i);
1294 if (p.first) {
1295 if (generatedCount) {
1296 qsnprintf(chunk, sizeof(chunk), " ~%d", generatedCount);
1297 buffer.append(chunk);
1298 generatedCount = 0;
1299 }
1300 qsnprintf(chunk, sizeof(chunk), " %d:%d", p.first, p.second);
1301 buffer.append(chunk);
1302 } else {
1303 ++generatedCount;
1304 }
1305 }
1306 if (generatedCount) {
1307 qsnprintf(chunk, sizeof(chunk), " ~%d", generatedCount);
1308 buffer.append(chunk);
1309 }
1310 buffer.append('\n');
1311 buffer.append(m_state.m_expansionResult);
1312 maybeStartOutputLine();
1313 buffer.append("# expansion end\n");
1314 }
1315
1316 lex(tk);
1317
1318 if (tk->expanded() && !tk->hasSource())
1319 trackExpansionCycles(tk);
1320 }
1321 }
1322 }
1323
adjustForCommentOrStringNewlines(int * currentLine,const PPToken & tk)1324 static void adjustForCommentOrStringNewlines(int *currentLine, const PPToken &tk)
1325 {
1326 if (tk.isComment() || tk.isStringLiteral())
1327 (*currentLine) += tk.asByteArrayRef().count('\n');
1328 }
1329
synchronizeOutputLines(const PPToken & tk,bool forceLine)1330 void Preprocessor::synchronizeOutputLines(const PPToken &tk, bool forceLine)
1331 {
1332 if (m_state.m_expansionStatus != NotExpanding
1333 || (!forceLine && m_env->currentLine == tk.lineno)) {
1334 adjustForCommentOrStringNewlines(&m_env->currentLine, tk);
1335 return;
1336 }
1337
1338 if (forceLine || m_env->currentLine > tk.lineno || tk.lineno - m_env->currentLine >= 9) {
1339 if (m_state.m_noLines) {
1340 if (!m_state.m_markExpandedTokens)
1341 currentOutputBuffer().append(' ');
1342 } else {
1343 generateOutputLineMarker(tk.lineno);
1344 }
1345 } else {
1346 for (int i = m_env->currentLine; i < tk.lineno; ++i)
1347 currentOutputBuffer().append('\n');
1348 }
1349
1350 m_env->currentLine = tk.lineno;
1351 adjustForCommentOrStringNewlines(&m_env->currentLine, tk);
1352 }
1353
computeDistance(const Preprocessor::PPToken & tk,bool forceTillLine)1354 std::size_t Preprocessor::computeDistance(const Preprocessor::PPToken &tk, bool forceTillLine)
1355 {
1356 // Find previous non-space character or line begin.
1357 const char *buffer = tk.bufferStart();
1358 const char *tokenBegin = tk.tokenStart();
1359 const char *it = tokenBegin - 1;
1360 for (; it >= buffer; --it) {
1361 if (*it == '\n'|| (!pp_isspace(*it) && !forceTillLine))
1362 break;
1363 }
1364 ++it;
1365
1366 return tokenBegin - it;
1367 }
1368
1369
enforceSpacing(const Preprocessor::PPToken & tk,bool forceSpacing)1370 void Preprocessor::enforceSpacing(const Preprocessor::PPToken &tk, bool forceSpacing)
1371 {
1372 if (tk.whitespace() || forceSpacing) {
1373 QByteArray &buffer = currentOutputBuffer();
1374 // For expanded tokens we simply add a whitespace, if necessary - the exact amount of
1375 // whitespaces is irrelevant within an expansion section. For real tokens we must be
1376 // more specific and get the information from the original source.
1377 if (tk.expanded() && !atStartOfOutputLine()) {
1378 buffer.append(' ');
1379 } else {
1380 const std::size_t spacing = computeDistance(tk, forceSpacing);
1381 const char *tokenBegin = tk.tokenStart();
1382 const char *it = tokenBegin - spacing;
1383
1384 // Reproduce the content as in the original line.
1385 for (; it != tokenBegin; ++it)
1386 buffer.append(pp_isspace(*it) ? *it : ' ');
1387 }
1388 }
1389 }
1390
1391 /// invalid pp-tokens are used as markers to force whitespace checks.
preprocess(const QString & fileName,const QByteArray & source,QByteArray * result,QByteArray * includeGuardMacroName,bool noLines,bool markGeneratedTokens,bool inCondition,unsigned bytesOffsetRef,unsigned utf16charOffsetRef,unsigned lineRef)1392 void Preprocessor::preprocess(const QString &fileName, const QByteArray &source,
1393 QByteArray *result, QByteArray *includeGuardMacroName,
1394 bool noLines,
1395 bool markGeneratedTokens, bool inCondition,
1396 unsigned bytesOffsetRef, unsigned utf16charOffsetRef,
1397 unsigned lineRef)
1398 {
1399 if (source.isEmpty())
1400 return;
1401
1402 ScopedSwap<State> savedState(m_state, State());
1403 m_state.m_currentFileName = fileName;
1404 m_state.m_source = source;
1405 m_state.m_lexer = new Lexer(source.constBegin(), source.constEnd());
1406 m_state.m_lexer->setScanKeywords(false);
1407 m_state.m_lexer->setScanAngleStringLiteralTokens(false);
1408 m_state.m_lexer->setPreprocessorMode(true);
1409 if (m_keepComments)
1410 m_state.m_lexer->setScanCommentTokens(true);
1411 m_state.m_result = result;
1412 m_state.setExpansionStatus(m_state.m_expansionStatus); // Re-set m_currentExpansion
1413 m_state.m_noLines = noLines;
1414 m_state.m_markExpandedTokens = markGeneratedTokens;
1415 m_state.m_inCondition = inCondition;
1416 m_state.m_bytesOffsetRef = bytesOffsetRef;
1417 m_state.m_utf16charsOffsetRef = utf16charOffsetRef;
1418 m_state.m_lineRef = lineRef;
1419
1420 ScopedSwap<QString> savedFileName(m_env->currentFile, fileName);
1421 ScopedSwap<QByteArray> savedUtf8FileName(m_env->currentFileUtf8, fileName.toUtf8());
1422 ScopedSwap<int> savedCurrentLine(m_env->currentLine, 1);
1423
1424 if (!m_state.m_noLines)
1425 generateOutputLineMarker(1);
1426
1427 PPToken tk(m_state.m_source);
1428 do {
1429 lex(&tk);
1430
1431 // Track the start and end of macro expansion cycles.
1432 trackExpansionCycles(&tk);
1433
1434 bool macroExpanded = false;
1435 if (m_state.m_expansionStatus == Expanding) {
1436 // Collect the line and column from the tokens undergoing expansion. Those will
1437 // be available in the expansion section for further referencing about their real
1438 // location.
1439 unsigned trackedLine = 0;
1440 unsigned trackedColumn = 0;
1441 if (tk.expanded() && !tk.generated()) {
1442 trackedLine = tk.lineno;
1443 trackedColumn = unsigned(computeDistance(tk, true));
1444 }
1445 m_state.m_expandedTokensInfo.append(qMakePair(trackedLine, trackedColumn));
1446 } else if (m_state.m_expansionStatus == JustFinishedExpansion) {
1447 m_state.setExpansionStatus(NotExpanding);
1448 macroExpanded = true;
1449 }
1450
1451 // Update environment line information.
1452 synchronizeOutputLines(tk, macroExpanded);
1453
1454 // Make sure spacing between tokens is handled properly.
1455 enforceSpacing(tk, macroExpanded);
1456
1457 // Finally output the token.
1458 if (!tk.f.trigraph) {
1459 currentOutputBuffer().append(tk.tokenStart(), tk.bytes());
1460 } else {
1461 switch (tk.kind()) {
1462 case T_LBRACKET: currentOutputBuffer().append("["); break;
1463 case T_RBRACKET: currentOutputBuffer().append("]"); break;
1464 case T_LBRACE: currentOutputBuffer().append("{"); break;
1465 case T_RBRACE: currentOutputBuffer().append("}"); break;
1466 case T_POUND: currentOutputBuffer().append("#"); break;
1467 case T_POUND_POUND: currentOutputBuffer().append("##"); break;
1468 case T_CARET: currentOutputBuffer().append("^"); break;
1469 case T_CARET_EQUAL: currentOutputBuffer().append("^="); break;
1470 case T_PIPE: currentOutputBuffer().append("|"); break;
1471 case T_PIPE_EQUAL: currentOutputBuffer().append("|="); break;
1472 case T_TILDE: currentOutputBuffer().append("~"); break;
1473 case T_TILDE_EQUAL: currentOutputBuffer().append("~="); break;
1474 default: CPP_ASSERT(0, qDebug() << tk.spell()); break;
1475 }
1476 }
1477
1478 } while (tk.isNot(T_EOF_SYMBOL));
1479
1480 if (includeGuardMacroName) {
1481 if (m_state.m_includeGuardState == State::IncludeGuardState_AfterDefine
1482 || m_state.m_includeGuardState == State::IncludeGuardState_AfterEndif)
1483 *includeGuardMacroName = m_state.m_includeGuardMacroName;
1484 }
1485 delete m_state.m_lexer;
1486 while (m_state.m_tokenBuffer)
1487 m_state.popTokenBuffer();
1488 }
1489
scanComment(Preprocessor::PPToken * tk)1490 bool Preprocessor::scanComment(Preprocessor::PPToken *tk)
1491 {
1492 if (!tk->isComment())
1493 return false;
1494 synchronizeOutputLines(*tk);
1495 enforceSpacing(*tk, true);
1496 currentOutputBuffer().append(tk->tokenStart(), tk->bytes());
1497 return true;
1498 }
1499
consumeComments(PPToken * tk)1500 bool Preprocessor::consumeComments(PPToken *tk)
1501 {
1502 while (scanComment(tk))
1503 lex(tk);
1504 return tk->isNot(T_EOF_SYMBOL);
1505 }
1506
collectActualArguments(PPToken * tk,QVector<QVector<PPToken>> * actuals,const QByteArray & parentMacroName)1507 bool Preprocessor::collectActualArguments(PPToken *tk, QVector<QVector<PPToken> > *actuals,
1508 const QByteArray &parentMacroName)
1509 {
1510 Q_ASSERT(tk);
1511 Q_ASSERT(actuals);
1512
1513 ExecuteOnDestruction removeBlockedName;
1514 if (m_state.m_tokenBuffer) {
1515 removeBlockedName.reset([this] {
1516 if (m_state.m_tokenBuffer && !m_state.m_tokenBuffer->blockedMacroNames.empty())
1517 m_state.m_tokenBuffer->blockedMacroNames.pop_back();
1518 });
1519 m_state.m_tokenBuffer->blockedMacroNames.push_back(parentMacroName);
1520 }
1521
1522 lex(tk); // consume the identifier
1523
1524 bool lastCommentIsCpp = false;
1525 while (scanComment(tk)) {
1526 /* After C++ comments we need to add a new line
1527 e.g.
1528 #define foo(a, b) int a = b
1529 foo // comment
1530 (x, 3);
1531 can result in
1532 // commentint
1533 x = 3;
1534 */
1535 lastCommentIsCpp = tk->is(T_CPP_COMMENT) || tk->is(T_CPP_DOXY_COMMENT);
1536 lex(tk);
1537 }
1538 if (lastCommentIsCpp)
1539 maybeStartOutputLine();
1540
1541 if (tk->isNot(T_LPAREN))
1542 //### TODO: error message
1543 return false;
1544
1545 QVector<PPToken> tokens;
1546 lex(tk);
1547 scanActualArgument(tk, &tokens);
1548
1549 actuals->append(tokens);
1550
1551 while (tk->is(T_COMMA)) {
1552 lex(tk);
1553
1554 QVector<PPToken> tokens;
1555 scanActualArgument(tk, &tokens);
1556 actuals->append(tokens);
1557 }
1558
1559 if (!tk->is(T_RPAREN)) {
1560 return false;
1561 //###TODO: error message
1562 }
1563 return true;
1564 }
1565
scanActualArgument(PPToken * tk,QVector<PPToken> * tokens)1566 void Preprocessor::scanActualArgument(PPToken *tk, QVector<PPToken> *tokens)
1567 {
1568 Q_ASSERT(tokens);
1569
1570 int count = 0;
1571
1572 while (tk->isNot(T_EOF_SYMBOL)) {
1573 if (tk->is(T_LPAREN)) {
1574 ++count;
1575 } else if (tk->is(T_RPAREN)) {
1576 if (! count)
1577 break;
1578 --count;
1579 } else if (! count && tk->is(T_COMMA)) {
1580 break;
1581 }
1582
1583 if (m_keepComments
1584 && (tk->is(T_CPP_COMMENT) || tk->is(T_CPP_DOXY_COMMENT))) {
1585 // Even in keep comments mode, we cannot preserve C++ style comments inside the
1586 // expansion. We stick with GCC's approach which is to replace them by C style
1587 // comments (currently clang just gets rid of them) and transform internals */
1588 // into *|.
1589 QByteArray text = m_state.m_source.mid(tk->bytesBegin() + 2,
1590 tk->bytesEnd() - tk->bytesBegin() - 2);
1591 const QByteArray &comment = "/*" + text.replace("*/", "*|") + "*/";
1592 tokens->append(generateToken(T_COMMENT,
1593 comment.constData(), comment.size(),
1594 tk->lineno, false));
1595 } else {
1596 tokens->append(*tk);
1597 }
1598
1599 lex(tk);
1600 }
1601 }
1602
handlePreprocessorDirective(PPToken * tk)1603 void Preprocessor::handlePreprocessorDirective(PPToken *tk)
1604 {
1605 ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
1606
1607 PPToken poundToken = *tk;
1608 lex(tk); // scan the directive
1609
1610 if (tk->newline() && ! tk->joined())
1611 return; // nothing to do.
1612
1613 static const QByteArray ppDefine("define");
1614 static const QByteArray ppIf("if");
1615 static const QByteArray ppIfDef("ifdef");
1616 static const QByteArray ppIfNDef("ifndef");
1617 static const QByteArray ppEndIf("endif");
1618 static const QByteArray ppElse("else");
1619 static const QByteArray ppUndef("undef");
1620 static const QByteArray ppElif("elif");
1621 static const QByteArray ppInclude("include");
1622 static const QByteArray ppIncludeNext("include_next");
1623 static const QByteArray ppImport("import");
1624 //### TODO:
1625 // line
1626 // error
1627 // pragma
1628
1629 if (tk->is(T_IDENTIFIER)) {
1630 const ByteArrayRef directive = tk->asByteArrayRef();
1631
1632 if (!skipping() && directive == ppDefine) {
1633 handleDefineDirective(tk);
1634 } else if (directive == ppIfNDef) {
1635 handleIfDefDirective(true, tk);
1636 } else if (directive == ppEndIf) {
1637 handleEndIfDirective(tk, poundToken);
1638 } else {
1639 m_state.updateIncludeGuardState(State::IncludeGuardStateHint_OtherToken);
1640
1641 if (!skipping() && directive == ppUndef)
1642 handleUndefDirective(tk);
1643 else if (!skipping() && (directive == ppInclude
1644 || directive == ppImport))
1645 handleIncludeDirective(tk, false);
1646 else if (!skipping() && directive == ppIncludeNext)
1647 handleIncludeDirective(tk, true);
1648 else if (directive == ppIf)
1649 handleIfDirective(tk);
1650 else if (directive == ppIfDef)
1651 handleIfDefDirective(false, tk);
1652 else if (directive == ppElse)
1653 handleElseDirective(tk, poundToken);
1654 else if (directive == ppElif)
1655 handleElifDirective(tk, poundToken);
1656 }
1657 }
1658
1659 skipPreprocesorDirective(tk);
1660 }
1661
1662
handleIncludeDirective(PPToken * tk,bool includeNext)1663 void Preprocessor::handleIncludeDirective(PPToken *tk, bool includeNext)
1664 {
1665 if (m_cancelChecker && m_cancelChecker())
1666 return;
1667
1668 m_state.m_lexer->setScanAngleStringLiteralTokens(true);
1669 lex(tk); // consume "include" token
1670 m_state.m_lexer->setScanAngleStringLiteralTokens(false);
1671 const unsigned line = tk->lineno;
1672 QByteArray included;
1673
1674 if (tk->is(T_STRING_LITERAL) || tk->is(T_ANGLE_STRING_LITERAL)) {
1675 included = tk->asByteArrayRef().toByteArray();
1676 lex(tk); // consume string token
1677 } else {
1678 included = expand(tk);
1679 }
1680 included = included.trimmed();
1681
1682 if (included.isEmpty()) {
1683 //### TODO: error message
1684 return;
1685 }
1686
1687 // qDebug("include [[%s]]", included.constData());
1688 Client::IncludeType mode;
1689 if (includeNext)
1690 mode = Client::IncludeNext;
1691 else if (included.at(0) == '"')
1692 mode = Client::IncludeLocal;
1693 else if (included.at(0) == '<')
1694 mode = Client::IncludeGlobal;
1695 else
1696 return; //### TODO: add error message?
1697
1698 if (m_client) {
1699 QString inc = QString::fromUtf8(included.constData() + 1, included.size() - 2);
1700 m_client->sourceNeeded(line, inc, mode);
1701 }
1702 }
1703
handleDefineDirective(PPToken * tk)1704 void Preprocessor::handleDefineDirective(PPToken *tk)
1705 {
1706 const unsigned defineOffset = tk->byteOffset;
1707 lex(tk); // consume "define" token
1708
1709 if (!consumeComments(tk))
1710 return;
1711
1712 if (tk->isNot(T_IDENTIFIER))
1713 return;
1714
1715 Macro macro;
1716 macro.setFileName(m_env->currentFile);
1717 macro.setLine(tk->lineno);
1718 QByteArray macroName = tk->asByteArrayRef().toByteArray();
1719 macro.setName(macroName);
1720 macro.setBytesOffset(tk->byteOffset);
1721 macro.setUtf16charOffset(tk->utf16charOffset);
1722
1723 PPToken idToken(*tk);
1724
1725 lex(tk);
1726
1727 if (isContinuationToken(*tk) && tk->is(T_LPAREN) && ! tk->whitespace()) {
1728 macro.setFunctionLike(true);
1729
1730 lex(tk); // skip `('
1731 if (!consumeComments(tk))
1732 return;
1733
1734 bool hasIdentifier = false;
1735 if (isContinuationToken(*tk) && tk->is(T_IDENTIFIER)) {
1736 hasIdentifier = true;
1737 macro.addFormal(tk->asByteArrayRef().toByteArray());
1738
1739 lex(tk);
1740 if (!consumeComments(tk))
1741 return;
1742
1743 while (isContinuationToken(*tk) && tk->is(T_COMMA)) {
1744 lex(tk);
1745 if (!consumeComments(tk))
1746 return;
1747
1748 if (isContinuationToken(*tk) && tk->is(T_IDENTIFIER)) {
1749 macro.addFormal(tk->asByteArrayRef().toByteArray());
1750 lex(tk);
1751 if (!consumeComments(tk))
1752 return;
1753 } else {
1754 hasIdentifier = false;
1755 }
1756 }
1757 }
1758
1759 if (tk->is(T_DOT_DOT_DOT)) {
1760 macro.setVariadic(true);
1761 if (!hasIdentifier)
1762 macro.addFormal("__VA_ARGS__");
1763 lex(tk); // consume elipsis token
1764 if (!consumeComments(tk))
1765 return;
1766 }
1767 if (isContinuationToken(*tk) && tk->is(T_RPAREN))
1768 lex(tk); // consume ")" token
1769 } else {
1770 if (m_state.m_ifLevel == 1)
1771 m_state.updateIncludeGuardState(State::IncludeGuardStateHint_Define, &idToken);
1772 }
1773
1774 QVector<PPToken> bodyTokens;
1775 unsigned previousBytesOffset = 0;
1776 unsigned previousUtf16charsOffset = 0;
1777 unsigned previousLine = 0;
1778 Macro *macroReference = nullptr;
1779 while (isContinuationToken(*tk)) {
1780 // Macro tokens are always marked as expanded. However, only for object-like macros
1781 // we mark them as generated too. For function-like macros we postpone it until the
1782 // formals are identified in the bodies.
1783 tk->f.expanded = true;
1784 if (!macro.isFunctionLike())
1785 tk->f.generated = true;
1786
1787 // Identifiers must not be eagerly expanded inside defines, but we should still notify
1788 // in the case they are macros.
1789 if (tk->is(T_IDENTIFIER) && m_client) {
1790 macroReference = m_env->resolve(tk->asByteArrayRef());
1791 if (macroReference) {
1792 if (!macroReference->isFunctionLike()) {
1793 m_client->notifyMacroReference(tk->byteOffset, tk->utf16charOffset,
1794 tk->lineno, *macroReference);
1795 macroReference = nullptr;
1796 }
1797 }
1798 } else if (macroReference) {
1799 if (m_client && tk->is(T_LPAREN)) {
1800 m_client->notifyMacroReference(previousBytesOffset, previousUtf16charsOffset,
1801 previousLine, *macroReference);
1802 }
1803 macroReference = nullptr;
1804 }
1805
1806 previousBytesOffset = tk->byteOffset;
1807 previousUtf16charsOffset = tk->utf16charOffset;
1808 previousLine = tk->lineno;
1809
1810 if (!scanComment(tk))
1811 bodyTokens.push_back(*tk);
1812
1813 lex(tk);
1814 }
1815
1816 if (isQtReservedWord(macroName.data(), macroName.size())) {
1817 QByteArray macroId = macro.name();
1818
1819 if (macro.isFunctionLike()) {
1820 macroId += '(';
1821 bool fst = true;
1822 foreach (const QByteArray &formal, macro.formals()) {
1823 if (! fst)
1824 macroId += ", ";
1825 fst = false;
1826 macroId += formal;
1827 }
1828 macroId += ')';
1829 }
1830
1831 bodyTokens.clear();
1832 macro.setDefinition(macroId, bodyTokens);
1833 } else if (!bodyTokens.isEmpty()) {
1834 PPToken &firstBodyToken = bodyTokens[0];
1835 int start = firstBodyToken.byteOffset;
1836 int len = tk->byteOffset - start;
1837 QByteArray bodyText = firstBodyToken.source().mid(start, len).trimmed();
1838
1839 const int bodySize = bodyTokens.size();
1840 for (int i = 0; i < bodySize; ++i) {
1841 PPToken &t = bodyTokens[i];
1842 if (t.hasSource())
1843 t.squeezeSource();
1844 }
1845 macro.setDefinition(bodyText, bodyTokens);
1846 }
1847
1848 macro.setLength(tk->byteOffset - defineOffset);
1849 m_env->bind(macro);
1850
1851 // qDebug() << "adding macro" << macro.name() << "defined at" << macro.fileName() << ":"<<macro.line();
1852
1853 if (m_client)
1854 m_client->macroAdded(macro);
1855 }
1856
expand(PPToken * tk,PPToken * lastConditionToken)1857 QByteArray Preprocessor::expand(PPToken *tk, PPToken *lastConditionToken)
1858 {
1859 unsigned line = tk->lineno;
1860 unsigned bytesBegin = tk->bytesBegin();
1861 const int originalOffset = tk->originalOffset();
1862 unsigned utf16charsBegin = tk->utf16charsBegin();
1863 PPToken lastTk;
1864 while (isContinuationToken(*tk)) {
1865 lastTk = *tk;
1866 lex(tk);
1867 }
1868 // Gather the exact spelling of the content in the source.
1869 QByteArray condition(m_state.m_source.mid(originalOffset, lastTk.originalOffset() + lastTk.bytes()
1870 - originalOffset));
1871
1872 // qDebug("*** Condition before: [%s]", condition.constData());
1873 QByteArray result;
1874 result.reserve(256);
1875 preprocess(m_state.m_currentFileName, condition, &result, nullptr, true, false, true,
1876 bytesBegin, utf16charsBegin, line);
1877 result.squeeze();
1878 // qDebug("*** Condition after: [%s]", result.constData());
1879
1880 if (lastConditionToken)
1881 *lastConditionToken = lastTk;
1882
1883 return result;
1884 }
1885
evalExpression(PPToken * tk,Value & result)1886 const PPToken Preprocessor::evalExpression(PPToken *tk, Value &result)
1887 {
1888 PPToken lastConditionToken;
1889 const QByteArray expanded = expand(tk, &lastConditionToken);
1890 Lexer lexer(expanded.constData(), expanded.constData() + expanded.size());
1891 lexer.setPreprocessorMode(true);
1892 std::vector<Token> buf;
1893 Token t;
1894 do {
1895 lexer.scan(&t);
1896 buf.push_back(t);
1897 } while (t.isNot(T_EOF_SYMBOL));
1898 ExpressionEvaluator eval(m_client, m_env);
1899 result = eval(&buf[0], &buf[buf.size() - 1], expanded);
1900 return lastConditionToken;
1901 }
1902
handleIfDirective(PPToken * tk)1903 void Preprocessor::handleIfDirective(PPToken *tk)
1904 {
1905 lex(tk); // consume "if" token
1906 Value result;
1907 const PPToken lastExpressionToken = evalExpression(tk, result);
1908
1909 if (m_state.m_ifLevel >= MAX_LEVEL - 1) {
1910 nestingTooDeep();
1911 return;
1912 }
1913
1914 const bool value = !result.is_zero();
1915
1916 const bool wasSkipping = m_state.m_skipping[m_state.m_ifLevel];
1917 ++m_state.m_ifLevel;
1918 m_state.m_trueTest[m_state.m_ifLevel] = value;
1919 if (wasSkipping) {
1920 m_state.m_skipping[m_state.m_ifLevel] = wasSkipping;
1921 } else {
1922 bool startSkipping = !value;
1923 m_state.m_skipping[m_state.m_ifLevel] = startSkipping;
1924 if (startSkipping && m_client)
1925 startSkippingBlocks(lastExpressionToken);
1926 }
1927
1928 }
1929
handleElifDirective(PPToken * tk,const PPToken & poundToken)1930 void Preprocessor::handleElifDirective(PPToken *tk, const PPToken £Token)
1931 {
1932 if (m_state.m_ifLevel == 0) {
1933 // std::cerr << "*** WARNING #elif without #if" << std::endl;
1934 handleIfDirective(tk);
1935 } else {
1936 lex(tk); // consume "elif" token
1937 if (m_state.m_skipping[m_state.m_ifLevel - 1]) {
1938 // we keep on skipping because we are nested in a skipped block
1939 m_state.m_skipping[m_state.m_ifLevel] = true;
1940 } else if (m_state.m_trueTest[m_state.m_ifLevel]) {
1941 if (!m_state.m_skipping[m_state.m_ifLevel]) {
1942 // start skipping because the preceding then-part was not skipped
1943 m_state.m_skipping[m_state.m_ifLevel] = true;
1944 if (m_client)
1945 startSkippingBlocks(poundToken);
1946 }
1947 } else {
1948 // preceding then-part was skipped, so calculate if we should start
1949 // skipping, depending on the condition
1950 Value result;
1951 evalExpression(tk, result);
1952
1953 bool startSkipping = result.is_zero();
1954 m_state.m_trueTest[m_state.m_ifLevel] = !startSkipping;
1955 m_state.m_skipping[m_state.m_ifLevel] = startSkipping;
1956 if (m_client && !startSkipping)
1957 m_client->stopSkippingBlocks(poundToken.utf16charOffset - 1);
1958 }
1959 }
1960 }
1961
handleElseDirective(PPToken * tk,const PPToken & poundToken)1962 void Preprocessor::handleElseDirective(PPToken *tk, const PPToken £Token)
1963 {
1964 lex(tk); // consume "else" token
1965
1966 if (m_state.m_ifLevel != 0) {
1967 if (m_state.m_skipping[m_state.m_ifLevel - 1]) {
1968 // we keep on skipping because we are nested in a skipped block
1969 m_state.m_skipping[m_state.m_ifLevel] = true;
1970 } else {
1971 bool wasSkipping = m_state.m_skipping[m_state.m_ifLevel];
1972 bool startSkipping = m_state.m_trueTest[m_state.m_ifLevel];
1973 m_state.m_skipping[m_state.m_ifLevel] = startSkipping;
1974
1975 if (m_client && wasSkipping && !startSkipping)
1976 m_client->stopSkippingBlocks(poundToken.utf16charOffset - 1);
1977 else if (m_client && !wasSkipping && startSkipping)
1978 startSkippingBlocks(poundToken);
1979 }
1980 #ifndef NO_DEBUG
1981 } else {
1982 std::cerr << "*** WARNING #else without #if" << std::endl;
1983 #endif // NO_DEBUG
1984 }
1985 }
1986
handleEndIfDirective(PPToken * tk,const PPToken & poundToken)1987 void Preprocessor::handleEndIfDirective(PPToken *tk, const PPToken £Token)
1988 {
1989 if (m_state.m_ifLevel == 0) {
1990 #ifndef NO_DEBUG
1991 std::cerr << "*** WARNING #endif without #if";
1992 if (!tk->generated())
1993 std::cerr << " on line " << tk->lineno << " of file " << m_state.m_currentFileName.toUtf8().constData();
1994 std::cerr << std::endl;
1995 #endif // NO_DEBUG
1996 } else {
1997 bool wasSkipping = m_state.m_skipping[m_state.m_ifLevel];
1998 m_state.m_skipping[m_state.m_ifLevel] = false;
1999 m_state.m_trueTest[m_state.m_ifLevel] = false;
2000 --m_state.m_ifLevel;
2001 if (m_client && wasSkipping && !m_state.m_skipping[m_state.m_ifLevel])
2002 m_client->stopSkippingBlocks(poundToken.utf16charOffset - 1);
2003
2004 if (m_state.m_ifLevel == 0)
2005 m_state.updateIncludeGuardState(State::IncludeGuardStateHint_Endif);
2006 }
2007
2008 lex(tk); // consume "endif" token
2009 }
2010
handleIfDefDirective(bool checkUndefined,PPToken * tk)2011 void Preprocessor::handleIfDefDirective(bool checkUndefined, PPToken *tk)
2012 {
2013 lex(tk); // consume "ifdef" token
2014 if (tk->is(T_IDENTIFIER)) {
2015 if (checkUndefined && m_state.m_ifLevel == 0)
2016 m_state.updateIncludeGuardState(State::IncludeGuardStateHint_Ifndef, tk);
2017
2018 bool value = false;
2019 const ByteArrayRef macroName = tk->asByteArrayRef();
2020 if (Macro *macro = macroDefinition(macroName, tk->byteOffset, tk->utf16charOffset,
2021 tk->lineno, m_env, m_client)) {
2022 value = true;
2023
2024 // the macro is a feature constraint(e.g. QT_NO_XXX)
2025 if (checkUndefined && macroName.startsWith("QT_NO_")) {
2026 if (macro->fileName() == configurationFileName()) {
2027 // and it' defined in a pro file (e.g. DEFINES += QT_NO_QOBJECT)
2028
2029 value = false; // take the branch
2030 }
2031 }
2032 } else if (Environment::isBuiltinMacro(macroName)) {
2033 value = true;
2034 }
2035
2036 if (checkUndefined)
2037 value = !value;
2038
2039 const bool wasSkipping = m_state.m_skipping[m_state.m_ifLevel];
2040
2041 if (m_state.m_ifLevel < MAX_LEVEL - 1) {
2042 ++m_state.m_ifLevel;
2043 m_state.m_trueTest[m_state.m_ifLevel] = value;
2044 m_state.m_skipping[m_state.m_ifLevel] = wasSkipping ? wasSkipping : !value;
2045
2046 if (m_client && !wasSkipping && !value)
2047 startSkippingBlocks(*tk);
2048 } else {
2049 nestingTooDeep();
2050 }
2051
2052 lex(tk); // consume the identifier
2053 #ifndef NO_DEBUG
2054 } else {
2055 std::cerr << "*** WARNING #ifdef without identifier" << std::endl;
2056 #endif // NO_DEBUG
2057 }
2058 }
2059
handleUndefDirective(PPToken * tk)2060 void Preprocessor::handleUndefDirective(PPToken *tk)
2061 {
2062 lex(tk); // consume "undef" token
2063 if (tk->is(T_IDENTIFIER)) {
2064 const ByteArrayRef macroName = tk->asByteArrayRef();
2065 const unsigned bytesOffset = tk->byteOffset + m_state.m_bytesOffsetRef;
2066 const unsigned utf16charsOffset = tk->utf16charOffset + m_state.m_utf16charsOffsetRef;
2067 // Track macro use if previously defined
2068 if (m_client) {
2069 if (const Macro *existingMacro = m_env->resolve(macroName)) {
2070 m_client->notifyMacroReference(bytesOffset, utf16charsOffset,
2071 tk->lineno, *existingMacro);
2072 }
2073 }
2074 synchronizeOutputLines(*tk);
2075 Macro *macro = m_env->remove(macroName);
2076
2077 if (m_client && macro) {
2078 macro->setBytesOffset(bytesOffset);
2079 macro->setUtf16charOffset(utf16charsOffset);
2080 m_client->macroAdded(*macro);
2081 }
2082 lex(tk); // consume macro name
2083 #ifndef NO_DEBUG
2084 } else {
2085 std::cerr << "*** WARNING #undef without identifier" << std::endl;
2086 #endif // NO_DEBUG
2087 }
2088 }
2089
generateToken(enum Kind kind,const char * content,int length,unsigned lineno,bool addQuotes,bool addToControl)2090 PPToken Preprocessor::generateToken(enum Kind kind,
2091 const char *content, int length,
2092 unsigned lineno,
2093 bool addQuotes,
2094 bool addToControl)
2095 {
2096 // When the token is a generated token, the column position cannot be
2097 // reconstructed, but we also have to prevent it from searching the whole
2098 // scratch buffer. So inserting a newline before the new token will give
2099 // an indent width of 0 (zero).
2100 m_scratchBuffer.append('\n');
2101
2102 const size_t pos = m_scratchBuffer.size();
2103
2104 if (kind == T_STRING_LITERAL && addQuotes)
2105 m_scratchBuffer.append('"');
2106 m_scratchBuffer.append(content, length);
2107 if (kind == T_STRING_LITERAL && addQuotes) {
2108 m_scratchBuffer.append('"');
2109 length += 2;
2110 }
2111
2112 PPToken tk(m_scratchBuffer);
2113 tk.f.kind = kind;
2114 if (m_state.m_lexer->control() && addToControl) {
2115 if (kind == T_STRING_LITERAL)
2116 tk.string = m_state.m_lexer->control()->stringLiteral(m_scratchBuffer.constData() + pos, length);
2117 else if (kind == T_IDENTIFIER)
2118 tk.identifier = m_state.m_lexer->control()->identifier(m_scratchBuffer.constData() + pos, length);
2119 else if (kind == T_NUMERIC_LITERAL)
2120 tk.number = m_state.m_lexer->control()->numericLiteral(m_scratchBuffer.constData() + pos, length);
2121 }
2122 tk.byteOffset = unsigned(pos);
2123 tk.f.bytes = length;
2124 tk.f.generated = true;
2125 tk.f.expanded = true;
2126 tk.lineno = lineno;
2127
2128 return tk;
2129 }
2130
// Builds one generated T_IDENTIFIER token whose spelling is the bytes of
// leftTk followed by the bytes of rightTk. The new token takes its line
// number and its whitespace flag from leftTk.
PPToken Preprocessor::generateConcatenated(const PPToken &leftTk, const PPToken &rightTk)
{
    const auto leftLength = leftTk.bytes();
    const auto rightLength = rightTk.bytes();

    QByteArray joined;
    joined.reserve(leftLength + rightLength);
    joined.append(leftTk.tokenStart(), leftLength);
    joined.append(rightTk.tokenStart(), rightLength);

    PPToken concatenated = generateToken(T_IDENTIFIER, joined.constData(), joined.size(),
                                         leftTk.lineno, true);
    concatenated.f.whitespace = leftTk.whitespace();
    return concatenated;
}
2141
startSkippingBlocks(const Preprocessor::PPToken & tk) const2142 void Preprocessor::startSkippingBlocks(const Preprocessor::PPToken &tk) const
2143 {
2144 if (!m_client)
2145 return;
2146
2147 unsigned utf16charIter = tk.utf16charsEnd();
2148 const char *source = tk.source().constData() + tk.bytesEnd();
2149 const char *sourceEnd = tk.source().constEnd();
2150 unsigned char yychar = *source;
2151
2152 do {
2153 if (yychar == '\n') {
2154 m_client->startSkippingBlocks(utf16charIter + 1);
2155 return;
2156 }
2157 Lexer::yyinp_utf8(source, yychar, utf16charIter);
2158 } while (source < sourceEnd);
2159 }
2160
atStartOfOutputLine() const2161 bool Preprocessor::atStartOfOutputLine() const
2162 {
2163 const QByteArray *buffer = m_state.m_currentExpansion;
2164 return buffer->isEmpty() || buffer->endsWith('\n');
2165 }
2166
maybeStartOutputLine()2167 void Preprocessor::maybeStartOutputLine()
2168 {
2169 QByteArray &buffer = currentOutputBuffer();
2170 if (buffer.isEmpty())
2171 return;
2172 if (!buffer.endsWith('\n'))
2173 buffer.append('\n');
2174 // If previous line ends with \ (possibly followed by whitespace), add another \n
2175 const char *start = buffer.constData();
2176 const char *ch = start + buffer.length() - 2;
2177 while (ch > start && (*ch != '\n') && pp_isspace(*ch))
2178 --ch;
2179 if (*ch == '\\')
2180 buffer.append('\n');
2181 }
2182