1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of Qt Creator.
7 **
8 ** Commercial License Usage
9 ** Licensees holding valid commercial Qt licenses may use this file in
10 ** accordance with the commercial license agreement provided with the
11 ** Software or, alternatively, in accordance with the terms contained in
12 ** a written agreement between you and The Qt Company. For licensing terms
13 ** and conditions see https://www.qt.io/terms-conditions. For further
14 ** information use the contact form at https://www.qt.io/contact-us.
15 **
16 ** GNU General Public License Usage
17 ** Alternatively, this file may be used under the terms of the GNU
18 ** General Public License version 3 as published by the Free Software
19 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
20 ** included in the packaging of this file. Please review the following
21 ** information to ensure the GNU General Public License requirements will
22 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
23 **
24 ****************************************************************************/
25 
26 /*
27   Copyright 2005 Roberto Raggi <roberto@kdevelop.org>
28 
29   Permission to use, copy, modify, distribute, and sell this software and its
30   documentation for any purpose is hereby granted without fee, provided that
31   the above copyright notice appear in all copies and that both that
32   copyright notice and this permission notice appear in supporting
33   documentation.
34 
35   The above copyright notice and this permission notice shall be included in
36   all copies or substantial portions of the Software.
37 
38   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
39   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
40   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
41   KDEVELOP TEAM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
42   AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
43   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
44 */
45 
46 #include "pp.h"
47 #include "pp-cctype.h"
48 
49 #include <cplusplus/Control.h>
50 #include <cplusplus/Lexer.h>
51 #include <cplusplus/Token.h>
52 #include <cplusplus/Literals.h>
53 #include <cplusplus/cppassert.h>
54 
55 #include <utils/executeondestruction.h>
56 #include <utils/scopedswap.h>
57 
58 #include <QDebug>
59 #include <QList>
60 #include <QDate>
61 #include <QTime>
62 #include <QPair>
63 
64 #include <cctype>
65 #include <list>
66 #include <algorithm>
67 
68 #define NO_DEBUG
69 
70 #ifndef NO_DEBUG
71 #  include <iostream>
72 #endif // NO_DEBUG
73 
74 #include <deque>
75 
76 using namespace Utils;
77 
78 namespace {
// Upper bounds used by the preprocessor in this file.
enum {
    MAX_FUNCTION_LIKE_ARGUMENTS_COUNT = 100,
    MAX_TOKEN_EXPANSION_COUNT = 5000,
    // For when macros are using some kind of right-folding: this bounds the
    // list of "delayed" buffers waiting to be expanded after the current one.
    MAX_TOKEN_BUFFER_DEPTH = 16000
};
84 }
85 
86 namespace {
/// Returns true when strncmp() reports no difference within the first
/// \a size characters of \a a and \a b.
static bool same(const char *a, const char *b, int size)
{
    const int difference = strncmp(a, b, size);
    return difference == 0;
}
91 
isQtReservedWord(const char * name,int size)92 static bool isQtReservedWord(const char *name, int size)
93 {
94     if (size < 3)
95         return false;
96 
97     const char c = name[0];
98     if (c == 'Q') {
99         if (name[1] == '_') {
100             name += 2;
101             size -= 2;
102             switch (size) {
103             case 1:
104                 return name[0] == 'D' || name[0] == 'Q';
105             case 4:
106                 return same(name, "SLOT", size) || same(name, "EMIT", size);
107             case 5:
108                 return same(name, "SLOTS", size) || same(name, "ENUMS", size)
109                         || same(name, "FLAGS", size);
110             case 6:
111                 return same(name, "SIGNAL", size);
112             case 7:
113                 return same(name, "SIGNALS", size) || same(name, "FOREACH", size);
114             case 8:
115                 return same(name, "PROPERTY", size);
116             case 9:
117                 return same(name, "INVOKABLE", size);
118             case 10:
119                 return same(name, "INTERFACES", size);
120             case 16:
121                 return same(name, "PRIVATE_PROPERTY", size);
122             }
123         }
124         return false;
125     }
126 
127     if (c == 'S')
128         return (size == 6 && same(name, "SIGNAL", size)) || (size == 4 && same(name, "SLOT", size));
129 
130     if (c == 's')
131         return (size == 7 && same(name, "signals", size)) || (size == 5 && same(name, "slots", size));
132 
133     if (c == 'f')
134         return size == 7 && same(name, "foreach", size);
135 
136     if (c == 'e')
137         return size == 4 && same(name, "emit", size);
138 
139     return false;
140 }
141 
nestingTooDeep()142 static void nestingTooDeep()
143 {
144 #ifndef NO_DEBUG
145         std::cerr << "*** WARNING #if / #ifdef nesting exceeded the max level " << MAX_LEVEL << std::endl;
146 #endif
147 }
148 
149 } // anonymous namespace
150 
151 namespace CPlusPlus {
152 
153 namespace Internal {
154 /// Buffers tokens for the Preprocessor::lex() to read next. Do not use  this
155 /// class directly, but use Preprocessor::State::pushTokenBuffer .
156 ///
157 /// New tokens are added when undoing look-ahead, or after expanding a macro.
158 /// When macro expansion happened, the macro is passed in, and blocked until
159 /// all tokens generated by it (and by subsequent expansion of those generated
160 /// tokens) are read from the buffer. See Preprocessor::lex() for details on
161 /// exactly when the buffer (and subsequently a blocking macro) is removed.
162 struct TokenBuffer
163 {
164     std::deque<PPToken> tokens;
165     std::vector<QByteArray> blockedMacroNames;
166     const Macro *macro;
167     TokenBuffer *next;
168 
TokenBufferCPlusPlus::Internal::TokenBuffer169     TokenBuffer(const PPToken *start, const PPToken *end, const Macro *macro, TokenBuffer *next)
170         : tokens(start, end), macro(macro), next(next)
171     {}
172 
isBlockedCPlusPlus::Internal::TokenBuffer173     bool isBlocked(const Macro *macro) const {
174         if (!macro)
175             return false;
176 
177         for (const TokenBuffer *it = this; it; it = it->next) {
178             if (it->macro && (it->macro == macro || it->macro->name() == macro->name()))
179                 return true;
180         }
181         for (const QByteArray &blockedMacroName : blockedMacroNames) {
182             if (macro->name() == blockedMacroName)
183                 return true;
184         }
185         return false;
186     }
187 };
188 
/// Integer result of evaluating (part of) a preprocessor expression.
///
/// The number is kept either as a signed or as an unsigned long; \c kind
/// records which of the two union members was written last.
struct Value
{
    enum Kind {
        Kind_Long,
        Kind_ULong
    };

    Kind kind;

    union {
        long l;
        unsigned long ul;
    };


    /// Constructs a signed zero.
    Value()
        : kind(Kind_Long), l(0)
    { }

    inline bool is_ulong () const
    { return kind == Kind_ULong; }

    inline void set_ulong (unsigned long v)
    {
        ul = v;
        kind = Kind_ULong;
    }

    inline void set_long (long v)
    {
        l = v;
        kind = Kind_Long;
    }

    /// Tests the signed member, whichever kind is stored.
    // NOTE(review): like the operators below, this reads a union member that
    // may not be the one written last and relies on the compiler allowing it.
    inline bool is_zero () const
    { return l == 0; }

// Generates `Value operator op(const Value &)`. If either operand is unsigned
// the operation is carried out on the `ul` members and the result is stored
// as unsigned; otherwise it is carried out on the `l` members and stored as
// signed. Comparison and logical operators therefore yield 0 or 1 of the same
// kind. The `name` argument is not used by the expansion.
#define PP_DEFINE_BIN_OP(name, op) \
    inline Value operator op(const Value &other) const \
    { \
        Value v = *this; \
        if (v.is_ulong () || other.is_ulong ()) \
            v.set_ulong (v.ul op other.ul); \
        else \
            v.set_long (v.l op other.l); \
        return v; \
    }

    PP_DEFINE_BIN_OP(op_add, +)
    PP_DEFINE_BIN_OP(op_sub, -)
    PP_DEFINE_BIN_OP(op_mult, *)
    PP_DEFINE_BIN_OP(op_lhs, <<)
    PP_DEFINE_BIN_OP(op_rhs, >>)
    PP_DEFINE_BIN_OP(op_lt, <)
    PP_DEFINE_BIN_OP(op_gt, >)
    PP_DEFINE_BIN_OP(op_le, <=)
    PP_DEFINE_BIN_OP(op_ge, >=)
    PP_DEFINE_BIN_OP(op_eq, ==)
    PP_DEFINE_BIN_OP(op_ne, !=)
    PP_DEFINE_BIN_OP(op_bit_and, &)
    PP_DEFINE_BIN_OP(op_bit_or, |)
    PP_DEFINE_BIN_OP(op_bit_xor, ^)
    PP_DEFINE_BIN_OP(op_and, &&)
    PP_DEFINE_BIN_OP(op_or, ||)

#undef PP_DEFINE_BIN_OP

    /// Division with the same signed/unsigned rules as the generated
    /// operators, but without the two inputs that would otherwise be
    /// undefined behaviour (and a hardware trap on x86):
    ///  - a zero divisor yields a default (signed zero) Value;
    ///  - a signed divisor of -1 is computed as a wrapping negation, so the
    ///    most negative long divided by -1 yields itself instead of
    ///    overflowing.
    inline Value operator /(const Value &other) const
    {
        if (other.is_zero ())
            return Value();

        Value v = *this;
        if (v.is_ulong () || other.is_ulong ())
            v.set_ulong (v.ul / other.ul);
        else if (other.l == -1)
            v.set_long (static_cast<long>(0UL - static_cast<unsigned long>(v.l)));
        else
            v.set_long (v.l / other.l);
        return v;
    }

    /// Remainder counterpart of operator/: a zero divisor yields a default
    /// Value, and a signed divisor of -1 yields 0 without performing the
    /// (possibly overflowing) division.
    inline Value operator %(const Value &other) const
    {
        if (other.is_zero ())
            return Value();

        Value v = *this;
        if (v.is_ulong () || other.is_ulong ())
            v.set_ulong (v.ul % other.ul);
        else if (other.l == -1)
            v.set_long (0);
        else
            v.set_long (v.l % other.l);
        return v;
    }
};
258 
259 } // namespace Internal
260 } // namespace CPlusPlus
261 
262 using namespace CPlusPlus;
263 using namespace CPlusPlus::Internal;
264 
265 namespace {
266 
isContinuationToken(const PPToken & tk)267 inline bool isContinuationToken(const PPToken &tk)
268 {
269     return tk.isNot(T_EOF_SYMBOL) && (! tk.newline() || tk.joined());
270 }
271 
macroDefinition(const ByteArrayRef & name,unsigned bytesOffset,unsigned utf16charsOffset,unsigned line,Environment * env,Client * client)272 Macro *macroDefinition(const ByteArrayRef &name,
273                        unsigned bytesOffset,
274                        unsigned utf16charsOffset,
275                        unsigned line,
276                        Environment *env,
277                        Client *client)
278 {
279     Macro *m = env->resolve(name);
280     if (client) {
281         if (m)
282             client->passedMacroDefinitionCheck(bytesOffset, utf16charsOffset, line, *m);
283         else
284             client->failedMacroDefinitionCheck(bytesOffset, utf16charsOffset, name);
285     }
286     return m;
287 }
288 
289 class RangeLexer
290 {
291     const Token *first;
292     const Token *last;
293     Token trivial;
294 
295 public:
RangeLexer(const Token * first,const Token * last)296     inline RangeLexer(const Token *first, const Token *last)
297         : first(first), last(last)
298     {
299         // WARN: `last' must be a valid iterator.
300         trivial.byteOffset = last->byteOffset;
301         trivial.utf16charOffset = last->utf16charOffset;
302     }
303 
operator bool() const304     inline explicit operator bool() const
305     { return first != last; }
306 
isValid() const307     inline bool isValid() const
308     { return first != last; }
309 
size() const310     inline int size() const
311     { return std::distance(first, last); }
312 
dot() const313     inline const Token *dot() const
314     { return first; }
315 
operator *() const316     inline const Token &operator*() const
317     {
318         if (first != last)
319             return *first;
320 
321         return trivial;
322     }
323 
operator ->() const324     inline const Token *operator->() const
325     {
326         if (first != last)
327             return first;
328 
329         return &trivial;
330     }
331 
operator ++()332     inline RangeLexer &operator++()
333     {
334         ++first;
335         return *this;
336     }
337 };
338 
339 class ExpressionEvaluator
340 {
341     ExpressionEvaluator(const ExpressionEvaluator &other);
342     void operator = (const ExpressionEvaluator &other);
343 
344 public:
ExpressionEvaluator(Client * client,Environment * env)345     ExpressionEvaluator(Client *client, Environment *env)
346         : client(client), env(env), _lex(nullptr)
347     { }
348 
operator ()(const Token * firstToken,const Token * lastToken,const QByteArray & source)349     Value operator()(const Token *firstToken, const Token *lastToken,
350                      const QByteArray &source)
351     {
352         this->source = source;
353         const Value previousValue = switchValue(Value());
354         RangeLexer tmp(firstToken, lastToken);
355         RangeLexer *previousLex = _lex;
356         _lex = &tmp;
357         process_expression();
358         _lex = previousLex;
359         return switchValue(previousValue);
360     }
361 
362 protected:
switchValue(const Value & value)363     Value switchValue(const Value &value)
364     {
365         Value previousValue = _value;
366         _value = value;
367         return previousValue;
368     }
369 
isTokenDefined() const370     bool isTokenDefined() const
371     {
372         if ((*_lex)->isNot(T_IDENTIFIER))
373             return false;
374         const ByteArrayRef spell = tokenSpell();
375         if (spell.size() != 7)
376             return false;
377         return spell == "defined";
378     }
379 
tokenPosition() const380     const char *tokenPosition() const
381     {
382         return source.constData() + (*_lex)->byteOffset;
383     }
384 
tokenLength() const385     int tokenLength() const
386     {
387         return (*_lex)->f.bytes;
388     }
389 
tokenSpell() const390     ByteArrayRef tokenSpell() const
391     {
392         return ByteArrayRef(tokenPosition(), tokenLength());
393     }
394 
process_expression()395     inline void process_expression()
396     { process_constant_expression(); }
397 
process_primary()398     void process_primary()
399     {
400         if ((*_lex)->is(T_NUMERIC_LITERAL)) {
401             const char *spell = tokenPosition();
402             int len = tokenLength();
403             while (len) {
404                 const char ch = spell[len - 1];
405 
406                 if (! (ch == 'u' || ch == 'U' || ch == 'l' || ch == 'L'))
407                     break;
408                 --len;
409             }
410 
411             const char *end = spell + len;
412             char *vend = const_cast<char *>(end);
413             _value.set_long(strtol(spell, &vend, 0));
414             // TODO: if (vend != end) error(NaN)
415             // TODO: binary literals
416             // TODO: float literals
417             ++(*_lex);
418         } else if (isTokenDefined()) {
419             ++(*_lex);
420             if ((*_lex)->is(T_IDENTIFIER)) {
421                 _value.set_long(macroDefinition(tokenSpell(),
422                                                 (*_lex)->byteOffset,
423                                                 (*_lex)->utf16charOffset,
424                                                 (*_lex)->lineno, env, client)
425                                 != nullptr);
426                 ++(*_lex);
427             } else if ((*_lex)->is(T_LPAREN)) {
428                 ++(*_lex);
429                 if ((*_lex)->is(T_IDENTIFIER)) {
430                     _value.set_long(macroDefinition(tokenSpell(),
431                                                     (*_lex)->byteOffset,
432                                                     (*_lex)->utf16charOffset,
433                                                     (*_lex)->lineno,
434                                                     env, client)
435                                     != nullptr);
436                     ++(*_lex);
437                     if ((*_lex)->is(T_RPAREN))
438                         ++(*_lex);
439                 }
440             }
441         } else if ((*_lex)->is(T_IDENTIFIER)) {
442             _value.set_long(0);
443             ++(*_lex);
444         } else if ((*_lex)->is(T_MINUS)) {
445             ++(*_lex);
446             process_primary();
447             _value.set_long(- _value.l);
448         } else if ((*_lex)->is(T_PLUS)) {
449             ++(*_lex);
450             process_primary();
451         } else if ((*_lex)->is(T_TILDE)) {
452             ++(*_lex);
453             process_primary();
454             _value.set_long(~ _value.l);
455         } else if ((*_lex)->is(T_EXCLAIM)) {
456             ++(*_lex);
457             process_primary();
458             _value.set_long(_value.is_zero());
459         } else if ((*_lex)->is(T_LPAREN)) {
460             ++(*_lex);
461             process_expression();
462             if ((*_lex)->is(T_RPAREN))
463                 ++(*_lex);
464         }
465     }
466 
process_expression_with_operator_precedence(const Value & lhs,int minPrecedence)467     Value process_expression_with_operator_precedence(const Value &lhs, int minPrecedence)
468     {
469         Value result = lhs;
470 
471         while (precedence((*_lex)->kind()) >= minPrecedence) {
472             const int oper = (*_lex)->kind();
473             const int operPrecedence = precedence(oper);
474             ++(*_lex);
475             process_primary();
476             Value rhs = _value;
477 
478             for (int LA_token_kind = (*_lex)->kind(), LA_precedence = precedence(LA_token_kind);
479                     LA_precedence > operPrecedence && isBinaryOperator(LA_token_kind);
480                     LA_token_kind = (*_lex)->kind(), LA_precedence = precedence(LA_token_kind)) {
481                 rhs = process_expression_with_operator_precedence(rhs, LA_precedence);
482             }
483 
484             result = evaluate_expression(oper, result, rhs);
485         }
486 
487         return result;
488     }
489 
process_constant_expression()490     void process_constant_expression()
491     {
492         process_primary();
493         _value = process_expression_with_operator_precedence(_value, precedence(T_PIPE_PIPE));
494 
495         if ((*_lex)->is(T_QUESTION)) {
496             const Value cond = _value;
497             ++(*_lex);
498             process_constant_expression();
499             Value left = _value, right;
500             if ((*_lex)->is(T_COLON)) {
501                 ++(*_lex);
502                 process_constant_expression();
503                 right = _value;
504             }
505             _value = ! cond.is_zero() ? left : right;
506         }
507     }
508 
509 private:
precedence(int tokenKind) const510     inline int precedence(int tokenKind) const
511     {
512         switch (tokenKind) {
513         case T_PIPE_PIPE:       return 0;
514         case T_AMPER_AMPER:     return 1;
515         case T_PIPE:            return 2;
516         case T_CARET:           return 3;
517         case T_AMPER:           return 4;
518         case T_EQUAL_EQUAL:
519         case T_EXCLAIM_EQUAL:   return 5;
520         case T_GREATER:
521         case T_LESS:
522         case T_LESS_EQUAL:
523         case T_GREATER_EQUAL:   return 6;
524         case T_LESS_LESS:
525         case T_GREATER_GREATER: return 7;
526         case T_PLUS:
527         case T_MINUS:           return 8;
528         case T_STAR:
529         case T_SLASH:
530         case T_PERCENT:         return 9;
531 
532         default:
533             return -1;
534         }
535     }
536 
isBinaryOperator(int tokenKind)537     static inline bool isBinaryOperator(int tokenKind)
538     {
539         switch (tokenKind) {
540         case T_PIPE_PIPE:
541         case T_AMPER_AMPER:
542         case T_PIPE:
543         case T_CARET:
544         case T_AMPER:
545         case T_EQUAL_EQUAL:
546         case T_EXCLAIM_EQUAL:
547         case T_GREATER:
548         case T_LESS:
549         case T_LESS_EQUAL:
550         case T_GREATER_EQUAL:
551         case T_LESS_LESS:
552         case T_GREATER_GREATER:
553         case T_PLUS:
554         case T_MINUS:
555         case T_STAR:
556         case T_SLASH:
557         case T_PERCENT:
558             return true;
559 
560         default:
561             return false;
562         }
563     }
564 
evaluate_expression(int tokenKind,const Value & lhs,const Value & rhs)565     static inline Value evaluate_expression(int tokenKind, const Value &lhs, const Value &rhs)
566     {
567         switch (tokenKind) {
568         case T_PIPE_PIPE:       return lhs || rhs;
569         case T_AMPER_AMPER:     return lhs && rhs;
570         case T_PIPE:            return lhs | rhs;
571         case T_CARET:           return lhs ^ rhs;
572         case T_AMPER:           return lhs & rhs;
573         case T_EQUAL_EQUAL:     return lhs == rhs;
574         case T_EXCLAIM_EQUAL:   return lhs != rhs;
575         case T_GREATER:         return lhs > rhs;
576         case T_LESS:            return lhs < rhs;
577         case T_LESS_EQUAL:      return lhs <= rhs;
578         case T_GREATER_EQUAL:   return lhs >= rhs;
579         case T_LESS_LESS:       return lhs << rhs;
580         case T_GREATER_GREATER: return lhs >> rhs;
581         case T_PLUS:            return lhs + rhs;
582         case T_MINUS:           return lhs - rhs;
583         case T_STAR:            return lhs * rhs;
584         case T_SLASH:           return rhs.is_zero() ? Value() : lhs / rhs;
585         case T_PERCENT:         return rhs.is_zero() ? Value() : lhs % rhs;
586 
587         default:
588             return Value();
589         }
590     }
591 
592 private:
593     Client *client;
594     Environment *env;
595     QByteArray source;
596     RangeLexer *_lex;
597     Value _value;
598 };
599 
600 } // end of anonymous namespace
601 
State()602 Preprocessor::State::State()
603     : m_lexer(nullptr)
604     , m_skipping(MAX_LEVEL)
605     , m_trueTest(MAX_LEVEL)
606     , m_ifLevel(0)
607     , m_tokenBufferDepth(0)
608     , m_tokenBuffer(nullptr)
609     , m_inPreprocessorDirective(false)
610     , m_markExpandedTokens(true)
611     , m_noLines(false)
612     , m_inCondition(false)
613     , m_bytesOffsetRef(0)
614     , m_utf16charsOffsetRef(0)
615     , m_result(nullptr)
616     , m_lineRef(1)
617     , m_currentExpansion(nullptr)
618     , m_includeGuardState(IncludeGuardState_BeforeIfndef)
619 {
620     m_skipping[m_ifLevel] = false;
621     m_trueTest[m_ifLevel] = false;
622 
623     m_expansionResult.reserve(256);
624     setExpansionStatus(NotExpanding);
625 }
626 
627 #define COMPRESS_TOKEN_BUFFER
pushTokenBuffer(const PPToken * start,const PPToken * end,const Macro * macro)628 void Preprocessor::State::pushTokenBuffer(const PPToken *start, const PPToken *end, const Macro *macro)
629 {
630     if (m_tokenBufferDepth <= MAX_TOKEN_BUFFER_DEPTH) {
631 #ifdef COMPRESS_TOKEN_BUFFER
632         if (macro || !m_tokenBuffer) {
633             // If there is a new blocking macro (or no token buffer yet), create
634             // one.
635             m_tokenBuffer = new TokenBuffer(start, end, macro, m_tokenBuffer);
636             ++m_tokenBufferDepth;
637         } else {
638             // No new blocking macro is passed in, so tokens can be prepended to
639             // the existing buffer.
640             m_tokenBuffer->tokens.insert(m_tokenBuffer->tokens.begin(), start, end);
641         }
642 #else
643         m_tokenBuffer = new TokenBuffer(start, end, macro, m_tokenBuffer);
644         ++m_tokenBufferDepth;
645 #endif
646     }
647 }
648 
popTokenBuffer()649 void Preprocessor::State::popTokenBuffer()
650 {
651     TokenBuffer *r = m_tokenBuffer;
652     m_tokenBuffer = m_tokenBuffer->next;
653     delete r;
654 
655     if (m_tokenBufferDepth)
656         --m_tokenBufferDepth;
657 }
658 
659 #ifdef DEBUG_INCLUDE_GUARD_TRACKING
guardStateToString(int guardState)660 QString Preprocessor::State::guardStateToString(int guardState)
661 {
662     switch (guardState) {
663     case IncludeGuardState_NoGuard: return QLatin1String("NoGuard");
664     case IncludeGuardState_BeforeIfndef: return QLatin1String("BeforeIfndef");
665     case IncludeGuardState_AfterIfndef: return QLatin1String("AfterIfndef");
666     case IncludeGuardState_AfterDefine: return QLatin1String("AfterDefine");
667     case IncludeGuardState_AfterEndif: return QLatin1String("AfterEndif");
668     default: return QLatin1String("UNKNOWN");
669     }
670 }
671 #endif // DEBUG_INCLUDE_GUARD_TRACKING
672 
673 /**
674  * @brief Update the include-guard tracking state.
675  *
676  * Include guards are the #ifdef/#define/#endif sequence typically found in
677  * header files to prevent repeated definition of the contents of that header
678  * file. So, for a file to have an include guard, it must look like this:
679  * \code
680  * #ifndef SOME_ID
681  * ... all declarations/definitions/etc. go here ...
682  * #endif
683  * \endcode
684  *
685  * SOME_ID is an identifier, and is also the include guard. The only tokens
686  * allowed before the #ifndef and after the #endif are comments (in any form)
687  * or #line directives. The only other requirement is that a #define SOME_ID
688  * occurs inside the #ifndef block, but not nested inside other
689  * #if/#ifdef/#ifndef blocks.
690  *
691  * This function tracks the state, and is called from \c updateIncludeGuardState
692  * which handles the most common no-op cases.
693  *
694  * @param hint indicates what kind of token is encountered in the input
695  * @param idToken the identifier token that ought to be in the input
696  *        after a #ifndef or a #define .
697  */
updateIncludeGuardState_helper(IncludeGuardStateHint hint,PPToken * idToken)698 void Preprocessor::State::updateIncludeGuardState_helper(IncludeGuardStateHint hint, PPToken *idToken)
699 {
700 #ifdef DEBUG_INCLUDE_GUARD_TRACKING
701     int oldIncludeGuardState = m_includeGuardState;
702     QByteArray oldIncludeGuardMacroName = m_includeGuardMacroName;
703 #endif // DEBUG_INCLUDE_GUARD_TRACKING
704 
705     switch (m_includeGuardState) {
706     case IncludeGuardState_NoGuard:
707         break;
708     case IncludeGuardState_BeforeIfndef:
709         if (hint == IncludeGuardStateHint_Ifndef
710                 && idToken && idToken->is(T_IDENTIFIER)) {
711             m_includeGuardMacroName = idToken->asByteArrayRef().toByteArray();
712             m_includeGuardState = IncludeGuardState_AfterIfndef;
713         } else {
714             m_includeGuardState = IncludeGuardState_NoGuard;
715         }
716         break;
717     case IncludeGuardState_AfterIfndef:
718         if (hint == IncludeGuardStateHint_Define
719                 && idToken && idToken->is(T_IDENTIFIER)
720                 && idToken->asByteArrayRef() == m_includeGuardMacroName)
721             m_includeGuardState = IncludeGuardState_AfterDefine;
722         break;
723     case IncludeGuardState_AfterDefine:
724         if (hint == IncludeGuardStateHint_Endif)
725             m_includeGuardState = IncludeGuardState_AfterEndif;
726         break;
727     case IncludeGuardState_AfterEndif:
728         m_includeGuardState = IncludeGuardState_NoGuard;
729         m_includeGuardMacroName.clear();
730         break;
731     }
732 
733 #ifdef DEBUG_INCLUDE_GUARD_TRACKING
734     qDebug() << "***" << guardStateToString(oldIncludeGuardState)
735              << "->" << guardStateToString(m_includeGuardState)
736              << "hint:" << hint
737              << "guard:" << oldIncludeGuardMacroName << "->" << m_includeGuardMacroName;
738 #endif // DEBUG_INCLUDE_GUARD_TRACKING
739 }
740 
configurationFileName()741 QString Preprocessor::configurationFileName() { return QStringLiteral("<configuration>"); }
742 
Preprocessor(Client * client,Environment * env)743 Preprocessor::Preprocessor(Client *client, Environment *env)
744     : m_client(client)
745     , m_env(env)
746     , m_expandFunctionlikeMacros(true)
747     , m_keepComments(false)
748 {
749 }
750 
run(const QString & fileName,const QString & source)751 QByteArray Preprocessor::run(const QString &fileName, const QString &source)
752 {
753     return run(fileName, source.toUtf8());
754 }
755 
/// Preprocesses \a source and returns the resulting text.
///
/// The scratch buffer is cleared first. If preprocess() reports an
/// include-guard macro name and a client is set, the client is told via
/// markAsIncludeGuard().
QByteArray Preprocessor::run(const QString &fileName,
                             const QByteArray &source,
                             bool noLines,
                             bool markGeneratedTokens)
{
    m_scratchBuffer.clear();

    QByteArray preprocessed;
    QByteArray includeGuardMacroName;
    preprocessed.reserve(source.size() * 2); // multiply by 2 because we insert #gen lines.
    preprocess(fileName, source, &preprocessed, &includeGuardMacroName, noLines,
               markGeneratedTokens, false);

    if (m_client) {
        if (!includeGuardMacroName.isEmpty())
            m_client->markAsIncludeGuard(includeGuardMacroName);
    }
    return preprocessed;
}
771 
setCancelChecker(const Preprocessor::CancelChecker & cancelChecker)772 void Preprocessor::setCancelChecker(const Preprocessor::CancelChecker &cancelChecker)
773 {
774     m_cancelChecker = cancelChecker;
775 }
776 
expandFunctionlikeMacros() const777 bool Preprocessor::expandFunctionlikeMacros() const
778 {
779     return m_expandFunctionlikeMacros;
780 }
781 
setExpandFunctionlikeMacros(bool expandMacros)782 void Preprocessor::setExpandFunctionlikeMacros(bool expandMacros)
783 {
784     m_expandFunctionlikeMacros = expandMacros;
785 }
786 
keepComments() const787 bool Preprocessor::keepComments() const
788 {
789     return m_keepComments;
790 }
791 
setKeepComments(bool keepComments)792 void Preprocessor::setKeepComments(bool keepComments)
793 {
794     m_keepComments = keepComments;
795 }
796 
generateOutputLineMarker(unsigned lineno)797 void Preprocessor::generateOutputLineMarker(unsigned lineno)
798 {
799     maybeStartOutputLine();
800     QByteArray &marker = currentOutputBuffer();
801     marker.append("# ");
802     marker.append(QByteArray::number(lineno));
803     marker.append(" \"");
804     marker.append(m_env->currentFileUtf8);
805     marker.append("\"\n");
806 }
807 
handleDefined(PPToken * tk)808 void Preprocessor::handleDefined(PPToken *tk)
809 {
810     ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
811     unsigned lineno = tk->lineno;
812     lex(tk); // consume "defined" token
813     bool lparenSeen = tk->is(T_LPAREN);
814     if (lparenSeen)
815         lex(tk); // consume "(" token
816     if (tk->isNot(T_IDENTIFIER))
817         //### TODO: generate error message
818         return;
819     PPToken idToken = *tk;
820     do {
821         lex(tk);
822         if (tk->isNot(T_POUND_POUND))
823             break;
824         lex(tk);
825         if (tk->is(T_IDENTIFIER))
826             idToken = generateConcatenated(idToken, *tk);
827         else
828             break;
829     } while (isContinuationToken(*tk));
830 
831 
832     if (lparenSeen && tk->is(T_RPAREN))
833         lex(tk);
834 
835     pushToken(tk);
836 
837     QByteArray result(1, '0');
838     const ByteArrayRef macroName = idToken.asByteArrayRef();
839     if (macroDefinition(macroName,
840                         idToken.byteOffset + m_state.m_bytesOffsetRef,
841                         idToken.utf16charOffset + m_state.m_utf16charsOffsetRef,
842                         idToken.lineno, m_env, m_client)) {
843         result[0] = '1';
844     }
845     *tk = generateToken(T_NUMERIC_LITERAL, result.constData(), result.size(), lineno, false);
846 }
847 
pushToken(Preprocessor::PPToken * tk)848 void Preprocessor::pushToken(Preprocessor::PPToken *tk)
849 {
850     const PPToken currentTokenBuffer[] = {*tk};
851     m_state.pushTokenBuffer(currentTokenBuffer, currentTokenBuffer + 1, nullptr);
852 }
853 
/// Stores the next token in \a tk.
///
/// Tokens are taken from the innermost token buffer when one exists, otherwise they are
/// scanned by the lexer from the current source. Unless a preprocessor directive is
/// currently being processed, the token is then classified: directives are handled,
/// lines are dropped while skipping() reports true, and identifiers are passed to
/// handleDefined() or handleIdentifier().
void Preprocessor::lex(PPToken *tk)
{
again:
    if (m_state.m_tokenBuffer) {
        // There is a token buffer, so read from there.
        if (m_state.m_tokenBuffer->tokens.empty()) {
            // The token buffer is empty, so pop it, and start over.
            m_state.popTokenBuffer();
            goto again;
        }
        *tk = m_state.m_tokenBuffer->tokens.front();
        m_state.m_tokenBuffer->tokens.pop_front();
        // The token buffer might now be empty. We leave it in, because the
        // token we just read might expand into new tokens, or might be a call
        // to the macro that generated this token. In either case, the macro
        // that generated the token still needs to be blocked (!), which is
        // recorded in the token buffer. Removing the blocked macro and the
        // empty token buffer happens the next time that this function is called.
    } else {
        // No token buffer, so have the lexer scan the next token.
        tk->setSource(m_state.m_source);
        m_state.m_lexer->scan(tk);
    }

    // Adjust token's line number in order to take into account the environment reference.
    tk->lineno += m_state.m_lineRef - 1;

reclassify:
    // Classification is skipped entirely while a directive is being processed.
    if (! m_state.m_inPreprocessorDirective) {
        if (tk->newline() && tk->is(T_POUND)) {
            // A '#' that starts a line introduces a directive. Whatever token the handler
            // leaves in tk is classified again.
            handlePreprocessorDirective(tk);
            goto reclassify;
        } else if (tk->newline() && skipping()) {
            // While skipping, consume the rest of this line with classification disabled
            // (the flag is restored when 's' goes out of scope), then classify the token
            // that ended the line.
            ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
            do {
                lex(tk);
            } while (isContinuationToken(*tk));
            goto reclassify;
        } else if (tk->is(T_IDENTIFIER) && !isQtReservedWord(tk->tokenStart(), tk->bytes())) {
            m_state.updateIncludeGuardState(State::IncludeGuardStateHint_OtherToken);
            if (m_state.m_inCondition && tk->asByteArrayRef() == "defined") {
                handleDefined(tk);
            } else {
                synchronizeOutputLines(*tk);
                // A true result means replacement tokens were queued in a token buffer,
                // so tk holds nothing to hand out yet: start over.
                if (handleIdentifier(tk))
                    goto again;
            }
        } else if (tk->isNot(T_COMMENT) && tk->isNot(T_EOF_SYMBOL)) {
            m_state.updateIncludeGuardState(State::IncludeGuardStateHint_OtherToken);
        }
    }
}
906 
skipPreprocesorDirective(PPToken * tk)907 void Preprocessor::skipPreprocesorDirective(PPToken *tk)
908 {
909     ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
910 
911     while (isContinuationToken(*tk)) {
912         scanComment(tk);
913         lex(tk);
914     }
915 }
916 
handleIdentifier(PPToken * tk)917 bool Preprocessor::handleIdentifier(PPToken *tk)
918 {
919     ScopedBoolSwap s(m_state.m_inPreprocessorDirective, !tk->f.expanded);
920 
921     static const QByteArray ppLine("__LINE__");
922     static const QByteArray ppFile("__FILE__");
923     static const QByteArray ppDate("__DATE__");
924     static const QByteArray ppTime("__TIME__");
925 
926     ByteArrayRef macroNameRef = tk->asByteArrayRef();
927 
928     if (macroNameRef.size() == 8
929             && macroNameRef[0] == '_'
930             && macroNameRef[1] == '_') {
931         PPToken newTk;
932         if (macroNameRef == ppLine) {
933             QByteArray txt = QByteArray::number(tk->lineno);
934             newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
935         } else if (macroNameRef == ppFile) {
936             QByteArray txt;
937             txt.append('"');
938             txt.append(m_env->currentFileUtf8);
939             txt.append('"');
940             newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
941         } else if (macroNameRef == ppDate) {
942             QByteArray txt;
943             txt.append('"');
944             txt.append(QDate::currentDate().toString().toUtf8());
945             txt.append('"');
946             newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
947         } else if (macroNameRef == ppTime) {
948             QByteArray txt;
949             txt.append('"');
950             txt.append(QTime::currentTime().toString().toUtf8());
951             txt.append('"');
952             newTk = generateToken(T_STRING_LITERAL, txt.constData(), txt.size(), tk->lineno, false);
953         }
954 
955         if (newTk.hasSource()) {
956             newTk.f.newline = tk->newline();
957             newTk.f.whitespace = tk->whitespace();
958             *tk = newTk;
959             return false;
960         }
961     }
962 
963     Macro *macro = m_env->resolve(macroNameRef);
964     if (!macro
965         || (tk->expanded() && m_state.m_tokenBuffer && m_state.m_tokenBuffer->isBlocked(macro))) {
966         return false;
967     }
968 //    qDebug() << "expanding" << macro->name() << "on line" << tk->lineno;
969 
970     // Keep track the of the macro identifier token.
971     PPToken idTk = *tk;
972 
973     // Expanded tokens which are not generated ones preserve the original line number from
974     // their corresponding argument in macro substitution. For expanded tokens which are
975     // generated, this information must be taken from somewhere else. What we do is to keep
976     // a "reference" line initialize set to the line where expansion happens.
977     unsigned baseLine = idTk.lineno - m_state.m_lineRef + 1;
978 
979     QVector<PPToken> body = macro->definitionTokens();
980 
981     // Within nested expansion we might reach a previously added marker token. In this case,
982     // we need to move it from its current possition to outside the nesting.
983     PPToken oldMarkerTk;
984 
985     if (macro->isFunctionLike()) {
986         if (!expandFunctionlikeMacros()
987                 // Still expand if this originally started with an object-like macro.
988                 && m_state.m_expansionStatus != Expanding) {
989             if (m_client) {
990                 m_client->notifyMacroReference(m_state.m_bytesOffsetRef + idTk.byteOffset,
991                                                m_state.m_utf16charsOffsetRef + idTk.utf16charOffset,
992                                                idTk.lineno,
993                                                *macro);
994             }
995             return false;
996         }
997 
998         // Collect individual tokens that form the macro arguments.
999         QVector<QVector<PPToken> > allArgTks;
1000         bool hasArgs = collectActualArguments(tk, &allArgTks, macro->name());
1001 
1002         // Check whether collecting arguments failed due to a previously added marker
1003         // that goot nested in a sequence of expansions. If so, store it and try again.
1004         if (!hasArgs
1005                 && !tk->hasSource()
1006                 && m_state.m_markExpandedTokens
1007                 && (m_state.m_expansionStatus == Expanding
1008                     || m_state.m_expansionStatus == ReadyForExpansion)) {
1009             oldMarkerTk = *tk;
1010             hasArgs = collectActualArguments(tk, &allArgTks, macro->name());
1011         }
1012 
1013         // Check for matching parameter/argument count.
1014         bool hasMatchingArgs = false;
1015         if (hasArgs) {
1016             const int expectedArgCount = macro->formals().size();
1017             if (macro->isVariadic() && allArgTks.size() == expectedArgCount - 1)
1018                 allArgTks.push_back(QVector<PPToken>());
1019             const int actualArgCount = allArgTks.size();
1020             if (expectedArgCount == actualArgCount
1021                     || (macro->isVariadic() && actualArgCount > expectedArgCount - 1)
1022                     // Handle '#define foo()' when invoked as 'foo()'
1023                     || (expectedArgCount == 0
1024                         && actualArgCount == 1
1025                         && allArgTks.at(0).isEmpty())) {
1026                 hasMatchingArgs = true;
1027             }
1028         }
1029 
1030         if (!hasArgs || !hasMatchingArgs) {
1031             //### TODO: error message
1032             pushToken(tk);
1033             // If a previous marker was found, make sure to put it back.
1034             if (oldMarkerTk.bytes())
1035                 pushToken(&oldMarkerTk);
1036             *tk = idTk;
1037             return false;
1038         }
1039 
1040         if (m_client && !idTk.generated()) {
1041             // Bundle each token sequence into a macro argument "reference" for notification.
1042             // Even empty ones, which are not necessarily important on its own, but for the matter
1043             // of couting their number - such as in foo(,)
1044             QVector<MacroArgumentReference> argRefs;
1045             for (int i = 0; i < allArgTks.size(); ++i) {
1046                 const QVector<PPToken> &argTks = allArgTks.at(i);
1047                 if (argTks.isEmpty()) {
1048                     argRefs.push_back(MacroArgumentReference());
1049                 } else {
1050 
1051                     argRefs.push_back(MacroArgumentReference(
1052                                   m_state.m_bytesOffsetRef + argTks.first().bytesBegin(),
1053                                   argTks.last().bytesBegin() + argTks.last().bytes()
1054                                     - argTks.first().bytesBegin(),
1055                                   m_state.m_utf16charsOffsetRef + argTks.first().utf16charsBegin(),
1056                                   argTks.last().utf16charsBegin() + argTks.last().utf16chars()
1057                                     - argTks.first().utf16charsBegin()));
1058                 }
1059             }
1060 
1061             m_client->startExpandingMacro(m_state.m_bytesOffsetRef + idTk.byteOffset,
1062                                           m_state.m_utf16charsOffsetRef + idTk.utf16charOffset,
1063                                           idTk.lineno,
1064                                           *macro,
1065                                           argRefs);
1066         }
1067 
1068         if (allArgTks.size() > MAX_FUNCTION_LIKE_ARGUMENTS_COUNT)
1069             return false;
1070 
1071         if (!handleFunctionLikeMacro(macro, body, allArgTks, baseLine)) {
1072             if (m_client && !idTk.expanded())
1073                 m_client->stopExpandingMacro(idTk.byteOffset, *macro);
1074             return false;
1075         }
1076     } else if (m_client && !idTk.generated()) {
1077         m_client->startExpandingMacro(m_state.m_bytesOffsetRef + idTk.byteOffset,
1078                                       m_state.m_utf16charsOffsetRef + idTk.utf16charOffset,
1079                                       idTk.lineno, *macro);
1080     }
1081 
1082     if (body.isEmpty()) {
1083         if (m_state.m_markExpandedTokens
1084                 && (m_state.m_expansionStatus == NotExpanding
1085                     || m_state.m_expansionStatus == JustFinishedExpansion)) {
1086             // This is not the most beautiful approach but it's quite reasonable. What we do here
1087             // is to create a fake identifier token which is only composed by whitespaces. It's
1088             // also not marked as expanded so it it can be treated as a regular token.
1089             const QByteArray content(int(idTk.bytes() + computeDistance(idTk)), ' ');
1090             PPToken fakeIdentifier = generateToken(T_IDENTIFIER,
1091                                                    content.constData(), content.length(),
1092                                                    idTk.lineno, false, false);
1093             fakeIdentifier.f.whitespace = true;
1094             fakeIdentifier.f.expanded = false;
1095             fakeIdentifier.f.generated = false;
1096             body.push_back(fakeIdentifier);
1097         }
1098     } else {
1099         // The first body token replaces the macro invocation so its whitespace and
1100         // newline info is replicated.
1101         PPToken &bodyTk = body[0];
1102         bodyTk.f.whitespace = idTk.whitespace();
1103         bodyTk.f.newline = idTk.newline();
1104 
1105         // Expansions are tracked from a "top-level" basis. This means that each expansion
1106         // section in the output corresponds to a direct use of a macro (either object-like
1107         // or function-like) in the source code and all its recurring expansions - they are
1108         // surrounded by two marker tokens, one at the begin and the other at the end.
1109         // For instance, the following code will generate 3 expansions in total, but the
1110         // output will aggregate the tokens in only 2 expansion sections.
1111         //  - The first corresponds to BAR expanding to FOO and then FOO expanding to T o;
1112         //  - The second corresponds to FOO expanding to T o;
1113         //
1114         // #define FOO(T, o) T o;
1115         // #define BAR(T, o) FOO(T, o)
1116         // BAR(Test, x) FOO(Test, y)
1117         if (m_state.m_markExpandedTokens) {
1118             if (m_state.m_expansionStatus == NotExpanding
1119                     || m_state.m_expansionStatus == JustFinishedExpansion) {
1120                 PPToken marker;
1121                 marker.f.expanded = true;
1122                 marker.f.bytes = idTk.bytes();
1123                 marker.byteOffset = idTk.byteOffset;
1124                 marker.lineno = idTk.lineno;
1125                 body.prepend(marker);
1126                 body.append(marker);
1127                 m_state.setExpansionStatus(ReadyForExpansion);
1128             } else if (oldMarkerTk.bytes()
1129                        && (m_state.m_expansionStatus == ReadyForExpansion
1130                            || m_state.m_expansionStatus == Expanding)) {
1131                 body.append(oldMarkerTk);
1132             }
1133         }
1134     }
1135 
1136     m_state.pushTokenBuffer(body.constBegin(), body.constEnd(), macro);
1137 
1138     if (m_client && !idTk.generated())
1139         m_client->stopExpandingMacro(idTk.byteOffset, *macro);
1140 
1141     return true;
1142 }
1143 
handleFunctionLikeMacro(const Macro * macro,QVector<PPToken> & body,const QVector<QVector<PPToken>> & actuals,unsigned baseLine)1144 bool Preprocessor::handleFunctionLikeMacro(const Macro *macro,
1145                                            QVector<PPToken> &body,
1146                                            const QVector<QVector<PPToken> > &actuals,
1147                                            unsigned baseLine)
1148 {
1149     QVector<PPToken> expanded;
1150     expanded.reserve(MAX_TOKEN_EXPANSION_COUNT);
1151 
1152     const size_t bodySize = body.size();
1153     for (size_t i = 0; i < bodySize && expanded.size() < MAX_TOKEN_EXPANSION_COUNT;
1154             ++i) {
1155         int expandedSize = expanded.size();
1156         PPToken bodyTk = body.at(int(i));
1157 
1158         if (bodyTk.is(T_IDENTIFIER)) {
1159             const ByteArrayRef id = bodyTk.asByteArrayRef();
1160             const QVector<QByteArray> &formals = macro->formals();
1161             int j = 0;
1162             for (; j < formals.size() && expanded.size() < MAX_TOKEN_EXPANSION_COUNT; ++j) {
1163                 if (formals[j] == id) {
1164                     QVector<PPToken> actualsForThisParam = actuals.at(j);
1165                     unsigned lineno = baseLine;
1166 
1167                     // Collect variadic arguments
1168                     if (id == "__VA_ARGS__" || (macro->isVariadic() && j + 1 == formals.size())) {
1169                         for (int k = j + 1; k < actuals.size(); ++k) {
1170                             actualsForThisParam.append(generateToken(T_COMMA, ",", 1, lineno, true));
1171                             actualsForThisParam += actuals.at(k);
1172                         }
1173                     }
1174 
1175                     const int actualsSize = actualsForThisParam.size();
1176 
1177                     if (i > 0 && body[int(i) - 1].is(T_POUND)) {
1178                         QByteArray enclosedString;
1179                         enclosedString.reserve(256);
1180 
1181                         for (int i = 0; i < actualsSize; ++i) {
1182                             const PPToken &t = actualsForThisParam.at(i);
1183                             if (i == 0)
1184                                 lineno = t.lineno;
1185                             else if (t.whitespace())
1186                                 enclosedString.append(' ');
1187                             enclosedString.append(t.tokenStart(), t.bytes());
1188                         }
1189                         enclosedString.replace("\\", "\\\\");
1190                         enclosedString.replace("\"", "\\\"");
1191 
1192                         expanded.push_back(generateToken(T_STRING_LITERAL,
1193                                                          enclosedString.constData(),
1194                                                          enclosedString.size(),
1195                                                          lineno, true));
1196                     } else {
1197                         for (int k = 0; k < actualsSize; ++k) {
1198                             // Mark the actual tokens (which are the replaced version of the
1199                             // body's one) as expanded. For the first token we replicate the
1200                             // body's whitespace info.
1201                             PPToken actual = actualsForThisParam.at(k);
1202                             actual.f.expanded = true;
1203                             if (k == 0)
1204                                 actual.f.whitespace = bodyTk.whitespace();
1205                             expanded += actual;
1206                             if (k == actualsSize - 1)
1207                                 lineno = actual.lineno;
1208                         }
1209                     }
1210 
1211                     // Get a better (more up-to-date) value for the base line.
1212                     baseLine = lineno;
1213 
1214                     break;
1215                 }
1216             }
1217 
1218             if (j == formals.size()) {
1219                 // No formal macro parameter for this identifier in the body.
1220                 bodyTk.f.generated = true;
1221                 bodyTk.lineno = baseLine;
1222                 expanded.push_back(std::move(bodyTk));
1223             }
1224         } else if (bodyTk.isNot(T_POUND) && bodyTk.isNot(T_POUND_POUND)) {
1225             bodyTk.f.generated = true;
1226             bodyTk.lineno = baseLine;
1227             expanded.push_back(std::move(bodyTk));
1228         }
1229 
1230         if (i > 1 && body[int(i) - 1].is(T_POUND_POUND)) {
1231             if (expandedSize < 1 || expanded.size() == expandedSize) //### TODO: [cpp.concat] placemarkers
1232                 continue;
1233             const PPToken &leftTk = expanded[expandedSize - 1];
1234             const PPToken &rightTk = expanded[expandedSize];
1235             expanded[expandedSize - 1] = generateConcatenated(leftTk, rightTk);
1236             expanded.remove(expandedSize);
1237         }
1238     }
1239 
1240     // The "new" body.
1241     body = expanded;
1242     body.squeeze();
1243 
1244     return true;
1245 }
1246 
trackExpansionCycles(PPToken * tk)1247 void Preprocessor::trackExpansionCycles(PPToken *tk)
1248 {
1249     if (m_state.m_markExpandedTokens) {
1250         // Identify a macro expansion section. The format is as follows:
1251         //
1252         // # expansion begin x,y ~g l:c
1253         // ...
1254         // # expansion end
1255         //
1256         // The x and y correspond, respectively, to the offset where the macro invocation happens
1257         // and the macro name's length. Following that there might be an unlimited number of
1258         // token marks which are directly mapped to each token that appears in the expansion.
1259         // Something like ~g indicates that the following g tokens are all generated. While
1260         // something like l:c indicates that the following token is expanded but not generated
1261         // and is positioned on line l and column c. Example:
1262         //
1263         // #define FOO(X) int f(X = 0)  // line 1
1264         // FOO(int
1265         //     a);
1266         //
1267         // Output would be:
1268         // # expansion begin 8,3 ~3 2:4 3:4 ~3
1269         // int f(int a = 0)
1270         // # expansion end
1271         // # 3 filename
1272         //       ;
1273         if (tk->expanded() && !tk->hasSource()) {
1274             if (m_state.m_expansionStatus == ReadyForExpansion) {
1275                 m_state.setExpansionStatus(Expanding);
1276                 m_state.m_expansionResult.clear();
1277                 m_state.m_expandedTokensInfo.clear();
1278             } else if (m_state.m_expansionStatus == Expanding) {
1279                 m_state.setExpansionStatus(JustFinishedExpansion);
1280 
1281                 QByteArray &buffer = currentOutputBuffer();
1282                 maybeStartOutputLine();
1283 
1284                 // Offset and length of the macro invocation
1285                 char chunk[40];
1286                 qsnprintf(chunk, sizeof(chunk), "# expansion begin %d,%d", tk->byteOffset,
1287                           tk->bytes());
1288                 buffer.append(chunk);
1289 
1290                 // Expanded tokens
1291                 unsigned generatedCount = 0;
1292                 for (int i = 0; i < m_state.m_expandedTokensInfo.size(); ++i) {
1293                     const QPair<unsigned, unsigned> &p = m_state.m_expandedTokensInfo.at(i);
1294                     if (p.first) {
1295                         if (generatedCount) {
1296                             qsnprintf(chunk, sizeof(chunk), " ~%d", generatedCount);
1297                             buffer.append(chunk);
1298                             generatedCount = 0;
1299                         }
1300                         qsnprintf(chunk, sizeof(chunk), " %d:%d", p.first, p.second);
1301                         buffer.append(chunk);
1302                     } else {
1303                         ++generatedCount;
1304                     }
1305                 }
1306                 if (generatedCount) {
1307                     qsnprintf(chunk, sizeof(chunk), " ~%d", generatedCount);
1308                     buffer.append(chunk);
1309                 }
1310                 buffer.append('\n');
1311                 buffer.append(m_state.m_expansionResult);
1312                 maybeStartOutputLine();
1313                 buffer.append("# expansion end\n");
1314             }
1315 
1316             lex(tk);
1317 
1318             if (tk->expanded() && !tk->hasSource())
1319                 trackExpansionCycles(tk);
1320         }
1321     }
1322 }
1323 
adjustForCommentOrStringNewlines(int * currentLine,const PPToken & tk)1324 static void adjustForCommentOrStringNewlines(int *currentLine, const PPToken &tk)
1325 {
1326     if (tk.isComment() || tk.isStringLiteral())
1327         (*currentLine) += tk.asByteArrayRef().count('\n');
1328 }
1329 
synchronizeOutputLines(const PPToken & tk,bool forceLine)1330 void Preprocessor::synchronizeOutputLines(const PPToken &tk, bool forceLine)
1331 {
1332     if (m_state.m_expansionStatus != NotExpanding
1333             || (!forceLine && m_env->currentLine == tk.lineno)) {
1334         adjustForCommentOrStringNewlines(&m_env->currentLine, tk);
1335         return;
1336     }
1337 
1338     if (forceLine || m_env->currentLine > tk.lineno || tk.lineno - m_env->currentLine >= 9) {
1339         if (m_state.m_noLines) {
1340             if (!m_state.m_markExpandedTokens)
1341                 currentOutputBuffer().append(' ');
1342         } else {
1343             generateOutputLineMarker(tk.lineno);
1344         }
1345     } else {
1346         for (int i = m_env->currentLine; i < tk.lineno; ++i)
1347             currentOutputBuffer().append('\n');
1348     }
1349 
1350     m_env->currentLine = tk.lineno;
1351     adjustForCommentOrStringNewlines(&m_env->currentLine, tk);
1352 }
1353 
computeDistance(const Preprocessor::PPToken & tk,bool forceTillLine)1354 std::size_t Preprocessor::computeDistance(const Preprocessor::PPToken &tk, bool forceTillLine)
1355 {
1356     // Find previous non-space character or line begin.
1357     const char *buffer = tk.bufferStart();
1358     const char *tokenBegin = tk.tokenStart();
1359     const char *it = tokenBegin - 1;
1360     for (; it >= buffer; --it) {
1361         if (*it == '\n'|| (!pp_isspace(*it) && !forceTillLine))
1362             break;
1363     }
1364     ++it;
1365 
1366     return tokenBegin - it;
1367 }
1368 
1369 
enforceSpacing(const Preprocessor::PPToken & tk,bool forceSpacing)1370 void Preprocessor::enforceSpacing(const Preprocessor::PPToken &tk, bool forceSpacing)
1371 {
1372     if (tk.whitespace() || forceSpacing) {
1373         QByteArray &buffer = currentOutputBuffer();
1374         // For expanded tokens we simply add a whitespace, if necessary - the exact amount of
1375         // whitespaces is irrelevant within an expansion section. For real tokens we must be
1376         // more specific and get the information from the original source.
1377         if (tk.expanded() && !atStartOfOutputLine()) {
1378             buffer.append(' ');
1379         } else {
1380             const std::size_t spacing = computeDistance(tk, forceSpacing);
1381             const char *tokenBegin = tk.tokenStart();
1382             const char *it = tokenBegin - spacing;
1383 
1384             // Reproduce the content as in the original line.
1385             for (; it != tokenBegin; ++it)
1386                 buffer.append(pp_isspace(*it) ? *it : ' ');
1387         }
1388     }
1389 }
1390 
/// Preprocesses \a source and appends the resulting tokens to the current output buffer.
///
/// The call runs on a fresh State; the previous state, the environment's current file
/// name and its current line are restored when the function returns. Nothing happens
/// for an empty \a source.
///
/// \a fileName becomes the current file name of the state and of the environment.
/// \a result is stored in the state as the result buffer.
/// \a includeGuardMacroName, when non-null, receives the include guard macro name if the
/// include guard state ends up as "after define" or "after endif".
/// \a noLines suppresses the line marker that is otherwise generated for line 1.
/// \a markGeneratedTokens is stored as the state's "mark expanded tokens" flag.
/// \a inCondition is stored in the state; lex() uses it to recognize "defined".
/// \a bytesOffsetRef, \a utf16charOffsetRef and \a lineRef are stored as the reference
/// offsets and reference line of the state.
///
/// Note: invalid pp-tokens are used as markers to force whitespace checks.
void Preprocessor::preprocess(const QString &fileName, const QByteArray &source,
                              QByteArray *result, QByteArray *includeGuardMacroName,
                              bool noLines,
                              bool markGeneratedTokens, bool inCondition,
                              unsigned bytesOffsetRef, unsigned utf16charOffsetRef,
                              unsigned lineRef)
{
    if (source.isEmpty())
        return;

    // Work on a fresh state; the previous one is swapped back in on scope exit.
    ScopedSwap<State> savedState(m_state, State());
    m_state.m_currentFileName = fileName;
    m_state.m_source = source;
    m_state.m_lexer = new Lexer(source.constBegin(), source.constEnd());
    m_state.m_lexer->setScanKeywords(false);
    m_state.m_lexer->setScanAngleStringLiteralTokens(false);
    m_state.m_lexer->setPreprocessorMode(true);
    if (m_keepComments)
        m_state.m_lexer->setScanCommentTokens(true);
    m_state.m_result = result;
    m_state.setExpansionStatus(m_state.m_expansionStatus); // Re-set m_currentExpansion
    m_state.m_noLines = noLines;
    m_state.m_markExpandedTokens = markGeneratedTokens;
    m_state.m_inCondition = inCondition;
    m_state.m_bytesOffsetRef = bytesOffsetRef;
    m_state.m_utf16charsOffsetRef = utf16charOffsetRef;
    m_state.m_lineRef = lineRef;

    // The environment's file and line information is restored on scope exit as well.
    ScopedSwap<QString> savedFileName(m_env->currentFile, fileName);
    ScopedSwap<QByteArray> savedUtf8FileName(m_env->currentFileUtf8, fileName.toUtf8());
    ScopedSwap<int> savedCurrentLine(m_env->currentLine, 1);

    if (!m_state.m_noLines)
        generateOutputLineMarker(1);

    // Main loop: fetch one token per iteration and emit it, until the end of the input
    // (the end-of-file token itself also passes through the output steps).
    PPToken tk(m_state.m_source);
    do {
        lex(&tk);

        // Track the start and end of macro expansion cycles.
        trackExpansionCycles(&tk);

        bool macroExpanded = false;
        if (m_state.m_expansionStatus == Expanding) {
            // Collect the line and column from the tokens undergoing expansion. Those will
            // be available in the expansion section for further referencing about their real
            // location.
            // Generated tokens are recorded as (0, 0).
            unsigned trackedLine = 0;
            unsigned trackedColumn = 0;
            if (tk.expanded() && !tk.generated()) {
                trackedLine = tk.lineno;
                trackedColumn = unsigned(computeDistance(tk, true));
            }
            m_state.m_expandedTokensInfo.append(qMakePair(trackedLine, trackedColumn));
        } else if (m_state.m_expansionStatus == JustFinishedExpansion) {
            // First token after an expansion section: force line and spacing output below.
            m_state.setExpansionStatus(NotExpanding);
            macroExpanded = true;
        }

        // Update environment line information.
        synchronizeOutputLines(tk, macroExpanded);

        // Make sure spacing between tokens is handled properly.
        enforceSpacing(tk, macroExpanded);

        // Finally output the token.
        if (!tk.f.trigraph) {
            currentOutputBuffer().append(tk.tokenStart(), tk.bytes());
        } else {
            // Tokens flagged as trigraphs are written with their regular spelling instead
            // of their source text.
            switch (tk.kind()) {
            case T_LBRACKET:    currentOutputBuffer().append("["); break;
            case T_RBRACKET:    currentOutputBuffer().append("]"); break;
            case T_LBRACE:      currentOutputBuffer().append("{"); break;
            case T_RBRACE:      currentOutputBuffer().append("}"); break;
            case T_POUND:       currentOutputBuffer().append("#"); break;
            case T_POUND_POUND: currentOutputBuffer().append("##"); break;
            case T_CARET:       currentOutputBuffer().append("^"); break;
            case T_CARET_EQUAL: currentOutputBuffer().append("^="); break;
            case T_PIPE:        currentOutputBuffer().append("|"); break;
            case T_PIPE_EQUAL:  currentOutputBuffer().append("|="); break;
            case T_TILDE:       currentOutputBuffer().append("~"); break;
            case T_TILDE_EQUAL: currentOutputBuffer().append("~="); break;
            default: CPP_ASSERT(0, qDebug() << tk.spell()); break;
            }
        }

    } while (tk.isNot(T_EOF_SYMBOL));

    if (includeGuardMacroName) {
        if (m_state.m_includeGuardState == State::IncludeGuardState_AfterDefine
                || m_state.m_includeGuardState == State::IncludeGuardState_AfterEndif)
            *includeGuardMacroName = m_state.m_includeGuardMacroName;
    }
    // Release what this call created: the lexer and any token buffers still stacked.
    delete m_state.m_lexer;
    while (m_state.m_tokenBuffer)
        m_state.popTokenBuffer();
}
1489 
scanComment(Preprocessor::PPToken * tk)1490 bool Preprocessor::scanComment(Preprocessor::PPToken *tk)
1491 {
1492     if (!tk->isComment())
1493         return false;
1494     synchronizeOutputLines(*tk);
1495     enforceSpacing(*tk, true);
1496     currentOutputBuffer().append(tk->tokenStart(), tk->bytes());
1497     return true;
1498 }
1499 
consumeComments(PPToken * tk)1500 bool Preprocessor::consumeComments(PPToken *tk)
1501 {
1502     while (scanComment(tk))
1503         lex(tk);
1504     return tk->isNot(T_EOF_SYMBOL);
1505 }
1506 
collectActualArguments(PPToken * tk,QVector<QVector<PPToken>> * actuals,const QByteArray & parentMacroName)1507 bool Preprocessor::collectActualArguments(PPToken *tk, QVector<QVector<PPToken> > *actuals,
1508                                           const QByteArray &parentMacroName)
1509 {
1510     Q_ASSERT(tk);
1511     Q_ASSERT(actuals);
1512 
1513     ExecuteOnDestruction removeBlockedName;
1514     if (m_state.m_tokenBuffer) {
1515         removeBlockedName.reset([this] {
1516             if (m_state.m_tokenBuffer && !m_state.m_tokenBuffer->blockedMacroNames.empty())
1517                 m_state.m_tokenBuffer->blockedMacroNames.pop_back();
1518         });
1519         m_state.m_tokenBuffer->blockedMacroNames.push_back(parentMacroName);
1520     }
1521 
1522     lex(tk); // consume the identifier
1523 
1524     bool lastCommentIsCpp = false;
1525     while (scanComment(tk)) {
1526         /* After C++ comments we need to add a new line
1527            e.g.
1528              #define foo(a, b) int a = b
1529              foo // comment
1530              (x, 3);
1531            can result in
1532                  // commentint
1533              x = 3;
1534         */
1535         lastCommentIsCpp = tk->is(T_CPP_COMMENT) || tk->is(T_CPP_DOXY_COMMENT);
1536         lex(tk);
1537     }
1538     if (lastCommentIsCpp)
1539         maybeStartOutputLine();
1540 
1541     if (tk->isNot(T_LPAREN))
1542         //### TODO: error message
1543         return false;
1544 
1545     QVector<PPToken> tokens;
1546     lex(tk);
1547     scanActualArgument(tk, &tokens);
1548 
1549     actuals->append(tokens);
1550 
1551     while (tk->is(T_COMMA)) {
1552         lex(tk);
1553 
1554         QVector<PPToken> tokens;
1555         scanActualArgument(tk, &tokens);
1556         actuals->append(tokens);
1557     }
1558 
1559     if (!tk->is(T_RPAREN)) {
1560         return false;
1561         //###TODO: error message
1562     }
1563     return true;
1564 }
1565 
scanActualArgument(PPToken * tk,QVector<PPToken> * tokens)1566 void Preprocessor::scanActualArgument(PPToken *tk, QVector<PPToken> *tokens)
1567 {
1568     Q_ASSERT(tokens);
1569 
1570     int count = 0;
1571 
1572     while (tk->isNot(T_EOF_SYMBOL)) {
1573         if (tk->is(T_LPAREN)) {
1574             ++count;
1575         } else if (tk->is(T_RPAREN)) {
1576             if (! count)
1577                 break;
1578             --count;
1579         } else if (! count && tk->is(T_COMMA)) {
1580             break;
1581         }
1582 
1583         if (m_keepComments
1584                 && (tk->is(T_CPP_COMMENT) || tk->is(T_CPP_DOXY_COMMENT))) {
1585             // Even in keep comments mode, we cannot preserve C++ style comments inside the
1586             // expansion. We stick with GCC's approach which is to replace them by C style
1587             // comments (currently clang just gets rid of them) and transform internals */
1588             // into *|.
1589             QByteArray text = m_state.m_source.mid(tk->bytesBegin() + 2,
1590                                                    tk->bytesEnd() - tk->bytesBegin() - 2);
1591             const QByteArray &comment = "/*" + text.replace("*/", "*|") + "*/";
1592             tokens->append(generateToken(T_COMMENT,
1593                                          comment.constData(), comment.size(),
1594                                          tk->lineno, false));
1595         } else {
1596             tokens->append(*tk);
1597         }
1598 
1599         lex(tk);
1600     }
1601 }
1602 
handlePreprocessorDirective(PPToken * tk)1603 void Preprocessor::handlePreprocessorDirective(PPToken *tk)
1604 {
1605     ScopedBoolSwap s(m_state.m_inPreprocessorDirective, true);
1606 
1607     PPToken poundToken = *tk;
1608     lex(tk); // scan the directive
1609 
1610     if (tk->newline() && ! tk->joined())
1611         return; // nothing to do.
1612 
1613     static const QByteArray ppDefine("define");
1614     static const QByteArray ppIf("if");
1615     static const QByteArray ppIfDef("ifdef");
1616     static const QByteArray ppIfNDef("ifndef");
1617     static const QByteArray ppEndIf("endif");
1618     static const QByteArray ppElse("else");
1619     static const QByteArray ppUndef("undef");
1620     static const QByteArray ppElif("elif");
1621     static const QByteArray ppInclude("include");
1622     static const QByteArray ppIncludeNext("include_next");
1623     static const QByteArray ppImport("import");
1624     //### TODO:
1625     // line
1626     // error
1627     // pragma
1628 
1629     if (tk->is(T_IDENTIFIER)) {
1630         const ByteArrayRef directive = tk->asByteArrayRef();
1631 
1632         if (!skipping() && directive == ppDefine) {
1633             handleDefineDirective(tk);
1634         } else if (directive == ppIfNDef) {
1635             handleIfDefDirective(true, tk);
1636         } else if (directive == ppEndIf) {
1637             handleEndIfDirective(tk, poundToken);
1638         } else {
1639             m_state.updateIncludeGuardState(State::IncludeGuardStateHint_OtherToken);
1640 
1641             if (!skipping() && directive == ppUndef)
1642                 handleUndefDirective(tk);
1643             else if (!skipping() && (directive == ppInclude
1644                                     || directive == ppImport))
1645                 handleIncludeDirective(tk, false);
1646             else if (!skipping() && directive == ppIncludeNext)
1647                 handleIncludeDirective(tk, true);
1648             else if (directive == ppIf)
1649                 handleIfDirective(tk);
1650             else if (directive == ppIfDef)
1651                 handleIfDefDirective(false, tk);
1652             else if (directive == ppElse)
1653                 handleElseDirective(tk, poundToken);
1654             else if (directive == ppElif)
1655                 handleElifDirective(tk, poundToken);
1656         }
1657     }
1658 
1659     skipPreprocesorDirective(tk);
1660 }
1661 
1662 
handleIncludeDirective(PPToken * tk,bool includeNext)1663 void Preprocessor::handleIncludeDirective(PPToken *tk, bool includeNext)
1664 {
1665     if (m_cancelChecker && m_cancelChecker())
1666         return;
1667 
1668     m_state.m_lexer->setScanAngleStringLiteralTokens(true);
1669     lex(tk); // consume "include" token
1670     m_state.m_lexer->setScanAngleStringLiteralTokens(false);
1671     const unsigned line = tk->lineno;
1672     QByteArray included;
1673 
1674     if (tk->is(T_STRING_LITERAL) || tk->is(T_ANGLE_STRING_LITERAL)) {
1675         included = tk->asByteArrayRef().toByteArray();
1676         lex(tk); // consume string token
1677     } else {
1678         included = expand(tk);
1679     }
1680     included = included.trimmed();
1681 
1682     if (included.isEmpty()) {
1683         //### TODO: error message
1684         return;
1685     }
1686 
1687 //    qDebug("include [[%s]]", included.constData());
1688     Client::IncludeType mode;
1689     if (includeNext)
1690         mode = Client::IncludeNext;
1691     else if (included.at(0) == '"')
1692         mode = Client::IncludeLocal;
1693     else if (included.at(0) == '<')
1694         mode = Client::IncludeGlobal;
1695     else
1696         return; //### TODO: add error message?
1697 
1698     if (m_client) {
1699         QString inc = QString::fromUtf8(included.constData() + 1, included.size() - 2);
1700         m_client->sourceNeeded(line, inc, mode);
1701     }
1702 }
1703 
handleDefineDirective(PPToken * tk)1704 void Preprocessor::handleDefineDirective(PPToken *tk)
1705 {
1706     const unsigned defineOffset = tk->byteOffset;
1707     lex(tk); // consume "define" token
1708 
1709     if (!consumeComments(tk))
1710         return;
1711 
1712     if (tk->isNot(T_IDENTIFIER))
1713         return;
1714 
1715     Macro macro;
1716     macro.setFileName(m_env->currentFile);
1717     macro.setLine(tk->lineno);
1718     QByteArray macroName = tk->asByteArrayRef().toByteArray();
1719     macro.setName(macroName);
1720     macro.setBytesOffset(tk->byteOffset);
1721     macro.setUtf16charOffset(tk->utf16charOffset);
1722 
1723     PPToken idToken(*tk);
1724 
1725     lex(tk);
1726 
1727     if (isContinuationToken(*tk) && tk->is(T_LPAREN) && ! tk->whitespace()) {
1728         macro.setFunctionLike(true);
1729 
1730         lex(tk); // skip `('
1731         if (!consumeComments(tk))
1732             return;
1733 
1734         bool hasIdentifier = false;
1735         if (isContinuationToken(*tk) && tk->is(T_IDENTIFIER)) {
1736             hasIdentifier = true;
1737             macro.addFormal(tk->asByteArrayRef().toByteArray());
1738 
1739             lex(tk);
1740             if (!consumeComments(tk))
1741                 return;
1742 
1743             while (isContinuationToken(*tk) && tk->is(T_COMMA)) {
1744                 lex(tk);
1745                 if (!consumeComments(tk))
1746                     return;
1747 
1748                 if (isContinuationToken(*tk) && tk->is(T_IDENTIFIER)) {
1749                     macro.addFormal(tk->asByteArrayRef().toByteArray());
1750                     lex(tk);
1751                     if (!consumeComments(tk))
1752                         return;
1753                 } else {
1754                     hasIdentifier = false;
1755                 }
1756             }
1757         }
1758 
1759         if (tk->is(T_DOT_DOT_DOT)) {
1760             macro.setVariadic(true);
1761             if (!hasIdentifier)
1762                 macro.addFormal("__VA_ARGS__");
1763             lex(tk); // consume elipsis token
1764             if (!consumeComments(tk))
1765                 return;
1766         }
1767         if (isContinuationToken(*tk) && tk->is(T_RPAREN))
1768             lex(tk); // consume ")" token
1769     } else {
1770         if (m_state.m_ifLevel == 1)
1771             m_state.updateIncludeGuardState(State::IncludeGuardStateHint_Define, &idToken);
1772     }
1773 
1774     QVector<PPToken> bodyTokens;
1775     unsigned previousBytesOffset = 0;
1776     unsigned previousUtf16charsOffset = 0;
1777     unsigned previousLine = 0;
1778     Macro *macroReference = nullptr;
1779     while (isContinuationToken(*tk)) {
1780         // Macro tokens are always marked as expanded. However, only for object-like macros
1781         // we mark them as generated too. For function-like macros we postpone it until the
1782         // formals are identified in the bodies.
1783         tk->f.expanded = true;
1784         if (!macro.isFunctionLike())
1785             tk->f.generated = true;
1786 
1787         // Identifiers must not be eagerly expanded inside defines, but we should still notify
1788         // in the case they are macros.
1789         if (tk->is(T_IDENTIFIER) && m_client) {
1790             macroReference = m_env->resolve(tk->asByteArrayRef());
1791             if (macroReference) {
1792                 if (!macroReference->isFunctionLike()) {
1793                     m_client->notifyMacroReference(tk->byteOffset, tk->utf16charOffset,
1794                                                    tk->lineno, *macroReference);
1795                     macroReference = nullptr;
1796                 }
1797             }
1798         } else if (macroReference) {
1799             if (m_client && tk->is(T_LPAREN)) {
1800                 m_client->notifyMacroReference(previousBytesOffset, previousUtf16charsOffset,
1801                                                previousLine, *macroReference);
1802             }
1803             macroReference = nullptr;
1804         }
1805 
1806         previousBytesOffset = tk->byteOffset;
1807         previousUtf16charsOffset = tk->utf16charOffset;
1808         previousLine = tk->lineno;
1809 
1810         if (!scanComment(tk))
1811             bodyTokens.push_back(*tk);
1812 
1813         lex(tk);
1814     }
1815 
1816     if (isQtReservedWord(macroName.data(), macroName.size())) {
1817         QByteArray macroId = macro.name();
1818 
1819         if (macro.isFunctionLike()) {
1820             macroId += '(';
1821             bool fst = true;
1822             foreach (const QByteArray &formal, macro.formals()) {
1823                 if (! fst)
1824                     macroId += ", ";
1825                 fst = false;
1826                 macroId += formal;
1827             }
1828             macroId += ')';
1829         }
1830 
1831         bodyTokens.clear();
1832         macro.setDefinition(macroId, bodyTokens);
1833     } else if (!bodyTokens.isEmpty()) {
1834         PPToken &firstBodyToken = bodyTokens[0];
1835         int start = firstBodyToken.byteOffset;
1836         int len = tk->byteOffset - start;
1837         QByteArray bodyText = firstBodyToken.source().mid(start, len).trimmed();
1838 
1839         const int bodySize = bodyTokens.size();
1840         for (int i = 0; i < bodySize; ++i) {
1841             PPToken &t = bodyTokens[i];
1842             if (t.hasSource())
1843                 t.squeezeSource();
1844         }
1845         macro.setDefinition(bodyText, bodyTokens);
1846     }
1847 
1848     macro.setLength(tk->byteOffset - defineOffset);
1849     m_env->bind(macro);
1850 
1851 //    qDebug() << "adding macro" << macro.name() << "defined at" << macro.fileName() << ":"<<macro.line();
1852 
1853     if (m_client)
1854         m_client->macroAdded(macro);
1855 }
1856 
expand(PPToken * tk,PPToken * lastConditionToken)1857 QByteArray Preprocessor::expand(PPToken *tk, PPToken *lastConditionToken)
1858 {
1859     unsigned line = tk->lineno;
1860     unsigned bytesBegin = tk->bytesBegin();
1861     const int originalOffset = tk->originalOffset();
1862     unsigned utf16charsBegin = tk->utf16charsBegin();
1863     PPToken lastTk;
1864     while (isContinuationToken(*tk)) {
1865         lastTk = *tk;
1866         lex(tk);
1867     }
1868     // Gather the exact spelling of the content in the source.
1869     QByteArray condition(m_state.m_source.mid(originalOffset, lastTk.originalOffset() + lastTk.bytes()
1870                                               - originalOffset));
1871 
1872 //    qDebug("*** Condition before: [%s]", condition.constData());
1873     QByteArray result;
1874     result.reserve(256);
1875     preprocess(m_state.m_currentFileName, condition, &result, nullptr, true, false, true,
1876                bytesBegin, utf16charsBegin, line);
1877     result.squeeze();
1878 //    qDebug("*** Condition after: [%s]", result.constData());
1879 
1880     if (lastConditionToken)
1881         *lastConditionToken = lastTk;
1882 
1883     return result;
1884 }
1885 
evalExpression(PPToken * tk,Value & result)1886 const PPToken Preprocessor::evalExpression(PPToken *tk, Value &result)
1887 {
1888     PPToken lastConditionToken;
1889     const QByteArray expanded = expand(tk, &lastConditionToken);
1890     Lexer lexer(expanded.constData(), expanded.constData() + expanded.size());
1891     lexer.setPreprocessorMode(true);
1892     std::vector<Token> buf;
1893     Token t;
1894     do {
1895         lexer.scan(&t);
1896         buf.push_back(t);
1897     } while (t.isNot(T_EOF_SYMBOL));
1898     ExpressionEvaluator eval(m_client, m_env);
1899     result = eval(&buf[0], &buf[buf.size() - 1], expanded);
1900     return lastConditionToken;
1901 }
1902 
handleIfDirective(PPToken * tk)1903 void Preprocessor::handleIfDirective(PPToken *tk)
1904 {
1905     lex(tk); // consume "if" token
1906     Value result;
1907     const PPToken lastExpressionToken = evalExpression(tk, result);
1908 
1909     if (m_state.m_ifLevel >= MAX_LEVEL - 1) {
1910         nestingTooDeep();
1911         return;
1912     }
1913 
1914     const bool value = !result.is_zero();
1915 
1916     const bool wasSkipping = m_state.m_skipping[m_state.m_ifLevel];
1917     ++m_state.m_ifLevel;
1918     m_state.m_trueTest[m_state.m_ifLevel] = value;
1919     if (wasSkipping) {
1920         m_state.m_skipping[m_state.m_ifLevel] = wasSkipping;
1921     } else {
1922         bool startSkipping = !value;
1923         m_state.m_skipping[m_state.m_ifLevel] = startSkipping;
1924         if (startSkipping && m_client)
1925             startSkippingBlocks(lastExpressionToken);
1926     }
1927 
1928 }
1929 
handleElifDirective(PPToken * tk,const PPToken & poundToken)1930 void Preprocessor::handleElifDirective(PPToken *tk, const PPToken &poundToken)
1931 {
1932     if (m_state.m_ifLevel == 0) {
1933 //        std::cerr << "*** WARNING #elif without #if" << std::endl;
1934         handleIfDirective(tk);
1935     } else {
1936         lex(tk); // consume "elif" token
1937         if (m_state.m_skipping[m_state.m_ifLevel - 1]) {
1938             // we keep on skipping because we are nested in a skipped block
1939             m_state.m_skipping[m_state.m_ifLevel] = true;
1940         } else if (m_state.m_trueTest[m_state.m_ifLevel]) {
1941             if (!m_state.m_skipping[m_state.m_ifLevel]) {
1942                 // start skipping because the preceding then-part was not skipped
1943                 m_state.m_skipping[m_state.m_ifLevel] = true;
1944                 if (m_client)
1945                     startSkippingBlocks(poundToken);
1946             }
1947         } else {
1948             // preceding then-part was skipped, so calculate if we should start
1949             // skipping, depending on the condition
1950             Value result;
1951             evalExpression(tk, result);
1952 
1953             bool startSkipping = result.is_zero();
1954             m_state.m_trueTest[m_state.m_ifLevel] = !startSkipping;
1955             m_state.m_skipping[m_state.m_ifLevel] = startSkipping;
1956             if (m_client && !startSkipping)
1957                 m_client->stopSkippingBlocks(poundToken.utf16charOffset - 1);
1958         }
1959     }
1960 }
1961 
handleElseDirective(PPToken * tk,const PPToken & poundToken)1962 void Preprocessor::handleElseDirective(PPToken *tk, const PPToken &poundToken)
1963 {
1964     lex(tk); // consume "else" token
1965 
1966     if (m_state.m_ifLevel != 0) {
1967         if (m_state.m_skipping[m_state.m_ifLevel - 1]) {
1968             // we keep on skipping because we are nested in a skipped block
1969             m_state.m_skipping[m_state.m_ifLevel] = true;
1970         } else {
1971             bool wasSkipping = m_state.m_skipping[m_state.m_ifLevel];
1972             bool startSkipping = m_state.m_trueTest[m_state.m_ifLevel];
1973             m_state.m_skipping[m_state.m_ifLevel] = startSkipping;
1974 
1975             if (m_client && wasSkipping && !startSkipping)
1976                 m_client->stopSkippingBlocks(poundToken.utf16charOffset - 1);
1977             else if (m_client && !wasSkipping && startSkipping)
1978                 startSkippingBlocks(poundToken);
1979         }
1980 #ifndef NO_DEBUG
1981     } else {
1982         std::cerr << "*** WARNING #else without #if" << std::endl;
1983 #endif // NO_DEBUG
1984     }
1985 }
1986 
handleEndIfDirective(PPToken * tk,const PPToken & poundToken)1987 void Preprocessor::handleEndIfDirective(PPToken *tk, const PPToken &poundToken)
1988 {
1989     if (m_state.m_ifLevel == 0) {
1990 #ifndef NO_DEBUG
1991         std::cerr << "*** WARNING #endif without #if";
1992         if (!tk->generated())
1993             std::cerr << " on line " << tk->lineno << " of file " << m_state.m_currentFileName.toUtf8().constData();
1994         std::cerr << std::endl;
1995 #endif // NO_DEBUG
1996     } else {
1997         bool wasSkipping = m_state.m_skipping[m_state.m_ifLevel];
1998         m_state.m_skipping[m_state.m_ifLevel] = false;
1999         m_state.m_trueTest[m_state.m_ifLevel] = false;
2000         --m_state.m_ifLevel;
2001         if (m_client && wasSkipping && !m_state.m_skipping[m_state.m_ifLevel])
2002             m_client->stopSkippingBlocks(poundToken.utf16charOffset - 1);
2003 
2004         if (m_state.m_ifLevel == 0)
2005             m_state.updateIncludeGuardState(State::IncludeGuardStateHint_Endif);
2006     }
2007 
2008     lex(tk); // consume "endif" token
2009 }
2010 
handleIfDefDirective(bool checkUndefined,PPToken * tk)2011 void Preprocessor::handleIfDefDirective(bool checkUndefined, PPToken *tk)
2012 {
2013     lex(tk); // consume "ifdef" token
2014     if (tk->is(T_IDENTIFIER)) {
2015         if (checkUndefined && m_state.m_ifLevel == 0)
2016             m_state.updateIncludeGuardState(State::IncludeGuardStateHint_Ifndef, tk);
2017 
2018         bool value = false;
2019         const ByteArrayRef macroName = tk->asByteArrayRef();
2020         if (Macro *macro = macroDefinition(macroName, tk->byteOffset, tk->utf16charOffset,
2021                                            tk->lineno, m_env, m_client)) {
2022             value = true;
2023 
2024             // the macro is a feature constraint(e.g. QT_NO_XXX)
2025             if (checkUndefined && macroName.startsWith("QT_NO_")) {
2026                 if (macro->fileName() == configurationFileName()) {
2027                     // and it' defined in a pro file (e.g. DEFINES += QT_NO_QOBJECT)
2028 
2029                     value = false; // take the branch
2030                 }
2031             }
2032         } else if (Environment::isBuiltinMacro(macroName)) {
2033             value = true;
2034         }
2035 
2036         if (checkUndefined)
2037             value = !value;
2038 
2039         const bool wasSkipping = m_state.m_skipping[m_state.m_ifLevel];
2040 
2041         if (m_state.m_ifLevel < MAX_LEVEL - 1) {
2042             ++m_state.m_ifLevel;
2043             m_state.m_trueTest[m_state.m_ifLevel] = value;
2044             m_state.m_skipping[m_state.m_ifLevel] = wasSkipping ? wasSkipping : !value;
2045 
2046             if (m_client && !wasSkipping && !value)
2047                 startSkippingBlocks(*tk);
2048         } else {
2049             nestingTooDeep();
2050         }
2051 
2052         lex(tk); // consume the identifier
2053 #ifndef NO_DEBUG
2054     } else {
2055         std::cerr << "*** WARNING #ifdef without identifier" << std::endl;
2056 #endif // NO_DEBUG
2057     }
2058 }
2059 
handleUndefDirective(PPToken * tk)2060 void Preprocessor::handleUndefDirective(PPToken *tk)
2061 {
2062     lex(tk); // consume "undef" token
2063     if (tk->is(T_IDENTIFIER)) {
2064         const ByteArrayRef macroName = tk->asByteArrayRef();
2065         const unsigned bytesOffset = tk->byteOffset + m_state.m_bytesOffsetRef;
2066         const unsigned utf16charsOffset = tk->utf16charOffset + m_state.m_utf16charsOffsetRef;
2067         // Track macro use if previously defined
2068         if (m_client) {
2069             if (const Macro *existingMacro = m_env->resolve(macroName)) {
2070                 m_client->notifyMacroReference(bytesOffset, utf16charsOffset,
2071                                                tk->lineno, *existingMacro);
2072             }
2073         }
2074         synchronizeOutputLines(*tk);
2075         Macro *macro = m_env->remove(macroName);
2076 
2077         if (m_client && macro) {
2078             macro->setBytesOffset(bytesOffset);
2079             macro->setUtf16charOffset(utf16charsOffset);
2080             m_client->macroAdded(*macro);
2081         }
2082         lex(tk); // consume macro name
2083 #ifndef NO_DEBUG
2084     } else {
2085         std::cerr << "*** WARNING #undef without identifier" << std::endl;
2086 #endif // NO_DEBUG
2087     }
2088 }
2089 
generateToken(enum Kind kind,const char * content,int length,unsigned lineno,bool addQuotes,bool addToControl)2090 PPToken Preprocessor::generateToken(enum Kind kind,
2091                                     const char *content, int length,
2092                                     unsigned lineno,
2093                                     bool addQuotes,
2094                                     bool addToControl)
2095 {
2096     // When the token is a generated token, the column position cannot be
2097     // reconstructed, but we also have to prevent it from searching the whole
2098     // scratch buffer. So inserting a newline before the new token will give
2099     // an indent width of 0 (zero).
2100     m_scratchBuffer.append('\n');
2101 
2102     const size_t pos = m_scratchBuffer.size();
2103 
2104     if (kind == T_STRING_LITERAL && addQuotes)
2105         m_scratchBuffer.append('"');
2106     m_scratchBuffer.append(content, length);
2107     if (kind == T_STRING_LITERAL && addQuotes) {
2108         m_scratchBuffer.append('"');
2109         length += 2;
2110     }
2111 
2112     PPToken tk(m_scratchBuffer);
2113     tk.f.kind = kind;
2114     if (m_state.m_lexer->control() && addToControl) {
2115         if (kind == T_STRING_LITERAL)
2116             tk.string = m_state.m_lexer->control()->stringLiteral(m_scratchBuffer.constData() + pos, length);
2117         else if (kind == T_IDENTIFIER)
2118             tk.identifier = m_state.m_lexer->control()->identifier(m_scratchBuffer.constData() + pos, length);
2119         else if (kind == T_NUMERIC_LITERAL)
2120             tk.number = m_state.m_lexer->control()->numericLiteral(m_scratchBuffer.constData() + pos, length);
2121     }
2122     tk.byteOffset = unsigned(pos);
2123     tk.f.bytes = length;
2124     tk.f.generated = true;
2125     tk.f.expanded = true;
2126     tk.lineno = lineno;
2127 
2128     return tk;
2129 }
2130 
// Builds a generated identifier token whose text is the text of \a leftTk
// directly followed by the text of \a rightTk. The result takes its line
// number and whitespace flag from \a leftTk.
PPToken Preprocessor::generateConcatenated(const PPToken &leftTk, const PPToken &rightTk)
{
    QByteArray joined;
    joined.reserve(leftTk.bytes() + rightTk.bytes());
    joined.append(leftTk.tokenStart(), leftTk.bytes())
          .append(rightTk.tokenStart(), rightTk.bytes());

    PPToken concatenated = generateToken(T_IDENTIFIER, joined.constData(), joined.size(),
                                         leftTk.lineno, true);
    concatenated.f.whitespace = leftTk.whitespace();
    return concatenated;
}
2141 
startSkippingBlocks(const Preprocessor::PPToken & tk) const2142 void Preprocessor::startSkippingBlocks(const Preprocessor::PPToken &tk) const
2143 {
2144     if (!m_client)
2145         return;
2146 
2147     unsigned utf16charIter = tk.utf16charsEnd();
2148     const char *source = tk.source().constData() + tk.bytesEnd();
2149     const char *sourceEnd = tk.source().constEnd();
2150     unsigned char yychar = *source;
2151 
2152     do {
2153         if (yychar == '\n') {
2154             m_client->startSkippingBlocks(utf16charIter + 1);
2155             return;
2156         }
2157         Lexer::yyinp_utf8(source, yychar, utf16charIter);
2158     } while (source < sourceEnd);
2159 }
2160 
atStartOfOutputLine() const2161 bool Preprocessor::atStartOfOutputLine() const
2162 {
2163     const QByteArray *buffer = m_state.m_currentExpansion;
2164     return buffer->isEmpty() || buffer->endsWith('\n');
2165 }
2166 
maybeStartOutputLine()2167 void Preprocessor::maybeStartOutputLine()
2168 {
2169     QByteArray &buffer = currentOutputBuffer();
2170     if (buffer.isEmpty())
2171         return;
2172     if (!buffer.endsWith('\n'))
2173         buffer.append('\n');
2174     // If previous line ends with \ (possibly followed by whitespace), add another \n
2175     const char *start = buffer.constData();
2176     const char *ch = start + buffer.length() - 2;
2177     while (ch > start && (*ch != '\n') && pp_isspace(*ch))
2178         --ch;
2179     if (*ch == '\\')
2180         buffer.append('\n');
2181 }
2182