1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
5 ** Contact: https://www.qt.io/licensing/
6 **
7 ** This file is part of the tools applications of the Qt Toolkit.
8 **
9 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
10 ** Commercial License Usage
11 ** Licensees holding valid commercial Qt licenses may use this file in
12 ** accordance with the commercial license agreement provided with the
13 ** Software or, alternatively, in accordance with the terms contained in
14 ** a written agreement between you and The Qt Company. For licensing terms
15 ** and conditions see https://www.qt.io/terms-conditions. For further
16 ** information use the contact form at https://www.qt.io/contact-us.
17 **
18 ** GNU General Public License Usage
19 ** Alternatively, this file may be used under the terms of the GNU
20 ** General Public License version 3 as published by the Free Software
21 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
22 ** included in the packaging of this file. Please review the following
23 ** information to ensure the GNU General Public License requirements will
24 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
25 **
26 ** $QT_END_LICENSE$
27 **
28 ****************************************************************************/
29 
30 #include "preprocessor.h"
31 #include "utils.h"
32 #include <qstringlist.h>
33 #include <qfile.h>
34 #include <qdir.h>
35 #include <qfileinfo.h>
36 
37 QT_BEGIN_NAMESPACE
38 
39 #include "ppkeywords.cpp"
40 #include "keywords.cpp"
41 
42 // transform \r\n into \n
43 // \r into \n (os9 style)
44 // backslash-newlines into newlines
cleaned(const QByteArray & input)45 static QByteArray cleaned(const QByteArray &input)
46 {
47     QByteArray result;
48     result.resize(input.size());
49     const char *data = input.constData();
50     const char *end = input.constData() + input.size();
51     char *output = result.data();
52 
53     int newlines = 0;
54     while (data != end) {
55         while (data != end && is_space(*data))
56             ++data;
57         bool takeLine = (*data == '#');
58         if (*data == '%' && *(data+1) == ':') {
59             takeLine = true;
60             ++data;
61         }
62         if (takeLine) {
63             *output = '#';
64             ++output;
65             do ++data; while (data != end && is_space(*data));
66         }
67         while (data != end) {
68             // handle \\\n, \\\r\n and \\\r
69             if (*data == '\\') {
70                 if (*(data + 1) == '\r') {
71                     ++data;
72                 }
73                 if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) {
74                     ++newlines;
75                     data += 1;
76                     if (data != end && *data != '\r')
77                         data += 1;
78                     continue;
79                 }
80             } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
81                 ++data;
82             }
83             if (data == end)
84                 break;
85 
86             char ch = *data;
87             if (ch == '\r') // os9: replace \r with \n
88                 ch = '\n';
89             *output = ch;
90             ++output;
91 
92             if (*data == '\n') {
93                 // output additional newlines to keep the correct line-numbering
94                 // for the lines following the backslash-newline sequence(s)
95                 while (newlines) {
96                     *output = '\n';
97                     ++output;
98                     --newlines;
99                 }
100                 ++data;
101                 break;
102             }
103             ++data;
104         }
105     }
106     result.resize(output - result.constData());
107     return result;
108 }
109 
// Definition of the static flag that tokenize() consults: when it is true,
// tokenize() keeps whitespace tokens and unrecognized input, and does not
// merge adjacent string literals. It is off by default.
bool Preprocessor::preprocessOnly = false;
skipUntilEndif()111 void Preprocessor::skipUntilEndif()
112 {
113     while(index < symbols.size() - 1 && symbols.at(index).token != PP_ENDIF){
114         switch (symbols.at(index).token) {
115         case PP_IF:
116         case PP_IFDEF:
117         case PP_IFNDEF:
118             ++index;
119             skipUntilEndif();
120             break;
121         default:
122             ;
123         }
124         ++index;
125     }
126 }
127 
skipBranch()128 bool Preprocessor::skipBranch()
129 {
130     while (index < symbols.size() - 1
131           && (symbols.at(index).token != PP_ENDIF
132                && symbols.at(index).token != PP_ELIF
133                && symbols.at(index).token != PP_ELSE)
134        ){
135         switch (symbols.at(index).token) {
136         case PP_IF:
137         case PP_IFDEF:
138         case PP_IFNDEF:
139             ++index;
140             skipUntilEndif();
141             break;
142         default:
143             ;
144         }
145         ++index;
146     }
147     return (index < symbols.size() - 1);
148 }
149 
150 
tokenize(const QByteArray & input,int lineNum,Preprocessor::TokenizeMode mode)151 Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
152 {
153     Symbols symbols;
154     // Preallocate some space to speed up the code below.
155     // The magic divisor value was found by calculating the average ratio between
156     // input size and the final size of symbols.
157     // This yielded a value of 16.x when compiling Qt Base.
158     symbols.reserve(input.size() / 16);
159     const char *begin = input.constData();
160     const char *data = begin;
161     while (*data) {
162         if (mode == TokenizeCpp || mode == TokenizeDefine) {
163             int column = 0;
164 
165             const char *lexem = data;
166             int state = 0;
167             Token token = NOTOKEN;
168             for (;;) {
169                 if (static_cast<signed char>(*data) < 0) {
170                     ++data;
171                     continue;
172                 }
173                 int nextindex = keywords[state].next;
174                 int next = 0;
175                 if (*data == keywords[state].defchar)
176                     next = keywords[state].defnext;
177                 else if (!state || nextindex)
178                     next = keyword_trans[nextindex][(int)*data];
179                 if (!next)
180                     break;
181                 state = next;
182                 token = keywords[state].token;
183                 ++data;
184             }
185 
186             // suboptimal, is_ident_char  should use a table
187             if (keywords[state].ident && is_ident_char(*data))
188                 token = keywords[state].ident;
189 
190             if (token == NOTOKEN) {
191                 if (*data)
192                     ++data;
193                 // an error really, but let's ignore this input
194                 // to not confuse moc later. However in pre-processor
195                 // only mode let's continue.
196                 if (!Preprocessor::preprocessOnly)
197                     continue;
198             }
199 
200             ++column;
201 
202             if (token > SPECIAL_TREATMENT_MARK) {
203                 switch (token) {
204                 case QUOTE:
205                     data = skipQuote(data);
206                     token = STRING_LITERAL;
207                     // concatenate multi-line strings for easier
208                     // STRING_LITERAL handling in moc
209                     if (!Preprocessor::preprocessOnly
210                         && !symbols.isEmpty()
211                         && symbols.constLast().token == STRING_LITERAL) {
212 
213                         const QByteArray newString
214                                 = '\"'
215                                 + symbols.constLast().unquotedLexem()
216                                 + input.mid(lexem - begin + 1, data - lexem - 2)
217                                 + '\"';
218                         symbols.last() = Symbol(symbols.constLast().lineNum,
219                                                 STRING_LITERAL,
220                                                 newString);
221                         continue;
222                     }
223                     break;
224                 case SINGLEQUOTE:
225                     while (*data && (*data != '\''
226                                      || (*(data-1)=='\\'
227                                          && *(data-2)!='\\')))
228                         ++data;
229                     if (*data)
230                         ++data;
231                     token = CHARACTER_LITERAL;
232                     break;
233                 case LANGLE_SCOPE:
234                     // split <:: into two tokens, < and ::
235                     token = LANGLE;
236                     data -= 2;
237                     break;
238                 case DIGIT:
239                     while (is_digit_char(*data) || *data == '\'')
240                         ++data;
241                     if (!*data || *data != '.') {
242                         token = INTEGER_LITERAL;
243                         if (data - lexem == 1 &&
244                             (*data == 'x' || *data == 'X'
245                              || *data == 'b' || *data == 'B')
246                             && *lexem == '0') {
247                             ++data;
248                             while (is_hex_char(*data) || *data == '\'')
249                                 ++data;
250                         }
251                         break;
252                     }
253                     token = FLOATING_LITERAL;
254                     ++data;
255                     Q_FALLTHROUGH();
256                 case FLOATING_LITERAL:
257                     while (is_digit_char(*data) || *data == '\'')
258                         ++data;
259                     if (*data == '+' || *data == '-')
260                         ++data;
261                     if (*data == 'e' || *data == 'E') {
262                         ++data;
263                         while (is_digit_char(*data) || *data == '\'')
264                             ++data;
265                     }
266                     if (*data == 'f' || *data == 'F'
267                         || *data == 'l' || *data == 'L')
268                         ++data;
269                     break;
270                 case HASH:
271                     if (column == 1 && mode == TokenizeCpp) {
272                         mode = PreparePreprocessorStatement;
273                         while (*data && (*data == ' ' || *data == '\t'))
274                             ++data;
275                         if (is_ident_char(*data))
276                             mode = TokenizePreprocessorStatement;
277                         continue;
278                     }
279                     break;
280                 case PP_HASHHASH:
281                     if (mode == TokenizeCpp)
282                         continue;
283                     break;
284                 case NEWLINE:
285                     ++lineNum;
286                     if (mode == TokenizeDefine) {
287                         mode = TokenizeCpp;
288                         // emit the newline token
289                         break;
290                     }
291                     continue;
292                 case BACKSLASH:
293                 {
294                     const char *rewind = data;
295                     while (*data && (*data == ' ' || *data == '\t'))
296                         ++data;
297                     if (*data && *data == '\n') {
298                         ++data;
299                         continue;
300                     }
301                     data = rewind;
302                 } break;
303                 case CHARACTER:
304                     while (is_ident_char(*data))
305                         ++data;
306                     token = IDENTIFIER;
307                     break;
308                 case C_COMMENT:
309                     if (*data) {
310                         if (*data == '\n')
311                             ++lineNum;
312                         ++data;
313                         if (*data) {
314                             if (*data == '\n')
315                                 ++lineNum;
316                             ++data;
317                         }
318                     }
319                     while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
320                         if (*data == '\n')
321                             ++lineNum;
322                         ++data;
323                     }
324                     token = WHITESPACE; // one comment, one whitespace
325                     Q_FALLTHROUGH();
326                 case WHITESPACE:
327                     if (column == 1)
328                         column = 0;
329                     while (*data && (*data == ' ' || *data == '\t'))
330                         ++data;
331                     if (Preprocessor::preprocessOnly) // tokenize whitespace
332                         break;
333                     continue;
334                 case CPP_COMMENT:
335                     while (*data && *data != '\n')
336                         ++data;
337                     continue; // ignore safely, the newline is a separator
338                 default:
339                     continue; //ignore
340                 }
341             }
342 #ifdef USE_LEXEM_STORE
343             if (!Preprocessor::preprocessOnly
344                 && token != IDENTIFIER
345                 && token != STRING_LITERAL
346                 && token != FLOATING_LITERAL
347                 && token != INTEGER_LITERAL)
348                 symbols += Symbol(lineNum, token);
349             else
350 #endif
351                 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
352 
353         } else { //   Preprocessor
354 
355             const char *lexem = data;
356             int state = 0;
357             Token token = NOTOKEN;
358             if (mode == TokenizePreprocessorStatement) {
359                 state = pp_keyword_trans[0][(int)'#'];
360                 mode = TokenizePreprocessor;
361             }
362             for (;;) {
363                 if (static_cast<signed char>(*data) < 0) {
364                     ++data;
365                     continue;
366                 }
367                 int nextindex = pp_keywords[state].next;
368                 int next = 0;
369                 if (*data == pp_keywords[state].defchar)
370                     next = pp_keywords[state].defnext;
371                 else if (!state || nextindex)
372                     next = pp_keyword_trans[nextindex][(int)*data];
373                 if (!next)
374                     break;
375                 state = next;
376                 token = pp_keywords[state].token;
377                 ++data;
378             }
379             // suboptimal, is_ident_char  should use a table
380             if (pp_keywords[state].ident && is_ident_char(*data))
381                 token = pp_keywords[state].ident;
382 
383             switch (token) {
384             case NOTOKEN:
385                 if (*data)
386                     ++data;
387                 break;
388             case PP_DEFINE:
389                 mode = PrepareDefine;
390                 break;
391             case PP_IFDEF:
392                 symbols += Symbol(lineNum, PP_IF);
393                 symbols += Symbol(lineNum, PP_DEFINED);
394                 continue;
395             case PP_IFNDEF:
396                 symbols += Symbol(lineNum, PP_IF);
397                 symbols += Symbol(lineNum, PP_NOT);
398                 symbols += Symbol(lineNum, PP_DEFINED);
399                 continue;
400             case PP_INCLUDE:
401                 mode = TokenizeInclude;
402                 break;
403             case PP_QUOTE:
404                 data = skipQuote(data);
405                 token = PP_STRING_LITERAL;
406                 break;
407             case PP_SINGLEQUOTE:
408                 while (*data && (*data != '\''
409                                  || (*(data-1)=='\\'
410                                      && *(data-2)!='\\')))
411                     ++data;
412                 if (*data)
413                     ++data;
414                 token = PP_CHARACTER_LITERAL;
415                 break;
416             case PP_DIGIT:
417                 while (is_digit_char(*data) || *data == '\'')
418                     ++data;
419                 if (!*data || *data != '.') {
420                     token = PP_INTEGER_LITERAL;
421                     if (data - lexem == 1 &&
422                         (*data == 'x' || *data == 'X')
423                         && *lexem == '0') {
424                         ++data;
425                         while (is_hex_char(*data) || *data == '\'')
426                             ++data;
427                     }
428                     break;
429                 }
430                 token = PP_FLOATING_LITERAL;
431                 ++data;
432                 Q_FALLTHROUGH();
433             case PP_FLOATING_LITERAL:
434                 while (is_digit_char(*data) || *data == '\'')
435                     ++data;
436                 if (*data == '+' || *data == '-')
437                     ++data;
438                 if (*data == 'e' || *data == 'E') {
439                     ++data;
440                     while (is_digit_char(*data) || *data == '\'')
441                         ++data;
442                 }
443                 if (*data == 'f' || *data == 'F'
444                     || *data == 'l' || *data == 'L')
445                     ++data;
446                 break;
447             case PP_CHARACTER:
448                 if (mode == PreparePreprocessorStatement) {
449                     // rewind entire token to begin
450                     data = lexem;
451                     mode = TokenizePreprocessorStatement;
452                     continue;
453                 }
454                 while (is_ident_char(*data))
455                     ++data;
456                 token = PP_IDENTIFIER;
457 
458                 if (mode == PrepareDefine) {
459                     symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
460                     // make sure we explicitly add the whitespace here if the next char
461                     // is not an opening brace, so we can distinguish correctly between
462                     // regular and function macros
463                     if (*data != '(')
464                         symbols += Symbol(lineNum, WHITESPACE);
465                     mode = TokenizeDefine;
466                     continue;
467                 }
468                 break;
469             case PP_C_COMMENT:
470                 if (*data) {
471                     if (*data == '\n')
472                         ++lineNum;
473                     ++data;
474                     if (*data) {
475                         if (*data == '\n')
476                             ++lineNum;
477                         ++data;
478                     }
479                 }
480                 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
481                     if (*data == '\n')
482                         ++lineNum;
483                     ++data;
484                 }
485                 token = PP_WHITESPACE; // one comment, one whitespace
486                 Q_FALLTHROUGH();
487             case PP_WHITESPACE:
488                 while (*data && (*data == ' ' || *data == '\t'))
489                     ++data;
490                 continue; // the preprocessor needs no whitespace
491             case PP_CPP_COMMENT:
492                 while (*data && *data != '\n')
493                     ++data;
494                 continue; // ignore safely, the newline is a separator
495             case PP_NEWLINE:
496                 ++lineNum;
497                 mode = TokenizeCpp;
498                 break;
499             case PP_BACKSLASH:
500             {
501                 const char *rewind = data;
502                 while (*data && (*data == ' ' || *data == '\t'))
503                     ++data;
504                 if (*data && *data == '\n') {
505                     ++data;
506                     continue;
507                 }
508                 data = rewind;
509             } break;
510             case PP_LANGLE:
511                 if (mode != TokenizeInclude)
512                     break;
513                 token = PP_STRING_LITERAL;
514                 while (*data && *data != '\n' && *(data-1) != '>')
515                     ++data;
516                 break;
517             default:
518                 break;
519             }
520             if (mode == PreparePreprocessorStatement)
521                 continue;
522 #ifdef USE_LEXEM_STORE
523             if (token != PP_IDENTIFIER
524                 && token != PP_STRING_LITERAL
525                 && token != PP_FLOATING_LITERAL
526                 && token != PP_INTEGER_LITERAL)
527                 symbols += Symbol(lineNum, token);
528             else
529 #endif
530                 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
531         }
532     }
533     symbols += Symbol(); // eof symbol
534     return symbols;
535 }
536 
macroExpand(Symbols * into,Preprocessor * that,const Symbols & toExpand,int & index,int lineNum,bool one,const QSet<QByteArray> & excludeSymbols)537 void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, int &index,
538                                   int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
539 {
540     SymbolStack symbols;
541     SafeSymbols sf;
542     sf.symbols = toExpand;
543     sf.index = index;
544     sf.excludedSymbols = excludeSymbols;
545     symbols.push(sf);
546 
547     if (toExpand.isEmpty())
548         return;
549 
550     for (;;) {
551         QByteArray macro;
552         Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, &macro);
553 
554         if (macro.isEmpty()) {
555             // not a macro
556             Symbol s = symbols.symbol();
557             s.lineNum = lineNum;
558             *into += s;
559         } else {
560             SafeSymbols sf;
561             sf.symbols = newSyms;
562             sf.index = 0;
563             sf.expandedMacro = macro;
564             symbols.push(sf);
565         }
566         if (!symbols.hasNext() || (one && symbols.size() == 1))
567                 break;
568         symbols.next();
569     }
570 
571     if (symbols.size())
572         index = symbols.top().index;
573     else
574         index = toExpand.size();
575 }
576 
577 
// Tries to expand the current symbol of \a symbols as a macro defined in
// \a that and returns the resulting symbol list.
//
// If the current symbol is not an identifier, is not a known macro, or
// symbols.dontReplaceSymbol() rejects it, an empty list is returned and
// \a macroName is left untouched. Otherwise \a macroName is set to the macro's
// name and
//   - for an object-like macro the macro's symbols are returned unchanged;
//   - for a function-like macro the arguments are read from \a symbols and
//     substituted into the macro body, handling '#' and '##'.
// A function-like macro name that is not followed by '(' is not expanded:
// \a macroName is cleared and the identifier itself is returned (preceded by
// one whitespace symbol if whitespace was seen after the name).
//
// \a lineNum is the line number given to symbols created here. Problems are
// reported through that->error().
Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName)
{
    Symbol s = symbols.symbol();

    // not a macro
    if (s.token != PP_IDENTIFIER || !that->macros.contains(s) || symbols.dontReplaceSymbol(s.lexem())) {
        return Symbols();
    }

    const Macro &macro = that->macros.value(s);
    *macroName = s.lexem();

    Symbols expansion;
    if (!macro.isFunction) {
        expansion = macro.symbols;
    } else {
        bool haveSpace = false;
        // NOTE(review): test() presumably consumes the symbol when it matches,
        // so this loop skips all whitespace after the name - confirm in SymbolStack
        while (symbols.test(PP_WHITESPACE)) { haveSpace = true; }
        if (!symbols.test(PP_LPAREN)) {
            // function-like macro used without an argument list: keep the
            // identifier as it is and tell the caller nothing was expanded
            *macroName = QByteArray();
            Symbols syms;
            if (haveSpace)
                syms += Symbol(lineNum, PP_WHITESPACE);
            syms += s;
            syms.last().lineNum = lineNum;
            return syms;
        }
        // collect the arguments, one list of symbols per argument
        QVarLengthArray<Symbols, 5> arguments;
        while (symbols.hasNext()) {
            Symbols argument;
            // strip leading space
            while (symbols.test(PP_WHITESPACE)) {}
            // parenthesis depth inside this argument; dropping below zero
            // means the ')' closing the macro call was reached
            int nesting = 0;
            // once the last declared parameter of a variadic macro is being
            // read, commas no longer separate arguments
            bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1);
            while (symbols.hasNext()) {
                Token t = symbols.next();
                if (t == PP_LPAREN) {
                    ++nesting;
                } else if (t == PP_RPAREN) {
                    --nesting;
                    if (nesting < 0)
                        break;
                } else if (t == PP_COMMA && nesting == 0) {
                    if (!vararg)
                        break;
                }
                argument += symbols.symbol();
            }
            arguments += argument;

            if (nesting < 0)
                break;
            else if (!symbols.hasNext())
                that->error("missing ')' in macro usage");
        }

        // empty VA_ARGS
        if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1)
            arguments += Symbols();

        // now replace the macro arguments with the expanded arguments
        // mode records whether the previous body symbol was '#' or '##'; it
        // falls back to Normal after the symbol following the operator
        enum Mode {
            Normal,
            Hash,
            HashHash
        } mode = Normal;

        for (int i = 0; i < macro.symbols.size(); ++i) {
            const Symbol &s = macro.symbols.at(i);
            if (s.token == HASH || s.token == PP_HASHHASH) {
                mode = (s.token == HASH ? Hash : HashHash);
                continue;
            }
            // position of s in the parameter list, or -1 if s is no parameter
            int index = macro.arguments.indexOf(s);
            if (mode == Normal) {
                if (index >= 0 && index < arguments.size()) {
                    // each argument undergoes macro expansion if it's not used as part of a # or ##
                    if (i == macro.symbols.size() - 1 || macro.symbols.at(i + 1).token != PP_HASHHASH) {
                        Symbols arg = arguments.at(index);
                        int idx = 1;
                        macroExpand(&expansion, that, arg, idx, lineNum, false, symbols.excludeSymbols());
                    } else {
                        expansion += arguments.at(index);
                    }
               } else {
                    expansion += s;
                }
            } else if (mode == Hash) {
                // '#': turn the argument into a string literal
                if (index < 0) {
                    that->error("'#' is not followed by a macro parameter");
                    continue;
                } else if (index >= arguments.size()) {
                    that->error("Macro invoked with too few parameters for a use of '#'");
                    continue;
                }

                const Symbols &arg = arguments.at(index);
                QByteArray stringified;
                for (int i = 0; i < arg.size(); ++i) {
                    stringified += arg.at(i).lexem();
                }
                // escape embedded double quotes, then wrap the text in quotes
                stringified.replace('"', "\\\"");
                stringified.prepend('"');
                stringified.append('"');
                expansion += Symbol(lineNum, STRING_LITERAL, stringified);
            } else if (mode == HashHash){
                // '##': paste this symbol (or the first symbol of the
                // argument it names) onto the last symbol of the expansion
                if (s.token == WHITESPACE)
                    continue;

                while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
                    expansion.pop_back();

                Symbol next = s;
                if (index >= 0 && index < arguments.size()) {
                    const Symbols &arg = arguments.at(index);
                    if (arg.size() == 0) {
                        // empty argument: nothing to paste
                        mode = Normal;
                        continue;
                    }
                    next = arg.at(0);
                }

                if (!expansion.isEmpty() && expansion.constLast().token == s.token
                    && expansion.constLast().token != STRING_LITERAL) {
                    // merge both lexems into one symbol that keeps the token
                    // type of the left-hand side
                    Symbol last = expansion.takeLast();

                    QByteArray lexem = last.lexem() + next.lexem();
                    expansion += Symbol(lineNum, last.token, lexem);
                } else {
                    expansion += next;
                }

                // the remaining symbols of the argument follow unchanged
                if (index >= 0 && index < arguments.size()) {
                    const Symbols &arg = arguments.at(index);
                    for (int i = 1; i < arg.size(); ++i)
                        expansion += arg.at(i);
                }
            }
            mode = Normal;
        }
        if (mode != Normal)
            that->error("'#' or '##' found at the end of a macro argument");

    }

    return expansion;
}
725 
substituteUntilNewline(Symbols & substituted)726 void Preprocessor::substituteUntilNewline(Symbols &substituted)
727 {
728     while (hasNext()) {
729         Token token = next();
730         if (token == PP_IDENTIFIER) {
731             macroExpand(&substituted, this, symbols, index, symbol().lineNum, true);
732         } else if (token == PP_DEFINED) {
733             bool braces = test(PP_LPAREN);
734             next(PP_IDENTIFIER);
735             Symbol definedOrNotDefined = symbol();
736             definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
737             substituted += definedOrNotDefined;
738             if (braces)
739                 test(PP_RPAREN);
740             continue;
741         } else if (token == PP_NEWLINE) {
742             substituted += symbol();
743             break;
744         } else {
745             substituted += symbol();
746         }
747     }
748 }
749 
750 
751 class PP_Expression : public Parser
752 {
753 public:
value()754     int value() { index = 0; return unary_expression_lookup() ?  conditional_expression() : 0; }
755 
756     int conditional_expression();
757     int logical_OR_expression();
758     int logical_AND_expression();
759     int inclusive_OR_expression();
760     int exclusive_OR_expression();
761     int AND_expression();
762     int equality_expression();
763     int relational_expression();
764     int shift_expression();
765     int additive_expression();
766     int multiplicative_expression();
767     int unary_expression();
768     bool unary_expression_lookup();
769     int primary_expression();
770     bool primary_expression_lookup();
771 };
772 
conditional_expression()773 int PP_Expression::conditional_expression()
774 {
775     int value = logical_OR_expression();
776     if (test(PP_QUESTION)) {
777         int alt1 = conditional_expression();
778         int alt2 = test(PP_COLON) ? conditional_expression() : 0;
779         return value ? alt1 : alt2;
780     }
781     return value;
782 }
783 
logical_OR_expression()784 int PP_Expression::logical_OR_expression()
785 {
786     int value = logical_AND_expression();
787     if (test(PP_OROR))
788         return logical_OR_expression() || value;
789     return value;
790 }
791 
logical_AND_expression()792 int PP_Expression::logical_AND_expression()
793 {
794     int value = inclusive_OR_expression();
795     if (test(PP_ANDAND))
796         return logical_AND_expression() && value;
797     return value;
798 }
799 
inclusive_OR_expression()800 int PP_Expression::inclusive_OR_expression()
801 {
802     int value = exclusive_OR_expression();
803     if (test(PP_OR))
804         return value | inclusive_OR_expression();
805     return value;
806 }
807 
exclusive_OR_expression()808 int PP_Expression::exclusive_OR_expression()
809 {
810     int value = AND_expression();
811     if (test(PP_HAT))
812         return value ^ exclusive_OR_expression();
813     return value;
814 }
815 
AND_expression()816 int PP_Expression::AND_expression()
817 {
818     int value = equality_expression();
819     if (test(PP_AND))
820         return value & AND_expression();
821     return value;
822 }
823 
equality_expression()824 int PP_Expression::equality_expression()
825 {
826     int value = relational_expression();
827     switch (next()) {
828     case PP_EQEQ:
829         return value == equality_expression();
830     case PP_NE:
831         return value != equality_expression();
832     default:
833         prev();
834         return value;
835     }
836 }
837 
relational_expression()838 int PP_Expression::relational_expression()
839 {
840     int value = shift_expression();
841     switch (next()) {
842     case PP_LANGLE:
843         return value < relational_expression();
844     case PP_RANGLE:
845         return value > relational_expression();
846     case PP_LE:
847         return value <= relational_expression();
848     case PP_GE:
849         return value >= relational_expression();
850     default:
851         prev();
852         return value;
853     }
854 }
855 
shift_expression()856 int PP_Expression::shift_expression()
857 {
858     int value = additive_expression();
859     switch (next()) {
860     case PP_LTLT:
861         return value << shift_expression();
862     case PP_GTGT:
863         return value >> shift_expression();
864     default:
865         prev();
866         return value;
867     }
868 }
869 
additive_expression()870 int PP_Expression::additive_expression()
871 {
872     int value = multiplicative_expression();
873     switch (next()) {
874     case PP_PLUS:
875         return value + additive_expression();
876     case PP_MINUS:
877         return value - additive_expression();
878     default:
879         prev();
880         return value;
881     }
882 }
883 
multiplicative_expression()884 int PP_Expression::multiplicative_expression()
885 {
886     int value = unary_expression();
887     switch (next()) {
888     case PP_STAR:
889     {
890         // get well behaved overflow behavior by converting to long
891         // and then back to int
892         // NOTE: A conformant preprocessor would need to work intmax_t/
893         // uintmax_t according to [cpp.cond], 19.1 §10
894         // But we're not compliant anyway
895         qint64 result = qint64(value) * qint64(multiplicative_expression());
896         return int(result);
897     }
898     case PP_PERCENT:
899     {
900         int remainder = multiplicative_expression();
901         return remainder ? value % remainder : 0;
902     }
903     case PP_SLASH:
904     {
905         int div = multiplicative_expression();
906         return div ? value / div : 0;
907     }
908     default:
909         prev();
910         return value;
911     };
912 }
913 
unary_expression()914 int PP_Expression::unary_expression()
915 {
916     switch (next()) {
917     case PP_PLUS:
918         return unary_expression();
919     case PP_MINUS:
920         return -unary_expression();
921     case PP_NOT:
922         return !unary_expression();
923     case PP_TILDE:
924         return ~unary_expression();
925     case PP_MOC_TRUE:
926         return 1;
927     case PP_MOC_FALSE:
928         return 0;
929     default:
930         prev();
931         return primary_expression();
932     }
933 }
934 
unary_expression_lookup()935 bool PP_Expression::unary_expression_lookup()
936 {
937     Token t = lookup();
938     return (primary_expression_lookup()
939             || t == PP_PLUS
940             || t == PP_MINUS
941             || t == PP_NOT
942             || t == PP_TILDE
943             || t == PP_DEFINED);
944 }
945 
primary_expression()946 int PP_Expression::primary_expression()
947 {
948     int value;
949     if (test(PP_LPAREN)) {
950         value = conditional_expression();
951         test(PP_RPAREN);
952     } else {
953         next();
954         value = lexem().toInt(nullptr, 0);
955     }
956     return value;
957 }
958 
primary_expression_lookup()959 bool PP_Expression::primary_expression_lookup()
960 {
961     Token t = lookup();
962     return (t == PP_IDENTIFIER
963             || t == PP_INTEGER_LITERAL
964             || t == PP_FLOATING_LITERAL
965             || t == PP_MOC_TRUE
966             || t == PP_MOC_FALSE
967             || t == PP_LPAREN);
968 }
969 
evaluateCondition()970 int Preprocessor::evaluateCondition()
971 {
972     PP_Expression expression;
973     expression.currentFilenames = currentFilenames;
974 
975     substituteUntilNewline(expression.symbols);
976 
977     return expression.value();
978 }
979 
readOrMapFile(QFile * file)980 static QByteArray readOrMapFile(QFile *file)
981 {
982     const qint64 size = file->size();
983     char *rawInput = reinterpret_cast<char*>(file->map(0, size));
984     return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll();
985 }
986 
mergeStringLiterals(Symbols * _symbols)987 static void mergeStringLiterals(Symbols *_symbols)
988 {
989     Symbols &symbols = *_symbols;
990     for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) {
991         if (i->token == STRING_LITERAL) {
992             Symbols::Iterator mergeSymbol = i;
993             int literalsLength = mergeSymbol->len;
994             while (++i != symbols.end() && i->token == STRING_LITERAL)
995                 literalsLength += i->len - 2; // no quotes
996 
997             if (literalsLength != mergeSymbol->len) {
998                 QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem();
999                 QByteArray &mergeSymbolLexem = mergeSymbol->lex;
1000                 mergeSymbolLexem.resize(0);
1001                 mergeSymbolLexem.reserve(literalsLength);
1002                 mergeSymbolLexem.append('"');
1003                 mergeSymbolLexem.append(mergeSymbolOriginalLexem);
1004                 for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j)
1005                     mergeSymbolLexem.append(j->lex.constData() + j->from + 1, j->len - 2); // append j->unquotedLexem()
1006                 mergeSymbolLexem.append('"');
1007                 mergeSymbol->len = mergeSymbol->lex.length();
1008                 mergeSymbol->from = 0;
1009                 i = symbols.erase(mergeSymbol + 1, i);
1010             }
1011             if (i == symbols.end())
1012                 break;
1013         }
1014     }
1015 }
1016 
searchIncludePaths(const QList<Parser::IncludePath> & includepaths,const QByteArray & include)1017 static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
1018                                      const QByteArray &include)
1019 {
1020     QFileInfo fi;
1021     for (int j = 0; j < includepaths.size() && !fi.exists(); ++j) {
1022         const Parser::IncludePath &p = includepaths.at(j);
1023         if (p.isFrameworkPath) {
1024             const int slashPos = include.indexOf('/');
1025             if (slashPos == -1)
1026                 continue;
1027             fi.setFile(QString::fromLocal8Bit(p.path + '/' + include.left(slashPos) + ".framework/Headers/"),
1028                        QString::fromLocal8Bit(include.mid(slashPos + 1)));
1029         } else {
1030             fi.setFile(QString::fromLocal8Bit(p.path), QString::fromLocal8Bit(include));
1031         }
1032         // try again, maybe there's a file later in the include paths with the same name
1033         // (186067)
1034         if (fi.isDir()) {
1035             fi = QFileInfo();
1036             continue;
1037         }
1038     }
1039 
1040     if (!fi.exists() || fi.isDir())
1041         return QByteArray();
1042     return fi.canonicalFilePath().toLocal8Bit();
1043 }
1044 
// Resolves 'include' to a canonical file path. When 'relativeTo' is not
// empty, the directory of that file is tried first. Otherwise, or if that
// fails, the include paths are searched; the outcome of that search is
// cached per include name.
QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
{
    if (!relativeTo.isEmpty()) {
        const QDir includerDir = QFileInfo(QString::fromLocal8Bit(relativeTo)).dir();
        const QFileInfo sibling(includerDir, QString::fromLocal8Bit(include));
        if (sibling.exists() && !sibling.isDir())
            return sibling.canonicalFilePath().toLocal8Bit();
    }

    auto cached = nonlocalIncludePathResolutionCache.find(include);
    if (cached != nonlocalIncludePathResolutionCache.end())
        return cached.value();

    const QByteArray resolved = searchIncludePaths(includes, include);
    nonlocalIncludePathResolutionCache.insert(include, resolved);
    return resolved;
}
1059 
preprocess(const QByteArray & filename,Symbols & preprocessed)1060 void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1061 {
1062     currentFilenames.push(filename);
1063     preprocessed.reserve(preprocessed.size() + symbols.size());
1064     while (hasNext()) {
1065         Token token = next();
1066 
1067         switch (token) {
1068         case PP_INCLUDE:
1069         {
1070             int lineNum = symbol().lineNum;
1071             QByteArray include;
1072             bool local = false;
1073             if (test(PP_STRING_LITERAL)) {
1074                 local = lexem().startsWith('\"');
1075                 include = unquotedLexem();
1076             } else
1077                 continue;
1078             until(PP_NEWLINE);
1079 
1080             include = resolveInclude(include, local ? filename : QByteArray());
1081             if (include.isNull())
1082                 continue;
1083 
1084             if (Preprocessor::preprocessedIncludes.contains(include))
1085                 continue;
1086             Preprocessor::preprocessedIncludes.insert(include);
1087 
1088             QFile file(QString::fromLocal8Bit(include.constData()));
1089             if (!file.open(QFile::ReadOnly))
1090                 continue;
1091 
1092             QByteArray input = readOrMapFile(&file);
1093 
1094             file.close();
1095             if (input.isEmpty())
1096                 continue;
1097 
1098             Symbols saveSymbols = symbols;
1099             int saveIndex = index;
1100 
1101             // phase 1: get rid of backslash-newlines
1102             input = cleaned(input);
1103 
1104             // phase 2: tokenize for the preprocessor
1105             symbols = tokenize(input);
1106             input.clear();
1107 
1108             index = 0;
1109 
1110             // phase 3: preprocess conditions and substitute macros
1111             preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
1112             preprocess(include, preprocessed);
1113             preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);
1114 
1115             symbols = saveSymbols;
1116             index = saveIndex;
1117             continue;
1118         }
1119         case PP_DEFINE:
1120         {
1121             next();
1122             QByteArray name = lexem();
1123             if (name.isEmpty() || !is_ident_start(name[0]))
1124                 error();
1125             Macro macro;
1126             macro.isVariadic = false;
1127             if (test(LPAREN)) {
1128                 // we have a function macro
1129                 macro.isFunction = true;
1130                 parseDefineArguments(&macro);
1131             } else {
1132                 macro.isFunction = false;
1133             }
1134             int start = index;
1135             until(PP_NEWLINE);
1136             macro.symbols.reserve(index - start - 1);
1137 
1138             // remove whitespace where there shouldn't be any:
1139             // Before and after the macro, after a # and around ##
1140             Token lastToken = HASH; // skip shitespace at the beginning
1141             for (int i = start; i < index - 1; ++i) {
1142                 Token token = symbols.at(i).token;
1143                 if (token == WHITESPACE) {
1144                     if (lastToken == PP_HASH || lastToken == HASH ||
1145                         lastToken == PP_HASHHASH ||
1146                         lastToken == WHITESPACE)
1147                         continue;
1148                 } else if (token == PP_HASHHASH) {
1149                     if (!macro.symbols.isEmpty() &&
1150                         lastToken == WHITESPACE)
1151                         macro.symbols.pop_back();
1152                 }
1153                 macro.symbols.append(symbols.at(i));
1154                 lastToken = token;
1155             }
1156             // remove trailing whitespace
1157             while (!macro.symbols.isEmpty() &&
1158                    (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE))
1159                 macro.symbols.pop_back();
1160 
1161             if (!macro.symbols.isEmpty()) {
1162                 if (macro.symbols.constFirst().token == PP_HASHHASH ||
1163                     macro.symbols.constLast().token == PP_HASHHASH) {
1164                     error("'##' cannot appear at either end of a macro expansion");
1165                 }
1166             }
1167             macros.insert(name, macro);
1168             continue;
1169         }
1170         case PP_UNDEF: {
1171             next();
1172             QByteArray name = lexem();
1173             until(PP_NEWLINE);
1174             macros.remove(name);
1175             continue;
1176         }
1177         case PP_IDENTIFIER: {
1178             // substitute macros
1179             macroExpand(&preprocessed, this, symbols, index, symbol().lineNum, true);
1180             continue;
1181         }
1182         case PP_HASH:
1183             until(PP_NEWLINE);
1184             continue; // skip unknown preprocessor statement
1185         case PP_IFDEF:
1186         case PP_IFNDEF:
1187         case PP_IF:
1188             while (!evaluateCondition()) {
1189                 if (!skipBranch())
1190                     break;
1191                 if (test(PP_ELIF)) {
1192                 } else {
1193                     until(PP_NEWLINE);
1194                     break;
1195                 }
1196             }
1197             continue;
1198         case PP_ELIF:
1199         case PP_ELSE:
1200             skipUntilEndif();
1201             Q_FALLTHROUGH();
1202         case PP_ENDIF:
1203             until(PP_NEWLINE);
1204             continue;
1205         case PP_NEWLINE:
1206             continue;
1207         case SIGNALS:
1208         case SLOTS: {
1209             Symbol sym = symbol();
1210             if (macros.contains("QT_NO_KEYWORDS"))
1211                 sym.token = IDENTIFIER;
1212             else
1213                 sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1214             preprocessed += sym;
1215         } continue;
1216         default:
1217             break;
1218         }
1219         preprocessed += symbol();
1220     }
1221 
1222     currentFilenames.pop();
1223 }
1224 
preprocessed(const QByteArray & filename,QFile * file)1225 Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1226 {
1227     QByteArray input = readOrMapFile(file);
1228 
1229     if (input.isEmpty())
1230         return symbols;
1231 
1232     // phase 1: get rid of backslash-newlines
1233     input = cleaned(input);
1234 
1235     // phase 2: tokenize for the preprocessor
1236     index = 0;
1237     symbols = tokenize(input);
1238 
1239 #if 0
1240     for (int j = 0; j < symbols.size(); ++j)
1241         fprintf(stderr, "line %d: %s(%s)\n",
1242                symbols[j].lineNum,
1243                symbols[j].lexem().constData(),
1244                tokenTypeName(symbols[j].token));
1245 #endif
1246 
1247     // phase 3: preprocess conditions and substitute macros
1248     Symbols result;
1249     // Preallocate some space to speed up the code below.
1250     // The magic value was found by logging the final size
1251     // and calculating an average when running moc over FOSS projects.
1252     result.reserve(file->size() / 300000);
1253     preprocess(filename, result);
1254     mergeStringLiterals(&result);
1255 
1256 #if 0
1257     for (int j = 0; j < result.size(); ++j)
1258         fprintf(stderr, "line %d: %s(%s)\n",
1259                result[j].lineNum,
1260                result[j].lexem().constData(),
1261                tokenTypeName(result[j].token));
1262 #endif
1263 
1264     return result;
1265 }
1266 
parseDefineArguments(Macro * m)1267 void Preprocessor::parseDefineArguments(Macro *m)
1268 {
1269     Symbols arguments;
1270     while (hasNext()) {
1271         while (test(PP_WHITESPACE)) {}
1272         Token t = next();
1273         if (t == PP_RPAREN)
1274             break;
1275         if (t != PP_IDENTIFIER) {
1276             QByteArray l = lexem();
1277             if (l == "...") {
1278                 m->isVariadic = true;
1279                 arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1280                 while (test(PP_WHITESPACE)) {}
1281                 if (!test(PP_RPAREN))
1282                     error("missing ')' in macro argument list");
1283                 break;
1284             } else if (!is_identifier(l.constData(), l.length())) {
1285                 error("Unexpected character in macro argument list.");
1286             }
1287         }
1288 
1289         Symbol arg = symbol();
1290         if (arguments.contains(arg))
1291             error("Duplicate macro parameter.");
1292         arguments += symbol();
1293 
1294         while (test(PP_WHITESPACE)) {}
1295         t = next();
1296         if (t == PP_RPAREN)
1297             break;
1298         if (t == PP_COMMA)
1299             continue;
1300         if (lexem() == "...") {
1301             //GCC extension:    #define FOO(x, y...) x(y)
1302             // The last argument was already parsed. Just mark the macro as variadic.
1303             m->isVariadic = true;
1304             while (test(PP_WHITESPACE)) {}
1305             if (!test(PP_RPAREN))
1306                 error("missing ')' in macro argument list");
1307             break;
1308         }
1309         error("Unexpected character in macro argument list.");
1310     }
1311     m->arguments = arguments;
1312     while (test(PP_WHITESPACE)) {}
1313 }
1314 
until(Token t)1315 void Preprocessor::until(Token t)
1316 {
1317     while(hasNext() && next() != t)
1318         ;
1319 }
1320 
1321 QT_END_NAMESPACE
1322