1 /*
2  * simplecpp - A simple and high-fidelity C/C++ preprocessor library
3  * Copyright (C) 2016 Daniel Marjamäki.
4  *
5  * This library is free software: you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation, either
8  * version 3 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__)
20 #define SIMPLECPP_WINDOWS
21 #define NOMINMAX
22 #endif
23 #include "simplecpp.h"
24 
25 #include <algorithm>
26 #include <climits>
27 #include <cstdlib>
28 #include <cstring>
29 #include <exception>
30 #include <fstream>
31 #include <iostream>
32 #include <limits>
33 #include <sstream>
34 #include <stack>
35 #include <stdexcept>
36 #include <utility>
37 
38 #ifdef SIMPLECPP_WINDOWS
39 #include <windows.h>
40 #undef ERROR
41 #undef TRUE
42 #endif
43 
/**
 * Tell whether a numeric literal is written in hexadecimal notation,
 * i.e. it starts with "0x" or "0X" and has at least one more character.
 */
static bool isHex(const std::string &s)
{
    if (s.size() <= 2)
        return false;
    return s[0] == '0' && (s[1] == 'x' || s[1] == 'X');
}
48 
/**
 * Tell whether a numeric literal is written in octal notation,
 * i.e. a leading '0' directly followed by a digit in the range 0..7.
 */
static bool isOct(const std::string &s)
{
    if (s.size() < 2 || s[0] != '0')
        return false;
    const char second = s[1];
    return second >= '0' && second <= '7';
}
53 
54 
55 static const simplecpp::TokenString DEFINE("define");
56 static const simplecpp::TokenString UNDEF("undef");
57 
58 static const simplecpp::TokenString INCLUDE("include");
59 
60 static const simplecpp::TokenString ERROR("error");
61 static const simplecpp::TokenString WARNING("warning");
62 
63 static const simplecpp::TokenString IF("if");
64 static const simplecpp::TokenString IFDEF("ifdef");
65 static const simplecpp::TokenString IFNDEF("ifndef");
66 static const simplecpp::TokenString DEFINED("defined");
67 static const simplecpp::TokenString ELSE("else");
68 static const simplecpp::TokenString ELIF("elif");
69 static const simplecpp::TokenString ENDIF("endif");
70 
71 static const simplecpp::TokenString PRAGMA("pragma");
72 static const simplecpp::TokenString ONCE("once");
73 
74 static const simplecpp::TokenString HAS_INCLUDE("__has_include");
75 
/**
 * Format a value as text by streaming it into a std::ostringstream.
 */
template<class T> static std::string toString(T t)
{
    std::ostringstream out;
    out << t;
    const std::string text = out.str();
    return text;
}
82 
stringToLL(const std::string & s)83 static long long stringToLL(const std::string &s)
84 {
85     long long ret;
86     const bool hex = isHex(s);
87     const bool oct = isOct(s);
88     std::istringstream istr(hex ? s.substr(2) : oct ? s.substr(1) : s);
89     if (hex)
90         istr >> std::hex;
91     else if (oct)
92         istr >> std::oct;
93     istr >> ret;
94     return ret;
95 }
96 
stringToULL(const std::string & s)97 static unsigned long long stringToULL(const std::string &s)
98 {
99     unsigned long long ret;
100     const bool hex = isHex(s);
101     const bool oct = isOct(s);
102     std::istringstream istr(hex ? s.substr(2) : oct ? s.substr(1) : s);
103     if (hex)
104         istr >> std::hex;
105     else if (oct)
106         istr >> std::oct;
107     istr >> ret;
108     return ret;
109 }
110 
/**
 * Check whether str begins with the text s.
 * An empty s is a prefix of every string.
 */
static bool startsWith(const std::string &str, const std::string &s)
{
    if (s.size() > str.size())
        return false;
    return std::equal(s.begin(), s.end(), str.begin());
}
115 
/**
 * Check whether s ends with the text e.
 * An empty e is a suffix of every string.
 */
static bool endsWith(const std::string &s, const std::string &e)
{
    if (e.size() > s.size())
        return false;
    return std::equal(e.rbegin(), e.rend(), s.rbegin());
}
120 
sameline(const simplecpp::Token * tok1,const simplecpp::Token * tok2)121 static bool sameline(const simplecpp::Token *tok1, const simplecpp::Token *tok2)
122 {
123     return tok1 && tok2 && tok1->location.sameline(tok2->location);
124 }
125 
isAlternativeBinaryOp(const simplecpp::Token * tok,const std::string & alt)126 static bool isAlternativeBinaryOp(const simplecpp::Token *tok, const std::string &alt)
127 {
128     return (tok->name &&
129             tok->str() == alt &&
130             tok->previous &&
131             tok->next &&
132             (tok->previous->number || tok->previous->name || tok->previous->op == ')') &&
133             (tok->next->number || tok->next->name || tok->next->op == '('));
134 }
135 
isAlternativeUnaryOp(const simplecpp::Token * tok,const std::string & alt)136 static bool isAlternativeUnaryOp(const simplecpp::Token *tok, const std::string &alt)
137 {
138     return ((tok->name && tok->str() == alt) &&
139             (!tok->previous || tok->previous->op == '(') &&
140             (tok->next && (tok->next->name || tok->next->number)));
141 }
142 
/**
 * Replace every occurrence of `from` in `s` with `to`.
 *
 * The search resumes after the inserted text, so occurrences of `from`
 * that only appear inside `to` are not replaced again.
 *
 * @param s     text to work on (taken by value, the modified copy is returned)
 * @param from  text to search for; when empty, s is returned unchanged
 * @param to    replacement text
 */
static std::string replaceAll(std::string s, const std::string& from, const std::string& to)
{
    // An empty pattern is found at every position, so the loop below would
    // never reach npos. There is nothing meaningful to replace in that case.
    if (from.empty())
        return s;

    for (std::string::size_type pos = s.find(from); pos != std::string::npos; pos = s.find(from, pos + to.size()))
        s.replace(pos, from.size(), to);
    return s;
}
149 
150 const std::string simplecpp::Location::emptyFileName;
151 
adjust(const std::string & str)152 void simplecpp::Location::adjust(const std::string &str)
153 {
154     if (str.find_first_of("\r\n") == std::string::npos) {
155         col += str.size();
156         return;
157     }
158 
159     for (std::size_t i = 0U; i < str.size(); ++i) {
160         col++;
161         if (str[i] == '\n' || str[i] == '\r') {
162             col = 1;
163             line++;
164             if (str[i] == '\r' && (i+1)<str.size() && str[i+1]=='\n')
165                 ++i;
166         }
167     }
168 }
169 
isOneOf(const char ops[]) const170 bool simplecpp::Token::isOneOf(const char ops[]) const
171 {
172     return (op != '\0') && (std::strchr(ops, op) != NULL);
173 }
174 
startsWithOneOf(const char c[]) const175 bool simplecpp::Token::startsWithOneOf(const char c[]) const
176 {
177     return std::strchr(c, string[0]) != NULL;
178 }
179 
endsWithOneOf(const char c[]) const180 bool simplecpp::Token::endsWithOneOf(const char c[]) const
181 {
182     return std::strchr(c, string[string.size() - 1U]) != NULL;
183 }
184 
printAll() const185 void simplecpp::Token::printAll() const
186 {
187     const Token *tok = this;
188     while (tok->previous)
189         tok = tok->previous;
190     for (; tok; tok = tok->next) {
191         if (tok->previous) {
192             std::cout << (sameline(tok, tok->previous) ? ' ' : '\n');
193         }
194         std::cout << tok->str();
195     }
196     std::cout << std::endl;
197 }
198 
printOut() const199 void simplecpp::Token::printOut() const
200 {
201     for (const Token *tok = this; tok; tok = tok->next) {
202         if (tok != this) {
203             std::cout << (sameline(tok, tok->previous) ? ' ' : '\n');
204         }
205         std::cout << tok->str();
206     }
207     std::cout << std::endl;
208 }
209 
TokenList(std::vector<std::string> & filenames)210 simplecpp::TokenList::TokenList(std::vector<std::string> &filenames) : frontToken(NULL), backToken(NULL), files(filenames) {}
211 
TokenList(std::istream & istr,std::vector<std::string> & filenames,const std::string & filename,OutputList * outputList)212 simplecpp::TokenList::TokenList(std::istream &istr, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList)
213     : frontToken(NULL), backToken(NULL), files(filenames)
214 {
215     readfile(istr,filename,outputList);
216 }
217 
TokenList(const TokenList & other)218 simplecpp::TokenList::TokenList(const TokenList &other) : frontToken(NULL), backToken(NULL), files(other.files)
219 {
220     *this = other;
221 }
222 
223 #if __cplusplus >= 201103L
TokenList(TokenList && other)224 simplecpp::TokenList::TokenList(TokenList &&other) : frontToken(NULL), backToken(NULL), files(other.files)
225 {
226     *this = std::move(other);
227 }
228 #endif
229 
~TokenList()230 simplecpp::TokenList::~TokenList()
231 {
232     clear();
233 }
234 
operator =(const TokenList & other)235 simplecpp::TokenList &simplecpp::TokenList::operator=(const TokenList &other)
236 {
237     if (this != &other) {
238         clear();
239         for (const Token *tok = other.cfront(); tok; tok = tok->next)
240             push_back(new Token(*tok));
241         sizeOfType = other.sizeOfType;
242     }
243     return *this;
244 }
245 
246 #if __cplusplus >= 201103L
operator =(TokenList && other)247 simplecpp::TokenList &simplecpp::TokenList::operator=(TokenList &&other)
248 {
249     if (this != &other) {
250         clear();
251         backToken = other.backToken;
252         other.backToken = NULL;
253         frontToken = other.frontToken;
254         other.frontToken = NULL;
255         sizeOfType = std::move(other.sizeOfType);
256     }
257     return *this;
258 }
259 #endif
260 
clear()261 void simplecpp::TokenList::clear()
262 {
263     backToken = NULL;
264     while (frontToken) {
265         Token *next = frontToken->next;
266         delete frontToken;
267         frontToken = next;
268     }
269     sizeOfType.clear();
270 }
271 
push_back(Token * tok)272 void simplecpp::TokenList::push_back(Token *tok)
273 {
274     if (!frontToken)
275         frontToken = tok;
276     else
277         backToken->next = tok;
278     tok->previous = backToken;
279     backToken = tok;
280 }
281 
dump() const282 void simplecpp::TokenList::dump() const
283 {
284     std::cout << stringify() << std::endl;
285 }
286 
stringify() const287 std::string simplecpp::TokenList::stringify() const
288 {
289     std::ostringstream ret;
290     Location loc(files);
291     for (const Token *tok = cfront(); tok; tok = tok->next) {
292         if (tok->location.line < loc.line || tok->location.fileIndex != loc.fileIndex) {
293             ret << "\n#line " << tok->location.line << " \"" << tok->location.file() << "\"\n";
294             loc = tok->location;
295         }
296 
297         while (tok->location.line > loc.line) {
298             ret << '\n';
299             loc.line++;
300         }
301 
302         if (sameline(tok->previous, tok))
303             ret << ' ';
304 
305         ret << tok->str();
306 
307         loc.adjust(tok->str());
308     }
309 
310     return ret.str();
311 }
312 
/**
 * Read one character from istr.
 *
 * With a UTF-16 BOM (0xfeff / 0xfffe) two bytes are consumed per character
 * and every code unit >= 0x80 is reported as 0xff. A carriage return is
 * returned as '\n'; a line feed directly following it is swallowed.
 *
 * @param istr  input stream
 * @param bom   byte order mark as returned by getAndSkipBOM(), 0 for none
 */
static unsigned char readChar(std::istream &istr, unsigned int bom)
{
    const bool utf16 = (bom == 0xfeff || bom == 0xfffe);
    const bool bigEndian = (bom == 0xfeff);

    unsigned char ch = static_cast<unsigned char>(istr.get());

    if (utf16) {
        // combine both bytes, non-ASCII code units collapse to 0xff
        const unsigned char other = static_cast<unsigned char>(istr.get());
        const int unit = bigEndian ? (ch << 8 | other) : (other << 8 | ch);
        ch = static_cast<unsigned char>((unit >= 0x80) ? 0xff : unit);
    }

    if (ch != '\r')
        return ch;

    // "\r" and "\r\n" are both reported as a single '\n'
    if (bom == 0) {
        if (static_cast<char>(istr.peek()) == '\n')
            (void)istr.get();
    } else if (utf16) {
        int c1 = istr.get();
        int c2 = istr.get();
        int unit = bigEndian ? (c1 << 8 | c2) : (c2 << 8 | c1);
        if (unit != '\n') {
            // not a line feed, put both bytes back
            istr.unget();
            istr.unget();
        }
    }

    return '\n';
}
343 
/**
 * Look at the next character of istr without consuming it.
 *
 * With a UTF-16 BOM (0xfeff / 0xfffe) the next two bytes are inspected and
 * every code unit >= 0x80 is reported as 0xff. A carriage return is
 * reported as '\n'.
 *
 * @param istr  input stream
 * @param bom   byte order mark as returned by getAndSkipBOM(), 0 for none
 */
static unsigned char peekChar(std::istream &istr, unsigned int bom)
{
    unsigned char ch = static_cast<unsigned char>(istr.peek());

    if (bom == 0xfeff || bom == 0xfffe) {
        // step over the first byte to look at the second one, then go back
        (void)istr.get();
        const unsigned char other = static_cast<unsigned char>(istr.peek());
        istr.unget();
        const int unit = (bom == 0xfeff) ? (ch << 8 | other) : (other << 8 | ch);
        ch = static_cast<unsigned char>((unit >= 0x80) ? 0xff : unit);
    }

    // newlines are normalised
    return (ch == '\r') ? static_cast<unsigned char>('\n') : ch;
}
364 
/**
 * Put the most recently read character back into istr:
 * two bytes when a UTF-16 BOM (0xfeff / 0xfffe) is given, one byte otherwise.
 */
static void ungetChar(std::istream &istr, unsigned int bom)
{
    const int bytes = (bom == 0xfeff || bom == 0xfffe) ? 2 : 1;
    for (int i = 0; i < bytes; ++i)
        istr.unget();
}
371 
/**
 * Detect and consume a byte order mark at the start of istr.
 *
 * @return the two bytes read when both are >= 0xfe (the UTF-16 marks are
 *         0xfeff and 0xfffe); 0 otherwise. A UTF-8 mark (0xef 0xbb 0xbf)
 *         is consumed and reported as 0.
 */
static unsigned short getAndSkipBOM(std::istream &istr)
{
    const int first = istr.peek();

    // UTF-16: both bytes are 0xfe or 0xff
    if (first >= 0xfe) {
        const unsigned short high = (static_cast<unsigned char>(istr.get()) << 8);
        if (istr.peek() < 0xfe) {
            istr.unget();
            return 0;
        }
        return high | static_cast<unsigned char>(istr.get());
    }

    if (first != 0xef)
        return 0;

    // possibly the UTF-8 mark 0xefbbbf
    (void)istr.get();
    if (istr.get() == 0xbb && istr.peek() == 0xbf) {
        (void)istr.get();
    } else {
        // not a mark, restore the two bytes that were consumed
        istr.unget();
        istr.unget();
    }
    return 0;
}
398 
/**
 * Tell whether ch may be part of a name or number token:
 * an alphanumeric character, '_' or '$'.
 */
static bool isNameChar(unsigned char ch)
{
    if (ch == '_' || ch == '$')
        return true;
    return std::isalnum(ch) != 0;
}
403 
/**
 * Build a double quoted string literal from a delimited string.
 *
 * The first and last character of str are treated as delimiters and dropped.
 * In the remaining text every backslash, double quote and single quote gets
 * a backslash in front of it, and the result is wrapped in double quotes.
 *
 * @param str  text including its one-character delimiters
 * @return     escaped literal; "\"\"" when str has fewer than three characters
 */
static std::string escapeString(const std::string &str)
{
    std::ostringstream ostr;
    ostr << '\"';
    // "i + 1 < size()" instead of "i < size() - 1": the subtraction wraps
    // around for an empty string and would run the loop out of bounds.
    for (std::size_t i = 1U; i + 1U < str.size(); ++i) {
        const char c = str[i];
        if (c == '\\' || c == '\"' || c == '\'')
            ostr << '\\';
        ostr << c;
    }
    ostr << '\"';
    return ostr.str();
}
417 
portabilityBackslash(simplecpp::OutputList * outputList,const std::vector<std::string> & files,const simplecpp::Location & location)418 static void portabilityBackslash(simplecpp::OutputList *outputList, const std::vector<std::string> &files, const simplecpp::Location &location)
419 {
420     if (!outputList)
421         return;
422     simplecpp::Output err(files);
423     err.type = simplecpp::Output::PORTABILITY_BACKSLASH;
424     err.location = location;
425     err.msg = "Combination 'backslash space newline' is not portable.";
426     outputList->push_back(err);
427 }
428 
/**
 * Tell whether str is one of the recognised string literal prefixes:
 * u, U, L, u8 and their raw string variants R, uR, UR, LR, u8R.
 */
static bool isStringLiteralPrefix(const std::string &str)
{
    static const char * const prefixes[] = {
        "u", "U", "L", "u8",
        "R", "uR", "UR", "LR", "u8R"
    };
    const std::size_t count = sizeof(prefixes) / sizeof(prefixes[0]);
    for (std::size_t i = 0U; i < count; ++i) {
        if (str == prefixes[i])
            return true;
    }
    return false;
}
434 
lineDirective(unsigned int fileIndex,unsigned int line,Location * location)435 void simplecpp::TokenList::lineDirective(unsigned int fileIndex, unsigned int line, Location *location)
436 {
437     if (fileIndex != location->fileIndex || line >= location->line) {
438         location->fileIndex = fileIndex;
439         location->line = line;
440         return;
441     }
442 
443     if (line + 2 >= location->line) {
444         location->line = line;
445         while (cback()->op != '#')
446             deleteToken(back());
447         deleteToken(back());
448         return;
449     }
450 }
451 
readfile(std::istream & istr,const std::string & filename,OutputList * outputList)452 void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filename, OutputList *outputList)
453 {
454     std::stack<simplecpp::Location> loc;
455 
456     unsigned int multiline = 0U;
457 
458     const Token *oldLastToken = NULL;
459 
460     const unsigned short bom = getAndSkipBOM(istr);
461 
462     Location location(files);
463     location.fileIndex = fileIndex(filename);
464     location.line = 1U;
465     location.col  = 1U;
466     while (istr.good()) {
467         unsigned char ch = readChar(istr,bom);
468         if (!istr.good())
469             break;
470         if (ch < ' ' && ch != '\t' && ch != '\n' && ch != '\r')
471             ch = ' ';
472 
473         if (ch >= 0x80) {
474             if (outputList) {
475                 simplecpp::Output err(files);
476                 err.type = simplecpp::Output::UNHANDLED_CHAR_ERROR;
477                 err.location = location;
478                 std::ostringstream s;
479                 s << (int)ch;
480                 err.msg = "The code contains unhandled character(s) (character code=" + s.str() + "). Neither unicode nor extended ascii is supported.";
481                 outputList->push_back(err);
482             }
483             clear();
484             return;
485         }
486 
487         if (ch == '\n') {
488             if (cback() && cback()->op == '\\') {
489                 if (location.col > cback()->location.col + 1U)
490                     portabilityBackslash(outputList, files, cback()->location);
491                 ++multiline;
492                 deleteToken(back());
493             } else {
494                 location.line += multiline + 1;
495                 multiline = 0U;
496             }
497             if (!multiline)
498                 location.col = 1;
499 
500             if (oldLastToken != cback()) {
501                 oldLastToken = cback();
502                 const std::string lastline(lastLine());
503                 if (lastline == "# file %str%") {
504                     loc.push(location);
505                     location.fileIndex = fileIndex(cback()->str().substr(1U, cback()->str().size() - 2U));
506                     location.line = 1U;
507                 } else if (lastline == "# line %num%") {
508                     lineDirective(location.fileIndex, std::atol(cback()->str().c_str()), &location);
509                 } else if (lastline == "# %num% %str%" || lastline == "# line %num% %str%") {
510                     lineDirective(fileIndex(replaceAll(cback()->str().substr(1U, cback()->str().size() - 2U),"\\\\","\\")),
511                                   std::atol(cback()->previous->str().c_str()), &location);
512                 }
513                 // #endfile
514                 else if (lastline == "# endfile" && !loc.empty()) {
515                     location = loc.top();
516                     loc.pop();
517                 }
518             }
519 
520             continue;
521         }
522 
523         if (std::isspace(ch)) {
524             location.col++;
525             continue;
526         }
527 
528         TokenString currentToken;
529 
530         if (cback() && cback()->location.line == location.line && cback()->previous && cback()->previous->op == '#' && (lastLine() == "# error" || lastLine() == "# warning")) {
531             char prev = ' ';
532             while (istr.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) {
533                 currentToken += ch;
534                 prev = ch;
535                 ch = readChar(istr, bom);
536             }
537             ungetChar(istr, bom);
538             push_back(new Token(currentToken, location));
539             location.adjust(currentToken);
540             continue;
541         }
542 
543         // number or name
544         if (isNameChar(ch)) {
545             const bool num = std::isdigit(ch);
546             while (istr.good() && isNameChar(ch)) {
547                 currentToken += ch;
548                 ch = readChar(istr,bom);
549                 if (num && ch=='\'' && isNameChar(peekChar(istr,bom)))
550                     ch = readChar(istr,bom);
551             }
552 
553             ungetChar(istr,bom);
554         }
555 
556         // comment
557         else if (ch == '/' && peekChar(istr,bom) == '/') {
558             while (istr.good() && ch != '\r' && ch != '\n') {
559                 currentToken += ch;
560                 ch = readChar(istr, bom);
561             }
562             const std::string::size_type pos = currentToken.find_last_not_of(" \t");
563             if (pos < currentToken.size() - 1U && currentToken[pos] == '\\')
564                 portabilityBackslash(outputList, files, location);
565             if (currentToken[currentToken.size() - 1U] == '\\') {
566                 ++multiline;
567                 currentToken.erase(currentToken.size() - 1U);
568             } else {
569                 ungetChar(istr, bom);
570             }
571         }
572 
573         // comment
574         else if (ch == '/' && peekChar(istr,bom) == '*') {
575             currentToken = "/*";
576             (void)readChar(istr,bom);
577             ch = readChar(istr,bom);
578             while (istr.good()) {
579                 currentToken += ch;
580                 if (currentToken.size() >= 4U && endsWith(currentToken, "*/"))
581                     break;
582                 ch = readChar(istr,bom);
583             }
584             // multiline..
585 
586             std::string::size_type pos = 0;
587             while ((pos = currentToken.find("\\\n",pos)) != std::string::npos) {
588                 currentToken.erase(pos,2);
589                 ++multiline;
590             }
591             if (multiline || startsWith(lastLine(10),"# ")) {
592                 pos = 0;
593                 while ((pos = currentToken.find('\n',pos)) != std::string::npos) {
594                     currentToken.erase(pos,1);
595                     ++multiline;
596                 }
597             }
598         }
599 
600         // string / char literal
601         else if (ch == '\"' || ch == '\'') {
602             std::string prefix;
603             if (cback() && cback()->name && isStringLiteralPrefix(cback()->str()) &&
604                 ((cback()->location.col + cback()->str().size()) == location.col) &&
605                 (cback()->location.line == location.line)) {
606                 prefix = cback()->str();
607             }
608             // C++11 raw string literal
609             if (ch == '\"' && !prefix.empty() && *cback()->str().rbegin() == 'R') {
610                 std::string delim;
611                 currentToken = ch;
612                 prefix.resize(prefix.size() - 1);
613                 ch = readChar(istr,bom);
614                 while (istr.good() && ch != '(' && ch != '\n') {
615                     delim += ch;
616                     ch = readChar(istr,bom);
617                 }
618                 if (!istr.good() || ch == '\n') {
619                     if (outputList) {
620                         Output err(files);
621                         err.type = Output::SYNTAX_ERROR;
622                         err.location = location;
623                         err.msg = "Invalid newline in raw string delimiter.";
624                         outputList->push_back(err);
625                     }
626                     return;
627                 }
628                 const std::string endOfRawString(')' + delim + currentToken);
629                 while (istr.good() && !(endsWith(currentToken, endOfRawString) && currentToken.size() > 1))
630                     currentToken += readChar(istr,bom);
631                 if (!endsWith(currentToken, endOfRawString)) {
632                     if (outputList) {
633                         Output err(files);
634                         err.type = Output::SYNTAX_ERROR;
635                         err.location = location;
636                         err.msg = "Raw string missing terminating delimiter.";
637                         outputList->push_back(err);
638                     }
639                     return;
640                 }
641                 currentToken.erase(currentToken.size() - endOfRawString.size(), endOfRawString.size() - 1U);
642                 currentToken = escapeString(currentToken);
643                 currentToken.insert(0, prefix);
644                 back()->setstr(currentToken);
645                 location.adjust(currentToken);
646                 if (currentToken.find_first_of("\r\n") == std::string::npos)
647                     location.col += 2 + 2 * delim.size();
648                 else
649                     location.col += 1 + delim.size();
650 
651                 continue;
652             }
653 
654             currentToken = readUntil(istr,location,ch,ch,outputList,bom);
655             if (currentToken.size() < 2U)
656                 // Error is reported by readUntil()
657                 return;
658 
659             std::string s = currentToken;
660             std::string::size_type pos;
661             int newlines = 0;
662             while ((pos = s.find_first_of("\r\n")) != std::string::npos) {
663                 s.erase(pos,1);
664                 newlines++;
665             }
666 
667             if (prefix.empty())
668                 push_back(new Token(s, location)); // push string without newlines
669             else
670                 back()->setstr(prefix + s);
671 
672             if (newlines > 0 && lastLine().compare(0,9,"# define ") == 0) {
673                 multiline += newlines;
674                 location.adjust(s);
675             } else {
676                 location.adjust(currentToken);
677             }
678             continue;
679         }
680 
681         else {
682             currentToken += ch;
683         }
684 
685         if (currentToken == "<" && lastLine() == "# include") {
686             currentToken = readUntil(istr, location, '<', '>', outputList, bom);
687             if (currentToken.size() < 2U)
688                 return;
689         }
690 
691         push_back(new Token(currentToken, location));
692 
693         if (multiline)
694             location.col += currentToken.size();
695         else
696             location.adjust(currentToken);
697     }
698 
699     combineOperators();
700 }
701 
constFold()702 void simplecpp::TokenList::constFold()
703 {
704     while (cfront()) {
705         // goto last '('
706         Token *tok = back();
707         while (tok && tok->op != '(')
708             tok = tok->previous;
709 
710         // no '(', goto first token
711         if (!tok)
712             tok = front();
713 
714         // Constant fold expression
715         constFoldUnaryNotPosNeg(tok);
716         constFoldMulDivRem(tok);
717         constFoldAddSub(tok);
718         constFoldShift(tok);
719         constFoldComparison(tok);
720         constFoldBitwise(tok);
721         constFoldLogicalOp(tok);
722         constFoldQuestionOp(&tok);
723 
724         // If there is no '(' we are done with the constant folding
725         if (tok->op != '(')
726             break;
727 
728         if (!tok->next || !tok->next->next || tok->next->next->op != ')')
729             break;
730 
731         tok = tok->next;
732         deleteToken(tok->previous);
733         deleteToken(tok->next);
734     }
735 }
736 
isFloatSuffix(const simplecpp::Token * tok)737 static bool isFloatSuffix(const simplecpp::Token *tok)
738 {
739     if (!tok || tok->str().size() != 1U)
740         return false;
741     const char c = std::tolower(tok->str()[0]);
742     return c == 'f' || c == 'l';
743 }
744 
combineOperators()745 void simplecpp::TokenList::combineOperators()
746 {
747     std::stack<bool> executableScope;
748     executableScope.push(false);
749     for (Token *tok = front(); tok; tok = tok->next) {
750         if (tok->op == '{') {
751             if (executableScope.top()) {
752                 executableScope.push(true);
753                 continue;
754             }
755             const Token *prev = tok->previous;
756             while (prev && prev->isOneOf(";{}()"))
757                 prev = prev->previous;
758             executableScope.push(prev && prev->op == ')');
759             continue;
760         }
761         if (tok->op == '}') {
762             if (executableScope.size() > 1)
763                 executableScope.pop();
764             continue;
765         }
766 
767         if (tok->op == '.') {
768             // ellipsis ...
769             if (tok->next && tok->next->op == '.' && tok->next->location.col == (tok->location.col + 1) &&
770                 tok->next->next && tok->next->next->op == '.' && tok->next->next->location.col == (tok->location.col + 2)) {
771                 tok->setstr("...");
772                 deleteToken(tok->next);
773                 deleteToken(tok->next);
774                 continue;
775             }
776             // float literals..
777             if (tok->previous && tok->previous->number) {
778                 tok->setstr(tok->previous->str() + '.');
779                 deleteToken(tok->previous);
780                 if (isFloatSuffix(tok->next) || (tok->next && tok->next->startsWithOneOf("AaBbCcDdEeFfPp"))) {
781                     tok->setstr(tok->str() + tok->next->str());
782                     deleteToken(tok->next);
783                 }
784             }
785             if (tok->next && tok->next->number) {
786                 tok->setstr(tok->str() + tok->next->str());
787                 deleteToken(tok->next);
788             }
789         }
790         // match: [0-9.]+E [+-] [0-9]+
791         const char lastChar = tok->str()[tok->str().size() - 1];
792         if (tok->number && !isOct(tok->str()) &&
793             ((!isHex(tok->str()) && (lastChar == 'E' || lastChar == 'e')) ||
794              (isHex(tok->str()) && (lastChar == 'P' || lastChar == 'p'))) &&
795             tok->next && tok->next->isOneOf("+-") && tok->next->next && tok->next->next->number) {
796             tok->setstr(tok->str() + tok->next->op + tok->next->next->str());
797             deleteToken(tok->next);
798             deleteToken(tok->next);
799         }
800 
801         if (tok->op == '\0' || !tok->next || tok->next->op == '\0')
802             continue;
803         if (!sameline(tok,tok->next))
804             continue;
805         if (tok->location.col + 1U != tok->next->location.col)
806             continue;
807 
808         if (tok->next->op == '=' && tok->isOneOf("=!<>+-*/%&|^")) {
809             if (tok->op == '&' && !executableScope.top()) {
810                 // don't combine &= if it is a anonymous reference parameter with default value:
811                 // void f(x&=2)
812                 int indentlevel = 0;
813                 const Token *start = tok;
814                 while (indentlevel >= 0 && start) {
815                     if (start->op == ')')
816                         ++indentlevel;
817                     else if (start->op == '(')
818                         --indentlevel;
819                     else if (start->isOneOf(";{}"))
820                         break;
821                     start = start->previous;
822                 }
823                 if (indentlevel == -1 && start) {
824                     const Token *ftok = start;
825                     bool isFuncDecl = ftok->name;
826                     while (isFuncDecl) {
827                         if (!start->name && start->str() != "::" && start->op != '*' && start->op != '&')
828                             isFuncDecl = false;
829                         if (!start->previous)
830                             break;
831                         if (start->previous->isOneOf(";{}:"))
832                             break;
833                         start = start->previous;
834                     }
835                     isFuncDecl &= start != ftok && start->name;
836                     if (isFuncDecl) {
837                         // TODO: we could loop through the parameters here and check if they are correct.
838                         continue;
839                     }
840                 }
841             }
842             tok->setstr(tok->str() + "=");
843             deleteToken(tok->next);
844         } else if ((tok->op == '|' || tok->op == '&') && tok->op == tok->next->op) {
845             tok->setstr(tok->str() + tok->next->str());
846             deleteToken(tok->next);
847         } else if (tok->op == ':' && tok->next->op == ':') {
848             tok->setstr(tok->str() + tok->next->str());
849             deleteToken(tok->next);
850         } else if (tok->op == '-' && tok->next->op == '>') {
851             tok->setstr(tok->str() + tok->next->str());
852             deleteToken(tok->next);
853         } else if ((tok->op == '<' || tok->op == '>') && tok->op == tok->next->op) {
854             tok->setstr(tok->str() + tok->next->str());
855             deleteToken(tok->next);
856             if (tok->next && tok->next->op == '=' && tok->next->next && tok->next->next->op != '=') {
857                 tok->setstr(tok->str() + tok->next->str());
858                 deleteToken(tok->next);
859             }
860         } else if ((tok->op == '+' || tok->op == '-') && tok->op == tok->next->op) {
861             if (tok->location.col + 1U != tok->next->location.col)
862                 continue;
863             if (tok->previous && tok->previous->number)
864                 continue;
865             if (tok->next->next && tok->next->next->number)
866                 continue;
867             tok->setstr(tok->str() + tok->next->str());
868             deleteToken(tok->next);
869         }
870     }
871 }
872 
873 static const std::string COMPL("compl");
874 static const std::string NOT("not");
constFoldUnaryNotPosNeg(simplecpp::Token * tok)875 void simplecpp::TokenList::constFoldUnaryNotPosNeg(simplecpp::Token *tok)
876 {
877     for (; tok && tok->op != ')'; tok = tok->next) {
878         // "not" might be !
879         if (isAlternativeUnaryOp(tok, NOT))
880             tok->op = '!';
881         // "compl" might be ~
882         else if (isAlternativeUnaryOp(tok, COMPL))
883             tok->op = '~';
884 
885         if (tok->op == '!' && tok->next && tok->next->number) {
886             tok->setstr(tok->next->str() == "0" ? "1" : "0");
887             deleteToken(tok->next);
888         } else if (tok->op == '~' && tok->next && tok->next->number) {
889             tok->setstr(toString(~stringToLL(tok->next->str())));
890             deleteToken(tok->next);
891         } else {
892             if (tok->previous && (tok->previous->number || tok->previous->name))
893                 continue;
894             if (!tok->next || !tok->next->number)
895                 continue;
896             switch (tok->op) {
897             case '+':
898                 tok->setstr(tok->next->str());
899                 deleteToken(tok->next);
900                 break;
901             case '-':
902                 tok->setstr(tok->op + tok->next->str());
903                 deleteToken(tok->next);
904                 break;
905             }
906         }
907     }
908 }
909 
constFoldMulDivRem(Token * tok)910 void simplecpp::TokenList::constFoldMulDivRem(Token *tok)
911 {
912     for (; tok && tok->op != ')'; tok = tok->next) {
913         if (!tok->previous || !tok->previous->number)
914             continue;
915         if (!tok->next || !tok->next->number)
916             continue;
917 
918         long long result;
919         if (tok->op == '*')
920             result = (stringToLL(tok->previous->str()) * stringToLL(tok->next->str()));
921         else if (tok->op == '/' || tok->op == '%') {
922             long long rhs = stringToLL(tok->next->str());
923             if (rhs == 0)
924                 throw std::overflow_error("division/modulo by zero");
925             long long lhs = stringToLL(tok->previous->str());
926             if (rhs == -1 && lhs == std::numeric_limits<long long>::min())
927                 throw std::overflow_error("division overflow");
928             if (tok->op == '/')
929                 result = (lhs / rhs);
930             else
931                 result = (lhs % rhs);
932         } else
933             continue;
934 
935         tok = tok->previous;
936         tok->setstr(toString(result));
937         deleteToken(tok->next);
938         deleteToken(tok->next);
939     }
940 }
941 
constFoldAddSub(Token * tok)942 void simplecpp::TokenList::constFoldAddSub(Token *tok)
943 {
944     for (; tok && tok->op != ')'; tok = tok->next) {
945         if (!tok->previous || !tok->previous->number)
946             continue;
947         if (!tok->next || !tok->next->number)
948             continue;
949 
950         long long result;
951         if (tok->op == '+')
952             result = stringToLL(tok->previous->str()) + stringToLL(tok->next->str());
953         else if (tok->op == '-')
954             result = stringToLL(tok->previous->str()) - stringToLL(tok->next->str());
955         else
956             continue;
957 
958         tok = tok->previous;
959         tok->setstr(toString(result));
960         deleteToken(tok->next);
961         deleteToken(tok->next);
962     }
963 }
964 
constFoldShift(Token * tok)965 void simplecpp::TokenList::constFoldShift(Token *tok)
966 {
967     for (; tok && tok->op != ')'; tok = tok->next) {
968         if (!tok->previous || !tok->previous->number)
969             continue;
970         if (!tok->next || !tok->next->number)
971             continue;
972 
973         long long result;
974         if (tok->str() == "<<")
975             result = stringToLL(tok->previous->str()) << stringToLL(tok->next->str());
976         else if (tok->str() == ">>")
977             result = stringToLL(tok->previous->str()) >> stringToLL(tok->next->str());
978         else
979             continue;
980 
981         tok = tok->previous;
982         tok->setstr(toString(result));
983         deleteToken(tok->next);
984         deleteToken(tok->next);
985     }
986 }
987 
988 static const std::string NOTEQ("not_eq");
constFoldComparison(Token * tok)989 void simplecpp::TokenList::constFoldComparison(Token *tok)
990 {
991     for (; tok && tok->op != ')'; tok = tok->next) {
992         if (isAlternativeBinaryOp(tok,NOTEQ))
993             tok->setstr("!=");
994 
995         if (!tok->startsWithOneOf("<>=!"))
996             continue;
997         if (!tok->previous || !tok->previous->number)
998             continue;
999         if (!tok->next || !tok->next->number)
1000             continue;
1001 
1002         int result;
1003         if (tok->str() == "==")
1004             result = (stringToLL(tok->previous->str()) == stringToLL(tok->next->str()));
1005         else if (tok->str() == "!=")
1006             result = (stringToLL(tok->previous->str()) != stringToLL(tok->next->str()));
1007         else if (tok->str() == ">")
1008             result = (stringToLL(tok->previous->str()) > stringToLL(tok->next->str()));
1009         else if (tok->str() == ">=")
1010             result = (stringToLL(tok->previous->str()) >= stringToLL(tok->next->str()));
1011         else if (tok->str() == "<")
1012             result = (stringToLL(tok->previous->str()) < stringToLL(tok->next->str()));
1013         else if (tok->str() == "<=")
1014             result = (stringToLL(tok->previous->str()) <= stringToLL(tok->next->str()));
1015         else
1016             continue;
1017 
1018         tok = tok->previous;
1019         tok->setstr(toString(result));
1020         deleteToken(tok->next);
1021         deleteToken(tok->next);
1022     }
1023 }
1024 
1025 static const std::string BITAND("bitand");
1026 static const std::string BITOR("bitor");
1027 static const std::string XOR("xor");
constFoldBitwise(Token * tok)1028 void simplecpp::TokenList::constFoldBitwise(Token *tok)
1029 {
1030     Token * const tok1 = tok;
1031     for (const char *op = "&^|"; *op; op++) {
1032         const std::string* alternativeOp;
1033         if (*op == '&')
1034             alternativeOp = &BITAND;
1035         else if (*op == '|')
1036             alternativeOp = &BITOR;
1037         else
1038             alternativeOp = &XOR;
1039         for (tok = tok1; tok && tok->op != ')'; tok = tok->next) {
1040             if (tok->op != *op && !isAlternativeBinaryOp(tok, *alternativeOp))
1041                 continue;
1042             if (!tok->previous || !tok->previous->number)
1043                 continue;
1044             if (!tok->next || !tok->next->number)
1045                 continue;
1046             long long result;
1047             if (*op == '&')
1048                 result = (stringToLL(tok->previous->str()) & stringToLL(tok->next->str()));
1049             else if (*op == '^')
1050                 result = (stringToLL(tok->previous->str()) ^ stringToLL(tok->next->str()));
1051             else /*if (*op == '|')*/
1052                 result = (stringToLL(tok->previous->str()) | stringToLL(tok->next->str()));
1053             tok = tok->previous;
1054             tok->setstr(toString(result));
1055             deleteToken(tok->next);
1056             deleteToken(tok->next);
1057         }
1058     }
1059 }
1060 
1061 static const std::string AND("and");
1062 static const std::string OR("or");
constFoldLogicalOp(Token * tok)1063 void simplecpp::TokenList::constFoldLogicalOp(Token *tok)
1064 {
1065     for (; tok && tok->op != ')'; tok = tok->next) {
1066         if (tok->name) {
1067             if (isAlternativeBinaryOp(tok,AND))
1068                 tok->setstr("&&");
1069             else if (isAlternativeBinaryOp(tok,OR))
1070                 tok->setstr("||");
1071         }
1072         if (tok->str() != "&&" && tok->str() != "||")
1073             continue;
1074         if (!tok->previous || !tok->previous->number)
1075             continue;
1076         if (!tok->next || !tok->next->number)
1077             continue;
1078 
1079         int result;
1080         if (tok->str() == "||")
1081             result = (stringToLL(tok->previous->str()) || stringToLL(tok->next->str()));
1082         else /*if (tok->str() == "&&")*/
1083             result = (stringToLL(tok->previous->str()) && stringToLL(tok->next->str()));
1084 
1085         tok = tok->previous;
1086         tok->setstr(toString(result));
1087         deleteToken(tok->next);
1088         deleteToken(tok->next);
1089     }
1090 }
1091 
constFoldQuestionOp(Token ** tok1)1092 void simplecpp::TokenList::constFoldQuestionOp(Token **tok1)
1093 {
1094     bool gotoTok1 = false;
1095     for (Token *tok = *tok1; tok && tok->op != ')'; tok =  gotoTok1 ? *tok1 : tok->next) {
1096         gotoTok1 = false;
1097         if (tok->str() != "?")
1098             continue;
1099         if (!tok->previous || !tok->next || !tok->next->next)
1100             throw std::runtime_error("invalid expression");
1101         if (!tok->previous->number)
1102             continue;
1103         if (tok->next->next->op != ':')
1104             continue;
1105         Token * const condTok = tok->previous;
1106         Token * const trueTok = tok->next;
1107         Token * const falseTok = trueTok->next->next;
1108         if (!falseTok)
1109             throw std::runtime_error("invalid expression");
1110         if (condTok == *tok1)
1111             *tok1 = (condTok->str() != "0" ? trueTok : falseTok);
1112         deleteToken(condTok->next); // ?
1113         deleteToken(trueTok->next); // :
1114         deleteToken(condTok->str() == "0" ? trueTok : falseTok);
1115         deleteToken(condTok);
1116         gotoTok1 = true;
1117     }
1118 }
1119 
removeComments()1120 void simplecpp::TokenList::removeComments()
1121 {
1122     Token *tok = frontToken;
1123     while (tok) {
1124         Token *tok1 = tok;
1125         tok = tok->next;
1126         if (tok1->comment)
1127             deleteToken(tok1);
1128     }
1129 }
1130 
readUntil(std::istream & istr,const Location & location,const char start,const char end,OutputList * outputList,unsigned int bom)1131 std::string simplecpp::TokenList::readUntil(std::istream &istr, const Location &location, const char start, const char end, OutputList *outputList, unsigned int bom)
1132 {
1133     std::string ret;
1134     ret += start;
1135 
1136     bool backslash = false;
1137     char ch = 0;
1138     while (ch != end && ch != '\r' && ch != '\n' && istr.good()) {
1139         ch = readChar(istr, bom);
1140         if (backslash && ch == '\n') {
1141             ch = 0;
1142             backslash = false;
1143             continue;
1144         }
1145         backslash = false;
1146         ret += ch;
1147         if (ch == '\\') {
1148             bool update_ch = false;
1149             char next = 0;
1150             do {
1151                 next = readChar(istr, bom);
1152                 if (next == '\r' || next == '\n') {
1153                     ret.erase(ret.size()-1U);
1154                     backslash = (next == '\r');
1155                     update_ch = false;
1156                 } else if (next == '\\')
1157                     update_ch = !update_ch;
1158                 ret += next;
1159             } while (next == '\\');
1160             if (update_ch)
1161                 ch = next;
1162         }
1163     }
1164 
1165     if (!istr.good() || ch != end) {
1166         clear();
1167         if (outputList) {
1168             Output err(files);
1169             err.type = Output::SYNTAX_ERROR;
1170             err.location = location;
1171             err.msg = std::string("No pair for character (") + start + "). Can't process file. File is either invalid or unicode, which is currently not supported.";
1172             outputList->push_back(err);
1173         }
1174         return "";
1175     }
1176 
1177     return ret;
1178 }
1179 
lastLine(int maxsize) const1180 std::string simplecpp::TokenList::lastLine(int maxsize) const
1181 {
1182     std::string ret;
1183     int count = 0;
1184     for (const Token *tok = cback(); sameline(tok,cback()); tok = tok->previous) {
1185         if (tok->comment)
1186             continue;
1187         if (!ret.empty())
1188             ret.insert(0, 1, ' ');
1189         ret.insert(0, tok->str()[0] == '\"' ? std::string("%str%")
1190                    : tok->number ? std::string("%num%") : tok->str());
1191         if (++count > maxsize)
1192             return "";
1193     }
1194     return ret;
1195 }
1196 
fileIndex(const std::string & filename)1197 unsigned int simplecpp::TokenList::fileIndex(const std::string &filename)
1198 {
1199     for (unsigned int i = 0; i < files.size(); ++i) {
1200         if (files[i] == filename)
1201             return i;
1202     }
1203     files.push_back(filename);
1204     return files.size() - 1U;
1205 }
1206 
1207 
1208 namespace simplecpp {
1209     class Macro {
1210     public:
Macro(std::vector<std::string> & f)1211         explicit Macro(std::vector<std::string> &f) : nameTokDef(NULL), variadic(false), valueToken(NULL), endToken(NULL), files(f), tokenListDefine(f), valueDefinedInCode_(false) {}
1212 
Macro(const Token * tok,std::vector<std::string> & f)1213         Macro(const Token *tok, std::vector<std::string> &f) : nameTokDef(NULL), files(f), tokenListDefine(f), valueDefinedInCode_(true) {
1214             if (sameline(tok->previous, tok))
1215                 throw std::runtime_error("bad macro syntax");
1216             if (tok->op != '#')
1217                 throw std::runtime_error("bad macro syntax");
1218             const Token * const hashtok = tok;
1219             tok = tok->next;
1220             if (!tok || tok->str() != DEFINE)
1221                 throw std::runtime_error("bad macro syntax");
1222             tok = tok->next;
1223             if (!tok || !tok->name || !sameline(hashtok,tok))
1224                 throw std::runtime_error("bad macro syntax");
1225             if (!parseDefine(tok))
1226                 throw std::runtime_error("bad macro syntax");
1227         }
1228 
Macro(const std::string & name,const std::string & value,std::vector<std::string> & f)1229         Macro(const std::string &name, const std::string &value, std::vector<std::string> &f) : nameTokDef(NULL), files(f), tokenListDefine(f), valueDefinedInCode_(false) {
1230             const std::string def(name + ' ' + value);
1231             std::istringstream istr(def);
1232             tokenListDefine.readfile(istr);
1233             if (!parseDefine(tokenListDefine.cfront()))
1234                 throw std::runtime_error("bad macro syntax. macroname=" + name + " value=" + value);
1235         }
1236 
Macro(const Macro & macro)1237         Macro(const Macro &macro) : nameTokDef(NULL), files(macro.files), tokenListDefine(macro.files), valueDefinedInCode_(macro.valueDefinedInCode_) {
1238             *this = macro;
1239         }
1240 
operator =(const Macro & macro)1241         void operator=(const Macro &macro) {
1242             if (this != &macro) {
1243                 valueDefinedInCode_ = macro.valueDefinedInCode_;
1244                 if (macro.tokenListDefine.empty())
1245                     parseDefine(macro.nameTokDef);
1246                 else {
1247                     tokenListDefine = macro.tokenListDefine;
1248                     parseDefine(tokenListDefine.cfront());
1249                 }
1250             }
1251         }
1252 
        /**
         * @return true when this macro was built by the constructor that takes
         *         a '#' token (a "#define" in a token stream); false when it
         *         was built from a name/value pair or without a definition.
         *         Copies carry over the flag of their source.
         */
        bool valueDefinedInCode() const {
            return valueDefinedInCode_;
        }
1256 
1257         /**
1258          * Expand macro. This will recursively expand inner macros.
1259          * @param output     destination tokenlist
1260          * @param rawtok     macro token
1261          * @param macros     list of macros
1262          * @param inputFiles the input files
1263          * @return token after macro
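         * @throw Can throw Error or one of its subclasses (wrongNumberOfParameters, invalidHashHash); a plain Error is thrown when a preprocessor directive is used as a macro parameter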
1265          */
        const Token * expand(TokenList * const output,
                             const Token * rawtok,
                             const std::map<TokenString,Macro> &macros,
                             std::vector<std::string> &inputFiles) const {
            // Names of macros that must not be expanded again; passed by value
            // to the private expand() overload.
            std::set<TokenString> expandedmacros;

            // The expansion is built in a scratch list and moved into 'output'
            // at the very end (takeTokens).
            TokenList output2(inputFiles);

            if (functionLike() && rawtok->next && rawtok->next->op == '(') {
                // Copy macro call to a new tokenlist with no linebreaks
                // (every copied token gets the location of the macro name token)
                const Token * const rawtok1 = rawtok;
                TokenList rawtokens2(inputFiles);
                rawtokens2.push_back(new Token(rawtok->str(), rawtok1->location));
                rawtok = rawtok->next;
                rawtokens2.push_back(new Token(rawtok->str(), rawtok1->location));
                rawtok = rawtok->next;
                // Copy tokens until the parenthesis opened above is closed
                // (or the raw tokens run out).
                int par = 1;
                while (rawtok && par > 0) {
                    if (rawtok->op == '(')
                        ++par;
                    else if (rawtok->op == ')')
                        --par;
                    else if (rawtok->op == '#' && !sameline(rawtok->previous, rawtok))
                        throw Error(rawtok->location, "it is invalid to use a preprocessor directive as macro parameter");
                    rawtokens2.push_back(new Token(rawtok->str(), rawtok1->location));
                    rawtok = rawtok->next;
                }
                // 'first' is cleared when the replacement list starts with a
                // token spelled like the macro name used in this call.
                bool first = true;
                if (valueToken && valueToken->str() == rawtok1->str())
                    first = false;
                // If the private overload returns non-NULL, continue with the
                // token right after the macro name.
                // NOTE(review): presumably this means the copied call was not
                // fully consumed - confirm against the private overload.
                if (expand(&output2, rawtok1->location, rawtokens2.cfront(), macros, expandedmacros, first))
                    rawtok = rawtok1->next;
            } else {
                rawtok = expand(&output2, rawtok->location, rawtok, macros, expandedmacros);
            }
            // The expansion may end with the name of a function-like macro,
            // optionally followed by an argument list that is still open. In
            // that case the rest of the call is taken from the raw tokens that
            // follow, and that macro is expanded as well.
            while (output2.cback() && rawtok) {
                unsigned int par = 0;
                Token* macro2tok = output2.back();
                // Search backwards for a '(' that has no matching ')' in output2.
                while (macro2tok) {
                    if (macro2tok->op == '(') {
                        if (par==0)
                            break;
                        --par;
                    } else if (macro2tok->op == ')')
                        ++par;
                    macro2tok = macro2tok->previous;
                }
                if (macro2tok) { // macro2tok->op == '('
                    // candidate macro name is the token before the open '('
                    macro2tok = macro2tok->previous;
                    expandedmacros.insert(name());
                } else if (rawtok->op == '(')
                    // no open '(' in output2, but the raw tokens continue with '(':
                    // candidate macro name is the last expanded token
                    macro2tok = output2.back();
                if (!macro2tok || !macro2tok->name)
                    break;
                // do not re-expand this macro, unless output2 is a single token
                if (output2.cfront() != output2.cback() && macro2tok->str() == this->name())
                    break;
                const std::map<TokenString,Macro>::const_iterator macro = macros.find(macro2tok->str());
                if (macro == macros.end() || !macro->second.functionLike())
                    break;
                // Move the tail of output2 (from the candidate name to the end)
                // into a new list; all tokens get the location of the name.
                TokenList rawtokens2(inputFiles);
                const Location loc(macro2tok->location);
                while (macro2tok) {
                    Token *next = macro2tok->next;
                    rawtokens2.push_back(new Token(macro2tok->str(), loc));
                    output2.deleteToken(macro2tok);
                    macro2tok = next;
                }
                // par starts at 1 when a '(' was already moved over, else at 0
                par = (rawtokens2.cfront() != rawtokens2.cback()) ? 1U : 0U;
                // Append raw tokens up to and including the ')' that closes the call.
                const Token *rawtok2 = rawtok;
                for (; rawtok2; rawtok2 = rawtok2->next) {
                    rawtokens2.push_back(new Token(rawtok2->str(), loc));
                    if (rawtok2->op == '(')
                        ++par;
                    else if (rawtok2->op == ')') {
                        if (par <= 1U)
                            break;
                        --par;
                    }
                }
                // NOTE(review): on this break the tokens already moved out of
                // output2 are not put back - confirm that this is intended.
                if (!rawtok2 || par != 1U)
                    break;
                if (macro->second.expand(&output2, rawtok->location, rawtokens2.cfront(), macros, expandedmacros) != NULL)
                    break;
                // the raw tokens up to the closing ')' have been consumed
                rawtok = rawtok2->next;
            }
            output->takeTokens(output2);
            return rawtok;
        }
1354 
1355         /** macro name */
        const TokenString &name() const {
            // Precondition: nameTokDef is non-NULL. It is NULL for a macro
            // created by the constructor that only takes the file list, and
            // after parseDefine() has been given a NULL token.
            return nameTokDef->str();
        }
1359 
1360         /** location for macro definition */
        const Location &defineLocation() const {
            // Location of the macro name token in the definition.
            // Precondition: nameTokDef is non-NULL.
            return nameTokDef->location;
        }
1364 
1365         /** how has this macro been used so far */
        const std::list<Location> &usage() const {
            // One location is appended per call of the private expand() overload.
            return usageList;
        }
1369 
1370         /** is this a function like macro */
functionLike() const1371         bool functionLike() const {
1372             return nameTokDef->next &&
1373                    nameTokDef->next->op == '(' &&
1374                    sameline(nameTokDef, nameTokDef->next) &&
1375                    nameTokDef->next->location.col == nameTokDef->location.col + nameTokDef->str().size();
1376         }
1377 
1378         /** base class for errors */
1379         struct Error {
Errorsimplecpp::Macro::Error1380             Error(const Location &loc, const std::string &s) : location(loc), what(s) {}
1381             Location location;
1382             std::string what;
1383         };
1384 
1385         /** Struct that is thrown when macro is expanded with wrong number of parameters */
1386         struct wrongNumberOfParameters : public Error {
wrongNumberOfParameterssimplecpp::Macro::wrongNumberOfParameters1387             wrongNumberOfParameters(const Location &loc, const std::string &macroName) : Error(loc, "Wrong number of parameters for macro \'" + macroName + "\'.") {}
1388         };
1389 
1390         /** Struct that is thrown when there is invalid ## usage */
1391         struct invalidHashHash : public Error {
invalidHashHashsimplecpp::Macro::invalidHashHash1392             invalidHashHash(const Location &loc, const std::string &macroName) : Error(loc, "Invalid ## usage when expanding \'" + macroName + "\'.") {}
1393         };
1394     private:
1395         /** Create new token where Token::macro is set for replaced tokens */
newMacroToken(const TokenString & str,const Location & loc,bool replaced) const1396         Token *newMacroToken(const TokenString &str, const Location &loc, bool replaced) const {
1397             Token *tok = new Token(str,loc);
1398             if (replaced)
1399                 tok->macro = nameTokDef->str();
1400             return tok;
1401         }
1402 
parseDefine(const Token * nametoken)1403         bool parseDefine(const Token *nametoken) {
1404             nameTokDef = nametoken;
1405             variadic = false;
1406             if (!nameTokDef) {
1407                 valueToken = endToken = NULL;
1408                 args.clear();
1409                 return false;
1410             }
1411 
1412             // function like macro..
1413             if (functionLike()) {
1414                 args.clear();
1415                 const Token *argtok = nameTokDef->next->next;
1416                 while (sameline(nametoken, argtok) && argtok->op != ')') {
1417                     if (argtok->str() == "..." &&
1418                         argtok->next && argtok->next->op == ')') {
1419                         variadic = true;
1420                         if (!argtok->previous->name)
1421                             args.push_back("__VA_ARGS__");
1422                         argtok = argtok->next; // goto ')'
1423                         break;
1424                     }
1425                     if (argtok->op != ',')
1426                         args.push_back(argtok->str());
1427                     argtok = argtok->next;
1428                 }
1429                 if (!sameline(nametoken, argtok)) {
1430                     endToken = argtok ? argtok->previous : argtok;
1431                     valueToken = NULL;
1432                     return false;
1433                 }
1434                 valueToken = argtok ? argtok->next : NULL;
1435             } else {
1436                 args.clear();
1437                 valueToken = nameTokDef->next;
1438             }
1439 
1440             if (!sameline(valueToken, nameTokDef))
1441                 valueToken = NULL;
1442             endToken = valueToken;
1443             while (sameline(endToken, nameTokDef))
1444                 endToken = endToken->next;
1445             return true;
1446         }
1447 
getArgNum(const TokenString & str) const1448         unsigned int getArgNum(const TokenString &str) const {
1449             unsigned int par = 0;
1450             while (par < args.size()) {
1451                 if (str == args[par])
1452                     return par;
1453                 par++;
1454             }
1455             return ~0U;
1456         }
1457 
getMacroParameters(const Token * nameTokInst,bool calledInDefine) const1458         std::vector<const Token *> getMacroParameters(const Token *nameTokInst, bool calledInDefine) const {
1459             if (!nameTokInst->next || nameTokInst->next->op != '(' || !functionLike())
1460                 return std::vector<const Token *>();
1461 
1462             std::vector<const Token *> parametertokens;
1463             parametertokens.push_back(nameTokInst->next);
1464             unsigned int par = 0U;
1465             for (const Token *tok = nameTokInst->next->next; calledInDefine ? sameline(tok, nameTokInst) : (tok != NULL); tok = tok->next) {
1466                 if (tok->op == '(')
1467                     ++par;
1468                 else if (tok->op == ')') {
1469                     if (par == 0U) {
1470                         parametertokens.push_back(tok);
1471                         break;
1472                     }
1473                     --par;
1474                 } else if (par == 0U && tok->op == ',' && (!variadic || parametertokens.size() < args.size()))
1475                     parametertokens.push_back(tok);
1476             }
1477             return parametertokens;
1478         }
1479 
        /**
         * Append the parenthesized token sequence that starts at lpar to
         * 'tokens', handling '##', '#', macro arguments and object-like
         * macros on the way. Only tokens on the line of lpar are processed.
         *
         * @param tokens          destination token list
         * @param rawloc          location passed on to the expansion helpers
         * @param lpar            the opening '(' token
         * @param macros          known macros
         * @param expandedmacros  names of macros that must not be expanded
         * @param parametertokens delimiter tokens of the current macro call
         * @return the ')' token that closes lpar, or NULL when lpar is not a
         *         '(' token or the scan left the line of lpar
         */
        const Token *appendTokens(TokenList *tokens,
                                  const Location &rawloc,
                                  const Token * const lpar,
                                  const std::map<TokenString,Macro> &macros,
                                  const std::set<TokenString> &expandedmacros,
                                  const std::vector<const Token*> &parametertokens) const {
            if (!lpar || lpar->op != '(')
                return NULL;
            // nesting level; becomes 1 when lpar itself has been processed
            unsigned int par = 0;
            const Token *tok = lpar;
            while (sameline(lpar, tok)) {
                if (tok->op == '#' && sameline(tok,tok->next) && tok->next->op == '#' && sameline(tok,tok->next->next)) {
                    // A##B => AB
                    tok = expandHashHash(tokens, rawloc, tok, macros, expandedmacros, parametertokens);
                } else if (tok->op == '#' && sameline(tok, tok->next) && tok->next->op != '#') {
                    // single '#': handled by expandHash(), which also returns the token to continue with
                    tok = expandHash(tokens, rawloc, tok, macros, expandedmacros, parametertokens);
                } else {
                    // First try to treat tok as a macro parameter; if it is not one,
                    // expand it as an object-like macro or copy it unchanged.
                    if (!expandArg(tokens, tok, rawloc, macros, expandedmacros, parametertokens)) {
                        bool expanded = false;
                        const std::map<TokenString, Macro>::const_iterator it = macros.find(tok->str());
                        if (it != macros.end() && expandedmacros.find(tok->str()) == expandedmacros.end()) {
                            const Macro &m = it->second;
                            // function-like macros are not expanded here
                            if (!m.functionLike()) {
                                m.expand(tokens, rawloc, tok, macros, expandedmacros);
                                expanded = true;
                            }
                        }
                        if (!expanded) {
                            tokens->push_back(new Token(*tok));
                            // Copied tokens without a macro name get the name of this
                            // macro, except for a "(" at nesting level 0 (lpar itself).
                            if (tok->macro.empty() && (par > 0 || tok->str() != "("))
                                tokens->back()->macro = name();
                        }
                    }

                    if (tok->op == '(')
                        ++par;
                    else if (tok->op == ')') {
                        --par;
                        // the parenthesis opened by lpar has been closed
                        if (par == 0U)
                            break;
                    }
                    tok = tok->next;
                }
            }
            // Every token in 'tokens' (not only those appended above) is moved
            // to the location of lpar.
            for (Token *tok2 = tokens->front(); tok2; tok2 = tok2->next)
                tok2->location = lpar->location;
            return sameline(lpar,tok) ? tok : NULL;
        }
1528 
expand(TokenList * const output,const Location & loc,const Token * const nameTokInst,const std::map<TokenString,Macro> & macros,std::set<TokenString> expandedmacros,bool first=false) const1529         const Token * expand(TokenList * const output, const Location &loc, const Token * const nameTokInst, const std::map<TokenString,Macro> &macros, std::set<TokenString> expandedmacros, bool first=false) const {
1530 
1531             if (!first)
1532                 expandedmacros.insert(nameTokInst->str());
1533 
1534             usageList.push_back(loc);
1535 
1536             if (nameTokInst->str() == "__FILE__") {
1537                 output->push_back(new Token('\"'+loc.file()+'\"', loc));
1538                 return nameTokInst->next;
1539             }
1540             if (nameTokInst->str() == "__LINE__") {
1541                 output->push_back(new Token(toString(loc.line), loc));
1542                 return nameTokInst->next;
1543             }
1544             if (nameTokInst->str() == "__COUNTER__") {
1545                 output->push_back(new Token(toString(usageList.size()-1U), loc));
1546                 return nameTokInst->next;
1547             }
1548 
1549             const bool calledInDefine = (loc.fileIndex != nameTokInst->location.fileIndex ||
1550                                          loc.line < nameTokInst->location.line);
1551 
1552             std::vector<const Token*> parametertokens1(getMacroParameters(nameTokInst, calledInDefine));
1553 
1554             if (functionLike()) {
1555                 // No arguments => not macro expansion
1556                 if (nameTokInst->next && nameTokInst->next->op != '(') {
1557                     output->push_back(new Token(nameTokInst->str(), loc));
1558                     return nameTokInst->next;
1559                 }
1560 
1561                 // Parse macro-call
1562                 if (variadic) {
1563                     if (parametertokens1.size() < args.size()) {
1564                         throw wrongNumberOfParameters(nameTokInst->location, name());
1565                     }
1566                 } else {
1567                     if (parametertokens1.size() != args.size() + (args.empty() ? 2U : 1U))
1568                         throw wrongNumberOfParameters(nameTokInst->location, name());
1569                 }
1570             }
1571 
1572             // If macro call uses __COUNTER__ then expand that first
1573             TokenList tokensparams(files);
1574             std::vector<const Token *> parametertokens2;
1575             if (!parametertokens1.empty()) {
1576                 bool counter = false;
1577                 for (const Token *tok = parametertokens1[0]; tok != parametertokens1.back(); tok = tok->next) {
1578                     if (tok->str() == "__COUNTER__") {
1579                         counter = true;
1580                         break;
1581                     }
1582                 }
1583 
1584                 const std::map<TokenString,Macro>::const_iterator m = macros.find("__COUNTER__");
1585 
1586                 if (!counter || m == macros.end())
1587                     parametertokens2.swap(parametertokens1);
1588                 else {
1589                     const Macro &counterMacro = m->second;
1590                     unsigned int par = 0;
1591                     for (const Token *tok = parametertokens1[0]; tok && par < parametertokens1.size(); tok = tok->next) {
1592                         if (tok->str() == "__COUNTER__") {
1593                             tokensparams.push_back(new Token(toString(counterMacro.usageList.size()), tok->location));
1594                             counterMacro.usageList.push_back(tok->location);
1595                         } else {
1596                             tokensparams.push_back(new Token(*tok));
1597                             if (tok == parametertokens1[par]) {
1598                                 parametertokens2.push_back(tokensparams.cback());
1599                                 par++;
1600                             }
1601                         }
1602                     }
1603                 }
1604             }
1605 
1606             Token * const output_end_1 = output->back();
1607 
1608             // expand
1609             for (const Token *tok = valueToken; tok != endToken;) {
1610                 if (tok->op != '#') {
1611                     // A##B => AB
1612                     if (sameline(tok, tok->next) && tok->next && tok->next->op == '#' && tok->next->next && tok->next->next->op == '#') {
1613                         if (!sameline(tok, tok->next->next->next))
1614                             throw invalidHashHash(tok->location, name());
1615                         TokenList new_output(files);
1616                         if (!expandArg(&new_output, tok, parametertokens2))
1617                             output->push_back(newMacroToken(tok->str(), loc, isReplaced(expandedmacros)));
1618                         else if (new_output.empty()) // placemarker token
1619                             output->push_back(newMacroToken("", loc, isReplaced(expandedmacros)));
1620                         else
1621                             for (const Token *tok2 = new_output.cfront(); tok2; tok2 = tok2->next)
1622                                 output->push_back(newMacroToken(tok2->str(), loc, isReplaced(expandedmacros)));
1623                         tok = tok->next;
1624                     } else {
1625                         tok = expandToken(output, loc, tok, macros, expandedmacros, parametertokens2);
1626                     }
1627                     continue;
1628                 }
1629 
1630                 int numberOfHash = 1;
1631                 const Token *hashToken = tok->next;
1632                 while (sameline(tok,hashToken) && hashToken->op == '#') {
1633                     hashToken = hashToken->next;
1634                     ++numberOfHash;
1635                 }
1636                 if (numberOfHash == 4 && tok->next->location.col + 1 == tok->next->next->location.col) {
1637                     // # ## #  => ##
1638                     output->push_back(newMacroToken("##", loc, isReplaced(expandedmacros)));
1639                     tok = hashToken;
1640                     continue;
1641                 }
1642 
1643                 if (numberOfHash >= 2 && tok->location.col + 1 < tok->next->location.col) {
1644                     output->push_back(new Token(*tok));
1645                     tok = tok->next;
1646                     continue;
1647                 }
1648 
1649                 tok = tok->next;
1650                 if (tok == endToken) {
1651                     output->push_back(new Token(*tok->previous));
1652                     break;
1653                 }
1654                 if (tok->op == '#') {
1655                     // A##B => AB
1656                     tok = expandHashHash(output, loc, tok->previous, macros, expandedmacros, parametertokens2);
1657                 } else {
1658                     // #123 => "123"
1659                     tok = expandHash(output, loc, tok->previous, macros, expandedmacros, parametertokens2);
1660                 }
1661             }
1662 
1663             if (!functionLike()) {
1664                 for (Token *tok = output_end_1 ? output_end_1->next : output->front(); tok; tok = tok->next) {
1665                     tok->macro = nameTokInst->str();
1666                 }
1667             }
1668 
1669             if (!parametertokens1.empty())
1670                 parametertokens1.swap(parametertokens2);
1671 
1672             return functionLike() ? parametertokens2.back()->next : nameTokInst->next;
1673         }
1674 
recursiveExpandToken(TokenList * output,TokenList & temp,const Location & loc,const Token * tok,const std::map<TokenString,Macro> & macros,const std::set<TokenString> & expandedmacros,const std::vector<const Token * > & parametertokens) const1675         const Token *recursiveExpandToken(TokenList *output, TokenList &temp, const Location &loc, const Token *tok, const std::map<TokenString,Macro> &macros, const std::set<TokenString> &expandedmacros, const std::vector<const Token*> &parametertokens) const {
1676             if (!(temp.cback() && temp.cback()->name && tok->next && tok->next->op == '(')) {
1677                 output->takeTokens(temp);
1678                 return tok->next;
1679             }
1680 
1681             if (!sameline(tok, tok->next)) {
1682                 output->takeTokens(temp);
1683                 return tok->next;
1684             }
1685 
1686             const std::map<TokenString, Macro>::const_iterator it = macros.find(temp.cback()->str());
1687             if (it == macros.end() || expandedmacros.find(temp.cback()->str()) != expandedmacros.end()) {
1688                 output->takeTokens(temp);
1689                 return tok->next;
1690             }
1691 
1692             const Macro &calledMacro = it->second;
1693             if (!calledMacro.functionLike()) {
1694                 output->takeTokens(temp);
1695                 return tok->next;
1696             }
1697 
1698             TokenList temp2(files);
1699             temp2.push_back(new Token(temp.cback()->str(), tok->location));
1700 
1701             const Token *tok2 = appendTokens(&temp2, loc, tok->next, macros, expandedmacros, parametertokens);
1702             if (!tok2)
1703                 return tok->next;
1704             output->takeTokens(temp);
1705             output->deleteToken(output->back());
1706             calledMacro.expand(output, loc, temp2.cfront(), macros, expandedmacros);
1707             return tok2->next;
1708         }
1709 
expandToken(TokenList * output,const Location & loc,const Token * tok,const std::map<TokenString,Macro> & macros,const std::set<TokenString> & expandedmacros,const std::vector<const Token * > & parametertokens) const1710         const Token *expandToken(TokenList *output, const Location &loc, const Token *tok, const std::map<TokenString,Macro> &macros, const std::set<TokenString> &expandedmacros, const std::vector<const Token*> &parametertokens) const {
1711             // Not name..
1712             if (!tok->name) {
1713                 output->push_back(newMacroToken(tok->str(), loc, true));
1714                 return tok->next;
1715             }
1716 
1717             // Macro parameter..
1718             {
1719                 TokenList temp(files);
1720                 if (expandArg(&temp, tok, loc, macros, expandedmacros, parametertokens))
1721                     return recursiveExpandToken(output, temp, loc, tok, macros, expandedmacros, parametertokens);
1722             }
1723 
1724             // Macro..
1725             const std::map<TokenString, Macro>::const_iterator it = macros.find(tok->str());
1726             if (it != macros.end() && expandedmacros.find(tok->str()) == expandedmacros.end()) {
1727                 std::set<std::string> expandedmacros2(expandedmacros);
1728                 expandedmacros2.insert(tok->str());
1729 
1730                 const Macro &calledMacro = it->second;
1731                 if (!calledMacro.functionLike()) {
1732                     TokenList temp(files);
1733                     calledMacro.expand(&temp, loc, tok, macros, expandedmacros);
1734                     return recursiveExpandToken(output, temp, loc, tok, macros, expandedmacros2, parametertokens);
1735                 }
1736                 if (!sameline(tok, tok->next) || tok->next->op != '(') {
1737                     output->push_back(newMacroToken(tok->str(), loc, true));
1738                     return tok->next;
1739                 }
1740                 TokenList tokens(files);
1741                 tokens.push_back(new Token(*tok));
1742                 const Token *tok2 = appendTokens(&tokens, loc, tok->next, macros, expandedmacros, parametertokens);
1743                 if (!tok2) {
1744                     output->push_back(newMacroToken(tok->str(), loc, true));
1745                     return tok->next;
1746                 }
1747                 TokenList temp(files);
1748                 calledMacro.expand(&temp, loc, tokens.cfront(), macros, expandedmacros);
1749                 return recursiveExpandToken(output, temp, loc, tok2, macros, expandedmacros2, parametertokens);
1750             }
1751 
1752             else if (tok->str() == DEFINED) {
1753                 const Token *tok2 = tok->next;
1754                 const Token *tok3 = tok2 ? tok2->next : NULL;
1755                 const Token *tok4 = tok3 ? tok3->next : NULL;
1756                 const Token *defToken = NULL;
1757                 const Token *lastToken = NULL;
1758                 if (sameline(tok, tok4) && tok2->op == '(' && tok3->name && tok4->op == ')') {
1759                     defToken = tok3;
1760                     lastToken = tok4;
1761                 } else if (sameline(tok,tok2) && tok2->name) {
1762                     defToken = lastToken = tok2;
1763                 }
1764                 if (defToken) {
1765                     std::string macroName = defToken->str();
1766                     if (defToken->next && defToken->next->op == '#' && defToken->next->next && defToken->next->next->op == '#' && defToken->next->next->next && defToken->next->next->next->name && sameline(defToken,defToken->next->next->next)) {
1767                         TokenList temp(files);
1768                         if (expandArg(&temp, defToken, parametertokens))
1769                             macroName = temp.cback()->str();
1770                         if (expandArg(&temp, defToken->next->next->next, parametertokens))
1771                             macroName += temp.cback()->str();
1772                         else
1773                             macroName += defToken->next->next->next->str();
1774                         lastToken = defToken->next->next->next;
1775                     }
1776                     const bool def = (macros.find(macroName) != macros.end());
1777                     output->push_back(newMacroToken(def ? "1" : "0", loc, true));
1778                     return lastToken->next;
1779                 }
1780             }
1781 
1782             output->push_back(newMacroToken(tok->str(), loc, true));
1783             return tok->next;
1784         }
1785 
expandArg(TokenList * output,const Token * tok,const std::vector<const Token * > & parametertokens) const1786         bool expandArg(TokenList *output, const Token *tok, const std::vector<const Token*> &parametertokens) const {
1787             if (!tok->name)
1788                 return false;
1789 
1790             const unsigned int argnr = getArgNum(tok->str());
1791             if (argnr >= args.size())
1792                 return false;
1793 
1794             // empty variadic parameter
1795             if (variadic && argnr + 1U >= parametertokens.size())
1796                 return true;
1797 
1798             for (const Token *partok = parametertokens[argnr]->next; partok != parametertokens[argnr + 1U]; partok = partok->next)
1799                 output->push_back(new Token(*partok));
1800 
1801             return true;
1802         }
1803 
expandArg(TokenList * output,const Token * tok,const Location & loc,const std::map<TokenString,Macro> & macros,const std::set<TokenString> & expandedmacros,const std::vector<const Token * > & parametertokens) const1804         bool expandArg(TokenList *output, const Token *tok, const Location &loc, const std::map<TokenString, Macro> &macros, const std::set<TokenString> &expandedmacros, const std::vector<const Token*> &parametertokens) const {
1805             if (!tok->name)
1806                 return false;
1807             const unsigned int argnr = getArgNum(tok->str());
1808             if (argnr >= args.size())
1809                 return false;
1810             if (variadic && argnr + 1U >= parametertokens.size()) // empty variadic parameter
1811                 return true;
1812             for (const Token *partok = parametertokens[argnr]->next; partok != parametertokens[argnr + 1U];) {
1813                 const std::map<TokenString, Macro>::const_iterator it = macros.find(partok->str());
1814                 if (it != macros.end() && (partok->str() == name() || expandedmacros.find(partok->str()) == expandedmacros.end()))
1815                     partok = it->second.expand(output, loc, partok, macros, expandedmacros);
1816                 else {
1817                     output->push_back(newMacroToken(partok->str(), loc, isReplaced(expandedmacros)));
1818                     output->back()->macro = partok->macro;
1819                     partok = partok->next;
1820                 }
1821             }
1822             return true;
1823         }
1824 
1825         /**
1826          * Expand #X => "X"
1827          * @param output  destination tokenlist
1828          * @param loc     location for expanded token
1829          * @param tok     The # token
1830          * @param macros  all macros
1831          * @param expandedmacros   set with expanded macros, with this macro
1832          * @param parametertokens  parameters given when expanding this macro
1833          * @return token after the X
1834          */
expandHash(TokenList * output,const Location & loc,const Token * tok,const std::map<TokenString,Macro> & macros,const std::set<TokenString> & expandedmacros,const std::vector<const Token * > & parametertokens) const1835         const Token *expandHash(TokenList *output, const Location &loc, const Token *tok, const std::map<TokenString, Macro> &macros, const std::set<TokenString> &expandedmacros, const std::vector<const Token*> &parametertokens) const {
1836             TokenList tokenListHash(files);
1837             tok = expandToken(&tokenListHash, loc, tok->next, macros, expandedmacros, parametertokens);
1838             std::ostringstream ostr;
1839             ostr << '\"';
1840             for (const Token *hashtok = tokenListHash.cfront(); hashtok; hashtok = hashtok->next)
1841                 ostr << hashtok->str();
1842             ostr << '\"';
1843             output->push_back(newMacroToken(escapeString(ostr.str()), loc, isReplaced(expandedmacros)));
1844             return tok;
1845         }
1846 
1847         /**
1848          * Expand A##B => AB
1849          * The A should already be expanded. Call this when you reach the first # token
1850          * @param output  destination tokenlist
1851          * @param loc     location for expanded token
1852          * @param tok     first # token
1853          * @param macros  all macros
1854          * @param expandedmacros   set with expanded macros, with this macro
1855          * @param parametertokens  parameters given when expanding this macro
1856          * @return token after B
1857          */
expandHashHash(TokenList * output,const Location & loc,const Token * tok,const std::map<TokenString,Macro> & macros,const std::set<TokenString> & expandedmacros,const std::vector<const Token * > & parametertokens) const1858         const Token *expandHashHash(TokenList *output, const Location &loc, const Token *tok, const std::map<TokenString, Macro> &macros, const std::set<TokenString> &expandedmacros, const std::vector<const Token*> &parametertokens) const {
1859             Token *A = output->back();
1860             if (!A)
1861                 throw invalidHashHash(tok->location, name());
1862             if (!sameline(tok, tok->next) || !sameline(tok, tok->next->next))
1863                 throw invalidHashHash(tok->location, name());
1864 
1865             bool canBeConcatenatedWithEqual = A->isOneOf("+-*/%&|^") || A->str() == "<<" || A->str() == ">>";
1866             if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual)
1867                 throw invalidHashHash(tok->location, name());
1868 
1869             Token *B = tok->next->next;
1870             if (!B->name && !B->number && B->op && !B->isOneOf("#="))
1871                 throw invalidHashHash(tok->location, name());
1872 
1873             if ((canBeConcatenatedWithEqual && B->op != '=') ||
1874                 (!canBeConcatenatedWithEqual && B->op == '='))
1875                 throw invalidHashHash(tok->location, name());
1876 
1877             std::string strAB;
1878 
1879             const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U];
1880 
1881             TokenList tokensB(files);
1882             if (expandArg(&tokensB, B, parametertokens)) {
1883                 if (tokensB.empty())
1884                     strAB = A->str();
1885                 else if (varargs && A->op == ',') {
1886                     strAB = ",";
1887                 } else {
1888                     strAB = A->str() + tokensB.cfront()->str();
1889                     tokensB.deleteToken(tokensB.front());
1890                 }
1891             } else {
1892                 strAB = A->str() + B->str();
1893             }
1894 
1895             const Token *nextTok = B->next;
1896             if (varargs && tokensB.empty() && tok->previous->str() == ",")
1897                 output->deleteToken(A);
1898             else if (strAB != "," && macros.find(strAB) == macros.end()) {
1899                 A->setstr(strAB);
1900                 for (Token *b = tokensB.front(); b; b = b->next)
1901                     b->location = loc;
1902                 output->takeTokens(tokensB);
1903             } else if (nextTok->op == '#' && nextTok->next->op == '#') {
1904                 TokenList output2(files);
1905                 output2.push_back(new Token(strAB, tok->location));
1906                 nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens);
1907                 output->deleteToken(A);
1908                 output->takeTokens(output2);
1909             } else {
1910                 output->deleteToken(A);
1911                 TokenList tokens(files);
1912                 tokens.push_back(new Token(strAB, tok->location));
1913                 // for function like macros, push the (...)
1914                 if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') {
1915                     const std::map<TokenString,Macro>::const_iterator it = macros.find(strAB);
1916                     if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) {
1917                         const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens);
1918                         if (tok2)
1919                             nextTok = tok2->next;
1920                     }
1921                 }
1922                 expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens);
1923                 for (Token *b = tokensB.front(); b; b = b->next)
1924                     b->location = loc;
1925                 output->takeTokens(tokensB);
1926             }
1927 
1928             return nextTok;
1929         }
1930 
isReplaced(const std::set<std::string> & expandedmacros)1931         static bool isReplaced(const std::set<std::string> &expandedmacros) {
1932             // return true if size > 1
1933             std::set<std::string>::const_iterator it = expandedmacros.begin();
1934             if (it == expandedmacros.end())
1935                 return false;
1936             ++it;
1937             return (it != expandedmacros.end());
1938         }
1939 
1940         /** name token in definition */
1941         const Token *nameTokDef;
1942 
1943         /** arguments for macro */
1944         std::vector<TokenString> args;
1945 
1946         /** is macro variadic? */
1947         bool variadic;
1948 
1949         /** first token in replacement string */
1950         const Token *valueToken;
1951 
1952         /** token after replacement string */
1953         const Token *endToken;
1954 
1955         /** files */
1956         std::vector<std::string> &files;
1957 
1958         /** this is used for -D where the definition is not seen anywhere in code */
1959         TokenList tokenListDefine;
1960 
1961         /** usage of this macro */
1962         mutable std::list<Location> usageList;
1963 
1964         /** was the value of this macro actually defined in the code? */
1965         bool valueDefinedInCode_;
1966     };
1967 }
1968 
1969 namespace simplecpp {
1970 
convertCygwinToWindowsPath(const std::string & cygwinPath)1971     std::string convertCygwinToWindowsPath(const std::string &cygwinPath)
1972     {
1973         std::string windowsPath;
1974 
1975         std::string::size_type pos = 0;
1976         if (cygwinPath.size() >= 11 && startsWith(cygwinPath, "/cygdrive/")) {
1977             unsigned char driveLetter = cygwinPath[10];
1978             if (std::isalpha(driveLetter)) {
1979                 if (cygwinPath.size() == 11) {
1980                     windowsPath = toupper(driveLetter);
1981                     windowsPath += ":\\";   // volume root directory
1982                     pos = 11;
1983                 } else if (cygwinPath[11] == '/') {
1984                     windowsPath = toupper(driveLetter);
1985                     windowsPath += ":";
1986                     pos = 11;
1987                 }
1988             }
1989         }
1990 
1991         for (; pos < cygwinPath.size(); ++pos) {
1992             unsigned char c = cygwinPath[pos];
1993             if (c == '/')
1994                 c = '\\';
1995             windowsPath += c;
1996         }
1997 
1998         return windowsPath;
1999     }
2000 }
2001 
2002 #ifdef SIMPLECPP_WINDOWS
2003 
2004 class ScopedLock {
2005 public:
ScopedLock(CRITICAL_SECTION & criticalSection)2006     explicit ScopedLock(CRITICAL_SECTION& criticalSection)
2007         : m_criticalSection(criticalSection) {
2008         EnterCriticalSection(&m_criticalSection);
2009     }
2010 
~ScopedLock()2011     ~ScopedLock() {
2012         LeaveCriticalSection(&m_criticalSection);
2013     }
2014 
2015 private:
2016     ScopedLock& operator=(const ScopedLock&);
2017     ScopedLock(const ScopedLock&);
2018 
2019     CRITICAL_SECTION& m_criticalSection;
2020 };
2021 
2022 class RealFileNameMap {
2023 public:
RealFileNameMap()2024     RealFileNameMap() {
2025         InitializeCriticalSection(&m_criticalSection);
2026     }
2027 
~RealFileNameMap()2028     ~RealFileNameMap() {
2029         DeleteCriticalSection(&m_criticalSection);
2030     }
2031 
getCacheEntry(const std::string & path,std::string * returnPath)2032     bool getCacheEntry(const std::string& path, std::string* returnPath) {
2033         ScopedLock lock(m_criticalSection);
2034 
2035         std::map<std::string, std::string>::iterator it = m_fileMap.find(path);
2036         if (it != m_fileMap.end()) {
2037             *returnPath = it->second;
2038             return true;
2039         }
2040         return false;
2041     }
2042 
addToCache(const std::string & path,const std::string & actualPath)2043     void addToCache(const std::string& path, const std::string& actualPath) {
2044         ScopedLock lock(m_criticalSection);
2045         m_fileMap[path] = actualPath;
2046     }
2047 
2048 private:
2049     std::map<std::string, std::string> m_fileMap;
2050     CRITICAL_SECTION m_criticalSection;
2051 };
2052 
2053 static RealFileNameMap realFileNameMap;
2054 
realFileName(const std::string & f,std::string * result)2055 static bool realFileName(const std::string &f, std::string *result)
2056 {
2057     // are there alpha characters in last subpath?
2058     bool alpha = false;
2059     for (std::string::size_type pos = 1; pos <= f.size(); ++pos) {
2060         unsigned char c = f[f.size() - pos];
2061         if (c == '/' || c == '\\')
2062             break;
2063         if (std::isalpha(c)) {
2064             alpha = true;
2065             break;
2066         }
2067     }
2068 
2069     // do not convert this path if there are no alpha characters (either pointless or cause wrong results for . and ..)
2070     if (!alpha)
2071         return false;
2072 
2073     // Lookup filename or foldername on file system
2074     if (!realFileNameMap.getCacheEntry(f, result)) {
2075 
2076         WIN32_FIND_DATAA FindFileData;
2077 
2078 #ifdef __CYGWIN__
2079         std::string fConverted = simplecpp::convertCygwinToWindowsPath(f);
2080         HANDLE hFind = FindFirstFileExA(fConverted.c_str(), FindExInfoBasic, &FindFileData, FindExSearchNameMatch, NULL, 0);
2081 #else
2082         HANDLE hFind = FindFirstFileExA(f.c_str(), FindExInfoBasic, &FindFileData, FindExSearchNameMatch, NULL, 0);
2083 #endif
2084 
2085         if (INVALID_HANDLE_VALUE == hFind)
2086             return false;
2087         *result = FindFileData.cFileName;
2088         realFileNameMap.addToCache(f, *result);
2089         FindClose(hFind);
2090     }
2091     return true;
2092 }
2093 
2094 static RealFileNameMap realFilePathMap;
2095 
2096 /** Change case in given path to match filesystem */
realFilename(const std::string & f)2097 static std::string realFilename(const std::string &f)
2098 {
2099     std::string ret;
2100     ret.reserve(f.size()); // this will be the final size
2101     if (realFilePathMap.getCacheEntry(f, &ret))
2102         return ret;
2103 
2104     // Current subpath
2105     std::string subpath;
2106 
2107     for (std::string::size_type pos = 0; pos < f.size(); ++pos) {
2108         unsigned char c = f[pos];
2109 
2110         // Separator.. add subpath and separator
2111         if (c == '/' || c == '\\') {
2112             // if subpath is empty just add separator
2113             if (subpath.empty()) {
2114                 ret += c;
2115                 continue;
2116             }
2117 
2118             bool isDriveSpecification =
2119                 (pos == 2 && subpath.size() == 2 && std::isalpha(subpath[0]) && subpath[1] == ':');
2120 
2121             // Append real filename (proper case)
2122             std::string f2;
2123             if (!isDriveSpecification && realFileName(f.substr(0, pos), &f2))
2124                 ret += f2;
2125             else
2126                 ret += subpath;
2127 
2128             subpath.clear();
2129 
2130             // Append separator
2131             ret += c;
2132         } else {
2133             subpath += c;
2134         }
2135     }
2136 
2137     if (!subpath.empty()) {
2138         std::string f2;
2139         if (realFileName(f,&f2))
2140             ret += f2;
2141         else
2142             ret += subpath;
2143     }
2144 
2145     realFilePathMap.addToCache(f, ret);
2146     return ret;
2147 }
2148 
/**
 * @return true for "X:\..." / "X:/..." and for a path of at least two characters
 *         that starts with '/' or '\'
 */
static bool isAbsolutePath(const std::string &path)
{
    // drive letter, colon, separator
    const bool driveForm = path.length() >= 3
                           && path[0] > 0 && std::isalpha(path[0])
                           && path[1] == ':'
                           && (path[2] == '\\' || path[2] == '/');
    if (driveForm)
        return true;

    // leading separator followed by at least one more character
    if (path.length() <= 1U)
        return false;
    return path[0] == '/' || path[0] == '\\';
}
2155 #else
2156 #define realFilename(f)  f
2157 
/** @return true for a path of at least two characters that starts with '/' */
static bool isAbsolutePath(const std::string &path)
{
    if (path.length() <= 1U)
        return false;
    return path[0] == '/';
}
2162 #endif
2163 
2164 namespace simplecpp {
2165     /**
2166      * perform path simplifications for . and ..
2167      */
simplifyPath(std::string path)2168     std::string simplifyPath(std::string path)
2169     {
2170         if (path.empty())
2171             return path;
2172 
2173         std::string::size_type pos;
2174 
2175         // replace backslash separators
2176         std::replace(path.begin(), path.end(), '\\', '/');
2177 
2178         const bool unc(path.compare(0,2,"//") == 0);
2179 
2180         // replace "//" with "/"
2181         pos = 0;
2182         while ((pos = path.find("//",pos)) != std::string::npos) {
2183             path.erase(pos,1);
2184         }
2185 
2186         // remove "./"
2187         pos = 0;
2188         while ((pos = path.find("./",pos)) != std::string::npos) {
2189             if (pos == 0 || path[pos - 1U] == '/')
2190                 path.erase(pos,2);
2191             else
2192                 pos += 2;
2193         }
2194 
2195         // remove trailing dot if path ends with "/."
2196         if (endsWith(path,"/."))
2197             path.erase(path.size()-1);
2198 
2199         // simplify ".."
2200         pos = 1; // don't simplify ".." if path starts with that
2201         while ((pos = path.find("/..", pos)) != std::string::npos) {
2202             // not end of path, then string must be "/../"
2203             if (pos + 3 < path.size() && path[pos + 3] != '/') {
2204                 ++pos;
2205                 continue;
2206             }
2207             // get previous subpath
2208             const std::string::size_type pos1 = path.rfind('/', pos - 1U) + 1U;
2209             const std::string previousSubPath = path.substr(pos1, pos-pos1);
2210             if (previousSubPath == "..") {
2211                 // don't simplify
2212                 ++pos;
2213             } else {
2214                 // remove previous subpath and ".."
2215                 path.erase(pos1,pos-pos1+4);
2216                 if (path.empty())
2217                     path = ".";
2218                 // update pos
2219                 pos = (pos1 == 0) ? 1 : (pos1 - 1);
2220             }
2221         }
2222 
2223         // Remove trailing '/'?
2224         //if (path.size() > 1 && endsWith(path, "/"))
2225         //    path.erase(path.size()-1);
2226 
2227         if (unc)
2228             path = '/' + path;
2229 
2230         return path.find_first_of("*?") == std::string::npos ? realFilename(path) : path;
2231     }
2232 }
2233 
2234 /** Evaluate sizeof(type) */
simplifySizeof(simplecpp::TokenList & expr,const std::map<std::string,std::size_t> & sizeOfType)2235 static void simplifySizeof(simplecpp::TokenList &expr, const std::map<std::string, std::size_t> &sizeOfType)
2236 {
2237     for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) {
2238         if (tok->str() != "sizeof")
2239             continue;
2240         simplecpp::Token *tok1 = tok->next;
2241         if (!tok1) {
2242             throw std::runtime_error("missing sizeof argument");
2243         }
2244         simplecpp::Token *tok2 = tok1->next;
2245         if (!tok2) {
2246             throw std::runtime_error("missing sizeof argument");
2247         }
2248         if (tok1->op == '(') {
2249             tok1 = tok1->next;
2250             while (tok2->op != ')') {
2251                 tok2 = tok2->next;
2252                 if (!tok2) {
2253                     throw std::runtime_error("invalid sizeof expression");
2254                 }
2255             }
2256         }
2257 
2258         std::string type;
2259         for (simplecpp::Token *typeToken = tok1; typeToken != tok2; typeToken = typeToken->next) {
2260             if ((typeToken->str() == "unsigned" || typeToken->str() == "signed") && typeToken->next->name)
2261                 continue;
2262             if (typeToken->str() == "*" && type.find('*') != std::string::npos)
2263                 continue;
2264             if (!type.empty())
2265                 type += ' ';
2266             type += typeToken->str();
2267         }
2268 
2269         const std::map<std::string, std::size_t>::const_iterator it = sizeOfType.find(type);
2270         if (it != sizeOfType.end())
2271             tok->setstr(toString(it->second));
2272         else
2273             continue;
2274 
2275         tok2 = tok2->next;
2276         while (tok->next != tok2)
2277             expr.deleteToken(tok->next);
2278     }
2279 }
2280 
2281 static const char * const altopData[] = {"and","or","bitand","bitor","compl","not","not_eq","xor"};
2282 static const std::set<std::string> altop(&altopData[0], &altopData[8]);
simplifyName(simplecpp::TokenList & expr)2283 static void simplifyName(simplecpp::TokenList &expr)
2284 {
2285     for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) {
2286         if (tok->name) {
2287             if (altop.find(tok->str()) != altop.end()) {
2288                 bool alt;
2289                 if (tok->str() == "not" || tok->str() == "compl") {
2290                     alt = isAlternativeUnaryOp(tok,tok->str());
2291                 } else {
2292                     alt = isAlternativeBinaryOp(tok,tok->str());
2293                 }
2294                 if (alt)
2295                     continue;
2296             }
2297             tok->setstr("0");
2298         }
2299     }
2300 }
2301 
/*
 * Reads at least minlen and at most maxlen digits (inc. prefix) in base base
 * from s starting at position pos and converts them to an
 * unsigned long long value, updating pos to point to the first
 * unused element of s.
 *
 * The number has to start right at pos: leading whitespace and a leading
 * sign, which strtoull() would silently accept, count as "no digits read".
 *
 * Returns ULLONG_MAX if the result is not representable and
 * throws std::runtime_error if the above requirements were not possible
 * to satisfy. s.substr() throws std::out_of_range if pos > s.size().
 */
static unsigned long long stringToULLbounded(
    const std::string& s,
    std::size_t& pos,
    int base = 0,
    std::ptrdiff_t minlen = 1,
    std::size_t maxlen = std::string::npos
)
{
    const std::string sub = s.substr(pos, maxlen);
    const char * const start = sub.c_str();
    const char *end = start;
    unsigned long long value = 0;

    // strtoull() skips whitespace and accepts '+'/'-'. Neither may appear in
    // an escape sequence, and the skipped characters would wrongly be counted
    // as digits, so such input is treated as an empty match.
    const char first = *start;
    const bool rejected = first == ' ' || first == '\t' || first == '\n' ||
                          first == '\v' || first == '\f' || first == '\r' ||
                          first == '+' || first == '-';
    if (!rejected) {
        char *parsedEnd;
        value = std::strtoull(start, &parsedEnd, base);
        end = parsedEnd;
    }

    pos += end - start;
    if (end - start < minlen)
        throw std::runtime_error("expected digit");
    return value;
}
2327 
/* Converts character literal (including prefix, but not ud-suffix)
 * to long long value.
 *
 * Assumes ASCII-compatible single-byte encoded str for narrow literals
 * and UTF-8 otherwise.
 *
 * For target assumes
 * - execution character set encoding matching str
 * - UTF-32 execution wide-character set encoding
 * - requirements for __STDC_UTF_16__, __STDC_UTF_32__ and __STDC_ISO_10646__ satisfied
 * - char16_t is 16bit wide
 * - char32_t is 32bit wide
 * - wchar_t is 32bit wide and unsigned
 * - matching char signedness to host
 * - matching sizeof(int) to host
 *
 * For host assumes
 * - ASCII-compatible execution character set
 *
 * For host and target assumes
 * - CHAR_BIT == 8
 * - two's complement
 *
 * Implements multi-character narrow literals according to GCC's behavior,
 * except multi code unit universal character names are not supported.
 * Multi-character wide literals are not supported.
 * Limited support of universal character names for non-UTF-8 execution character set encodings.
 *
 * Throws std::runtime_error if str is not a character literal, is empty or
 * unterminated, contains an invalid escape or UTF-8 sequence, or holds a
 * value that does not fit the literal's character type.
 */
long long simplecpp::characterLiteralToLL(const std::string& str)
{
    // default is wide/utf32
    bool narrow = false;
    bool utf8 = false;
    bool utf16 = false;

    // index of the next unread character; set past the prefix and opening quote below
    std::size_t pos;

    // classify the literal by its prefix: '...', u'...', u8'...', L'...' or U'...'
    if (str.size() >= 1 && str[0] == '\'') {
        narrow = true;
        pos = 1;
    } else if (str.size() >= 2 && str[0] == 'u' && str[1] == '\'') {
        utf16 = true;
        pos = 2;
    } else if (str.size() >= 3 && str[0] == 'u' && str[1] == '8' && str[2] == '\'') {
        utf8 = true;
        pos = 3;
    } else if (str.size() >= 2 && (str[0] == 'L' || str[0] == 'U') && str[1] == '\'') {
        pos = 2;
    } else
        throw std::runtime_error("expected a character literal");

    // accumulated value of all characters read so far, one character per CHAR_BIT bits
    unsigned long long multivalue = 0;

    // number of characters (not source bytes) folded into multivalue
    std::size_t nbytes = 0;

    // the last character of str is reserved for the closing quote
    while (pos + 1 < str.size()) {
        if (str[pos] == '\'' || str[pos] == '\n')
            throw std::runtime_error("raw single quotes and newlines not allowed in character literals");

        if (nbytes >= 1 && !narrow)
            throw std::runtime_error("multiple characters only supported in narrow character literals");

        unsigned long long value;

        if (str[pos] == '\\') {
            pos++;
            char escape = str[pos++];

            // the escape character must not be the final character of str
            if (pos >= str.size())
                throw std::runtime_error("unexpected end of character literal");

            switch (escape) {
            // obscure GCC extensions
            case '%':
            case '(':
            case '[':
            case '{':
            // standard escape sequences
            case '\'':
            case '"':
            case '?':
            case '\\':
                value = static_cast<unsigned char>(escape);
                break;

            case 'a':
                value = static_cast<unsigned char>('\a');
                break;
            case 'b':
                value = static_cast<unsigned char>('\b');
                break;
            case 'f':
                value = static_cast<unsigned char>('\f');
                break;
            case 'n':
                value = static_cast<unsigned char>('\n');
                break;
            case 'r':
                value = static_cast<unsigned char>('\r');
                break;
            case 't':
                value = static_cast<unsigned char>('\t');
                break;
            case 'v':
                value = static_cast<unsigned char>('\v');
                break;

            // GCC extension for ESC character
            case 'e':
            case 'E':
                value = static_cast<unsigned char>('\x1b');
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                // octal escape sequences consist of 1 to 3 digits
                // (--pos steps back so that the digit already consumed as 'escape' is parsed too)
                value = stringToULLbounded(str, --pos, 8, 1, 3);
                break;

            case 'x':
                // hexadecimal escape sequences consist of at least 1 digit
                value = stringToULLbounded(str, pos, 16);
                break;

            case 'u':
            case 'U': {
                // universal character names have exactly 4 or 8 digits
                std::size_t ndigits = (escape == 'u' ? 4 : 8);
                value = stringToULLbounded(str, pos, 16, ndigits, ndigits);

                // UTF-8 encodes code points above 0x7f in multiple code units
                // code points above 0x10ffff are not allowed
                if (((narrow || utf8) && value > 0x7f) || (utf16 && value > 0xffff) || value > 0x10ffff)
                    throw std::runtime_error("code point too large");

                if (value >= 0xd800 && value <= 0xdfff)
                    throw std::runtime_error("surrogate code points not allowed in universal character names");

                break;
            }

            default:
                throw std::runtime_error("invalid escape sequence");
            }
        } else {
            value = static_cast<unsigned char>(str[pos++]);

            if (!narrow && value >= 0x80) {
                // Assuming this is a UTF-8 encoded code point.
                // This decoder may not completely validate the input.
                // Noncharacters are neither rejected nor replaced.

                // number of continuation bytes announced by the lead byte
                int additional_bytes;
                if (value >= 0xf5)  // higher values would result in code points above 0x10ffff
                    throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");
                else if (value >= 0xf0)
                    additional_bytes = 3;
                else if (value >= 0xe0)
                    additional_bytes = 2;
                else if (value >= 0xc2) // 0xc0 and 0xc1 are always overlong 2-bytes encodings
                    additional_bytes = 1;
                else
                    throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");

                // keep only the payload bits of the lead byte (5, 4 or 3 bits)
                value &= (1 << (6 - additional_bytes)) - 1;

                while (additional_bytes--) {
                    // a continuation byte must not be the final character of str
                    if (pos + 1 >= str.size())
                        throw std::runtime_error("assumed UTF-8 encoded source, but character literal ends unexpectedly");

                    unsigned char c = str[pos++];

                    // additional_bytes was already decremented by the loop condition, so
                    // the overlong tests below apply to the first continuation byte only
                    if (((c >> 6) != 2)    // ensure c has form 0b10xxxxxx
                        || (!value && additional_bytes == 1 && c < 0xa0)    // overlong 3-bytes encoding
                        || (!value && additional_bytes == 2 && c < 0x90))   // overlong 4-bytes encoding
                        throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");

                    // the mask keeps 7 bits, but bit 6 is known to be clear after the
                    // check above, so only the 6 payload bits are appended
                    value = (value << 6) | (c & ((1 << 7) - 1));
                }

                // UTF-16 surrogates are not valid code points
                if (value >= 0xd800 && value <= 0xdfff)
                    throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");

                if ((utf8 && value > 0x7f) || (utf16 && value > 0xffff) || value > 0x10ffff)
                    throw std::runtime_error("code point too large");
            }
        }

        // the value must fit the character type of the literal: 8, 16 or 32 bits
        if (((narrow || utf8) && value > std::numeric_limits<unsigned char>::max()) || (utf16 && value >> 16) || value >> 32)
            throw std::runtime_error("numeric escape sequence too large");

        // append the character below the ones already read
        multivalue <<= CHAR_BIT;
        multivalue |= value;
        nbytes++;
    }

    // exactly one character, the closing quote, has to be left
    if (pos + 1 != str.size() || str[pos] != '\'')
        throw std::runtime_error("missing closing quote in character literal");

    if (!nbytes)
        throw std::runtime_error("empty character literal");

    // ordinary narrow character literal's value is determined by (possibly signed) char
    if (narrow && nbytes == 1)
        return static_cast<char>(multivalue);

    // while multi-character literal's value is determined by (signed) int
    if (narrow)
        return static_cast<int>(multivalue);

    // All other cases are unsigned. Since long long is at least 64bit wide,
    // while the literals at most 32bit wide, the conversion preserves all values.
    return multivalue;
}
2548 
simplifyNumbers(simplecpp::TokenList & expr)2549 static void simplifyNumbers(simplecpp::TokenList &expr)
2550 {
2551     for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) {
2552         if (tok->str().size() == 1U)
2553             continue;
2554         if (tok->str().compare(0,2,"0x") == 0)
2555             tok->setstr(toString(stringToULL(tok->str())));
2556         else if (!tok->number && tok->str().find('\'') != tok->str().npos)
2557             tok->setstr(toString(simplecpp::characterLiteralToLL(tok->str())));
2558     }
2559 }
2560 
evaluate(simplecpp::TokenList & expr,const std::map<std::string,std::size_t> & sizeOfType)2561 static long long evaluate(simplecpp::TokenList &expr, const std::map<std::string, std::size_t> &sizeOfType)
2562 {
2563     simplifySizeof(expr, sizeOfType);
2564     simplifyName(expr);
2565     simplifyNumbers(expr);
2566     expr.constFold();
2567     // TODO: handle invalid expressions
2568     return expr.cfront() && expr.cfront() == expr.cback() && expr.cfront()->number ? stringToLL(expr.cfront()->str()) : 0LL;
2569 }
2570 
/**
 * Skip the rest of the line tok is on.
 *
 * Returns the first following token whose line or file index differs from
 * tok's, or NULL if the token list ends first. tok must not be NULL.
 */
static const simplecpp::Token *gotoNextLine(const simplecpp::Token *tok)
{
    const unsigned int startLine = tok->location.line;
    const unsigned int startFile = tok->location.fileIndex;

    for (; tok; tok = tok->next) {
        if (tok->location.line != startLine || tok->location.fileIndex != startFile)
            break;
    }
    return tok;
}
2579 
2580 #ifdef SIMPLECPP_WINDOWS
2581 
2582 class NonExistingFilesCache {
2583 public:
NonExistingFilesCache()2584     NonExistingFilesCache() {
2585         InitializeCriticalSection(&m_criticalSection);
2586     }
2587 
~NonExistingFilesCache()2588     ~NonExistingFilesCache() {
2589         DeleteCriticalSection(&m_criticalSection);
2590     }
2591 
contains(const std::string & path)2592     bool contains(const std::string& path) {
2593         ScopedLock lock(m_criticalSection);
2594         return (m_pathSet.find(path) != m_pathSet.end());
2595     }
2596 
add(const std::string & path)2597     void add(const std::string& path) {
2598         ScopedLock lock(m_criticalSection);
2599         m_pathSet.insert(path);
2600     }
2601 
2602 private:
2603     std::set<std::string> m_pathSet;
2604     CRITICAL_SECTION m_criticalSection;
2605 };
2606 
2607 static NonExistingFilesCache nonExistingFilesCache;
2608 
2609 #endif
2610 
_openHeader(std::ifstream & f,const std::string & path)2611 static std::string _openHeader(std::ifstream &f, const std::string &path)
2612 {
2613 #ifdef SIMPLECPP_WINDOWS
2614     std::string simplePath = simplecpp::simplifyPath(path);
2615     if (nonExistingFilesCache.contains(simplePath))
2616         return "";  // file is known not to exist, skip expensive file open call
2617 
2618     f.open(simplePath.c_str());
2619     if (f.is_open())
2620         return simplePath;
2621     else {
2622         nonExistingFilesCache.add(simplePath);
2623         return "";
2624     }
2625 #else
2626     f.open(path.c_str());
2627     return f.is_open() ? simplecpp::simplifyPath(path) : "";
2628 #endif
2629 }
2630 
getRelativeFileName(const std::string & sourcefile,const std::string & header)2631 static std::string getRelativeFileName(const std::string &sourcefile, const std::string &header)
2632 {
2633     if (sourcefile.find_first_of("\\/") != std::string::npos)
2634         return simplecpp::simplifyPath(sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header);
2635     return simplecpp::simplifyPath(header);
2636 }
2637 
openHeaderRelative(std::ifstream & f,const std::string & sourcefile,const std::string & header)2638 static std::string openHeaderRelative(std::ifstream &f, const std::string &sourcefile, const std::string &header)
2639 {
2640     return _openHeader(f, getRelativeFileName(sourcefile, header));
2641 }
2642 
/**
 * Join includePath and header, inserting a '/' unless includePath is empty
 * or already ends with '/' or a backslash.
 */
static std::string getIncludePathFileName(const std::string &includePath, const std::string &header)
{
    if (includePath.empty())
        return header;

    const char last = includePath[includePath.size() - 1U];
    const bool endsWithSeparator = (last == '/' || last == '\\');
    if (endsWithSeparator)
        return includePath + header;
    return includePath + '/' + header;
}
2650 
openHeaderIncludePath(std::ifstream & f,const simplecpp::DUI & dui,const std::string & header)2651 static std::string openHeaderIncludePath(std::ifstream &f, const simplecpp::DUI &dui, const std::string &header)
2652 {
2653     for (std::list<std::string>::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) {
2654         std::string simplePath = _openHeader(f, getIncludePathFileName(*it, header));
2655         if (!simplePath.empty())
2656             return simplePath;
2657     }
2658     return "";
2659 }
2660 
openHeader(std::ifstream & f,const simplecpp::DUI & dui,const std::string & sourcefile,const std::string & header,bool systemheader)2661 static std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader)
2662 {
2663     if (isAbsolutePath(header))
2664         return _openHeader(f, header);
2665 
2666     std::string ret;
2667 
2668     if (systemheader) {
2669         ret = openHeaderIncludePath(f, dui, header);
2670         return ret.empty() ? openHeaderRelative(f, sourcefile, header) : ret;
2671     }
2672 
2673     ret = openHeaderRelative(f, sourcefile, header);
2674     return ret.empty() ? openHeaderIncludePath(f, dui, header) : ret;
2675 }
2676 
getFileName(const std::map<std::string,simplecpp::TokenList * > & filedata,const std::string & sourcefile,const std::string & header,const simplecpp::DUI & dui,bool systemheader)2677 static std::string getFileName(const std::map<std::string, simplecpp::TokenList *> &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader)
2678 {
2679     if (filedata.empty()) {
2680         return "";
2681     }
2682     if (isAbsolutePath(header)) {
2683         return (filedata.find(header) != filedata.end()) ? simplecpp::simplifyPath(header) : "";
2684     }
2685 
2686     const std::string relativeFilename = getRelativeFileName(sourcefile, header);
2687     if (!systemheader && filedata.find(relativeFilename) != filedata.end())
2688         return relativeFilename;
2689 
2690     for (std::list<std::string>::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) {
2691         std::string s = simplecpp::simplifyPath(getIncludePathFileName(*it, header));
2692         if (filedata.find(s) != filedata.end())
2693             return s;
2694     }
2695 
2696     if (filedata.find(relativeFilename) != filedata.end())
2697         return relativeFilename;
2698 
2699     return "";
2700 }
2701 
hasFile(const std::map<std::string,simplecpp::TokenList * > & filedata,const std::string & sourcefile,const std::string & header,const simplecpp::DUI & dui,bool systemheader)2702 static bool hasFile(const std::map<std::string, simplecpp::TokenList *> &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader)
2703 {
2704     return !getFileName(filedata, sourcefile, header, dui, systemheader).empty();
2705 }
2706 
load(const simplecpp::TokenList & rawtokens,std::vector<std::string> & fileNumbers,const simplecpp::DUI & dui,simplecpp::OutputList * outputList)2707 std::map<std::string, simplecpp::TokenList*> simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector<std::string> &fileNumbers, const simplecpp::DUI &dui, simplecpp::OutputList *outputList)
2708 {
2709     std::map<std::string, simplecpp::TokenList*> ret;
2710 
2711     std::list<const Token *> filelist;
2712 
2713     // -include files
2714     for (std::list<std::string>::const_iterator it = dui.includes.begin(); it != dui.includes.end(); ++it) {
2715         const std::string &filename = realFilename(*it);
2716 
2717         if (ret.find(filename) != ret.end())
2718             continue;
2719 
2720         std::ifstream fin(filename.c_str());
2721         if (!fin.is_open()) {
2722             if (outputList) {
2723                 simplecpp::Output err(fileNumbers);
2724                 err.type = simplecpp::Output::EXPLICIT_INCLUDE_NOT_FOUND;
2725                 err.location = Location(fileNumbers);
2726                 err.msg = "Can not open include file '" + filename + "' that is explicitly included.";
2727                 outputList->push_back(err);
2728             }
2729             continue;
2730         }
2731 
2732         TokenList *tokenlist = new TokenList(fin, fileNumbers, filename, outputList);
2733         if (!tokenlist->front()) {
2734             delete tokenlist;
2735             continue;
2736         }
2737 
2738         ret[filename] = tokenlist;
2739         filelist.push_back(tokenlist->front());
2740     }
2741 
2742     for (const Token *rawtok = rawtokens.cfront(); rawtok || !filelist.empty(); rawtok = rawtok ? rawtok->next : NULL) {
2743         if (rawtok == NULL) {
2744             rawtok = filelist.back();
2745             filelist.pop_back();
2746         }
2747 
2748         if (rawtok->op != '#' || sameline(rawtok->previousSkipComments(), rawtok))
2749             continue;
2750 
2751         rawtok = rawtok->nextSkipComments();
2752         if (!rawtok || rawtok->str() != INCLUDE)
2753             continue;
2754 
2755         const std::string &sourcefile = rawtok->location.file();
2756 
2757         const Token *htok = rawtok->nextSkipComments();
2758         if (!sameline(rawtok, htok))
2759             continue;
2760 
2761         bool systemheader = (htok->str()[0] == '<');
2762         const std::string header(realFilename(htok->str().substr(1U, htok->str().size() - 2U)));
2763         if (hasFile(ret, sourcefile, header, dui, systemheader))
2764             continue;
2765 
2766         std::ifstream f;
2767         const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader);
2768         if (!f.is_open())
2769             continue;
2770 
2771         TokenList *tokens = new TokenList(f, fileNumbers, header2, outputList);
2772         ret[header2] = tokens;
2773         if (tokens->front())
2774             filelist.push_back(tokens->front());
2775     }
2776 
2777     return ret;
2778 }
2779 
preprocessToken(simplecpp::TokenList & output,const simplecpp::Token ** tok1,std::map<std::string,simplecpp::Macro> & macros,std::vector<std::string> & files,simplecpp::OutputList * outputList)2780 static bool preprocessToken(simplecpp::TokenList &output, const simplecpp::Token **tok1, std::map<std::string, simplecpp::Macro> &macros, std::vector<std::string> &files, simplecpp::OutputList *outputList)
2781 {
2782     const simplecpp::Token *tok = *tok1;
2783     const std::map<std::string,simplecpp::Macro>::const_iterator it = macros.find(tok->str());
2784     if (it != macros.end()) {
2785         simplecpp::TokenList value(files);
2786         try {
2787             *tok1 = it->second.expand(&value, tok, macros, files);
2788         } catch (simplecpp::Macro::Error &err) {
2789             if (outputList) {
2790                 simplecpp::Output out(files);
2791                 out.type = simplecpp::Output::SYNTAX_ERROR;
2792                 out.location = err.location;
2793                 out.msg = "failed to expand \'" + tok->str() + "\', " + err.what;
2794                 outputList->push_back(out);
2795             }
2796             return false;
2797         }
2798         output.takeTokens(value);
2799     } else {
2800         if (!tok->comment)
2801             output.push_back(new simplecpp::Token(*tok));
2802         *tok1 = tok->next;
2803     }
2804     return true;
2805 }
2806 
preprocess(simplecpp::TokenList & output,const simplecpp::TokenList & rawtokens,std::vector<std::string> & files,std::map<std::string,simplecpp::TokenList * > & filedata,const simplecpp::DUI & dui,simplecpp::OutputList * outputList,std::list<simplecpp::MacroUsage> * macroUsage,std::list<simplecpp::IfCond> * ifCond)2807 void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenList &rawtokens, std::vector<std::string> &files, std::map<std::string, simplecpp::TokenList *> &filedata, const simplecpp::DUI &dui, simplecpp::OutputList *outputList, std::list<simplecpp::MacroUsage> *macroUsage, std::list<simplecpp::IfCond> *ifCond)
2808 {
2809     std::map<std::string, std::size_t> sizeOfType(rawtokens.sizeOfType);
2810     sizeOfType.insert(std::make_pair("char", sizeof(char)));
2811     sizeOfType.insert(std::make_pair("short", sizeof(short)));
2812     sizeOfType.insert(std::make_pair("short int", sizeOfType["short"]));
2813     sizeOfType.insert(std::make_pair("int", sizeof(int)));
2814     sizeOfType.insert(std::make_pair("long", sizeof(long)));
2815     sizeOfType.insert(std::make_pair("long int", sizeOfType["long"]));
2816     sizeOfType.insert(std::make_pair("long long", sizeof(long long)));
2817     sizeOfType.insert(std::make_pair("float", sizeof(float)));
2818     sizeOfType.insert(std::make_pair("double", sizeof(double)));
2819     sizeOfType.insert(std::make_pair("long double", sizeof(long double)));
2820     sizeOfType.insert(std::make_pair("char *", sizeof(char *)));
2821     sizeOfType.insert(std::make_pair("short *", sizeof(short *)));
2822     sizeOfType.insert(std::make_pair("short int *", sizeOfType["short *"]));
2823     sizeOfType.insert(std::make_pair("int *", sizeof(int *)));
2824     sizeOfType.insert(std::make_pair("long *", sizeof(long *)));
2825     sizeOfType.insert(std::make_pair("long int *", sizeOfType["long *"]));
2826     sizeOfType.insert(std::make_pair("long long *", sizeof(long long *)));
2827     sizeOfType.insert(std::make_pair("float *", sizeof(float *)));
2828     sizeOfType.insert(std::make_pair("double *", sizeof(double *)));
2829     sizeOfType.insert(std::make_pair("long double *", sizeof(long double *)));
2830 
2831     const bool hasInclude = (dui.std.size() == 5 && dui.std.compare(0,3,"c++") == 0 && dui.std >= "c++17");
2832     std::map<TokenString, Macro> macros;
2833     for (std::list<std::string>::const_iterator it = dui.defines.begin(); it != dui.defines.end(); ++it) {
2834         const std::string &macrostr = *it;
2835         const std::string::size_type eq = macrostr.find('=');
2836         const std::string::size_type par = macrostr.find('(');
2837         const std::string macroname = macrostr.substr(0, std::min(eq,par));
2838         if (dui.undefined.find(macroname) != dui.undefined.end())
2839             continue;
2840         const std::string lhs(macrostr.substr(0,eq));
2841         const std::string rhs(eq==std::string::npos ? std::string("1") : macrostr.substr(eq+1));
2842         const Macro macro(lhs, rhs, files);
2843         macros.insert(std::pair<TokenString,Macro>(macro.name(), macro));
2844     }
2845 
2846     macros.insert(std::make_pair("__FILE__", Macro("__FILE__", "__FILE__", files)));
2847     macros.insert(std::make_pair("__LINE__", Macro("__LINE__", "__LINE__", files)));
2848     macros.insert(std::make_pair("__COUNTER__", Macro("__COUNTER__", "__COUNTER__", files)));
2849 
2850     if (dui.std == "c++11")
2851         macros.insert(std::make_pair("__cplusplus", Macro("__cplusplus", "201103L", files)));
2852     else if (dui.std == "c++14")
2853         macros.insert(std::make_pair("__cplusplus", Macro("__cplusplus", "201402L", files)));
2854     else if (dui.std == "c++17")
2855         macros.insert(std::make_pair("__cplusplus", Macro("__cplusplus", "201703L", files)));
2856     else if (dui.std == "c++20")
2857         macros.insert(std::make_pair("__cplusplus", Macro("__cplusplus", "202002L", files)));
2858 
2859     // TRUE => code in current #if block should be kept
2860     // ELSE_IS_TRUE => code in current #if block should be dropped. the code in the #else should be kept.
2861     // ALWAYS_FALSE => drop all code in #if and #else
2862     enum IfState { TRUE, ELSE_IS_TRUE, ALWAYS_FALSE };
2863     std::stack<int> ifstates;
2864     ifstates.push(TRUE);
2865 
2866     std::stack<const Token *> includetokenstack;
2867 
2868     std::set<std::string> pragmaOnce;
2869 
2870     includetokenstack.push(rawtokens.cfront());
2871     for (std::list<std::string>::const_iterator it = dui.includes.begin(); it != dui.includes.end(); ++it) {
2872         const std::map<std::string, TokenList*>::const_iterator f = filedata.find(*it);
2873         if (f != filedata.end())
2874             includetokenstack.push(f->second->cfront());
2875     }
2876 
2877     for (const Token *rawtok = NULL; rawtok || !includetokenstack.empty();) {
2878         if (rawtok == NULL) {
2879             rawtok = includetokenstack.top();
2880             includetokenstack.pop();
2881             continue;
2882         }
2883 
2884         if (rawtok->op == '#' && !sameline(rawtok->previous, rawtok)) {
2885             if (!sameline(rawtok, rawtok->next)) {
2886                 rawtok = rawtok->next;
2887                 continue;
2888             }
2889             rawtok = rawtok->next;
2890             if (!rawtok->name) {
2891                 rawtok = gotoNextLine(rawtok);
2892                 continue;
2893             }
2894 
2895             if (ifstates.size() <= 1U && (rawtok->str() == ELIF || rawtok->str() == ELSE || rawtok->str() == ENDIF)) {
2896                 if (outputList) {
2897                     simplecpp::Output err(files);
2898                     err.type = Output::SYNTAX_ERROR;
2899                     err.location = rawtok->location;
2900                     err.msg = "#" + rawtok->str() + " without #if";
2901                     outputList->push_back(err);
2902                 }
2903                 output.clear();
2904                 return;
2905             }
2906 
2907             if (ifstates.top() == TRUE && (rawtok->str() == ERROR || rawtok->str() == WARNING)) {
2908                 if (outputList) {
2909                     simplecpp::Output err(rawtok->location.files);
2910                     err.type = rawtok->str() == ERROR ? Output::ERROR : Output::WARNING;
2911                     err.location = rawtok->location;
2912                     for (const Token *tok = rawtok->next; tok && sameline(rawtok,tok); tok = tok->next) {
2913                         if (!err.msg.empty() && isNameChar(tok->str()[0]))
2914                             err.msg += ' ';
2915                         err.msg += tok->str();
2916                     }
2917                     err.msg = '#' + rawtok->str() + ' ' + err.msg;
2918                     outputList->push_back(err);
2919                 }
2920                 if (rawtok->str() == ERROR) {
2921                     output.clear();
2922                     return;
2923                 }
2924             }
2925 
2926             if (rawtok->str() == DEFINE) {
2927                 if (ifstates.top() != TRUE)
2928                     continue;
2929                 try {
2930                     const Macro &macro = Macro(rawtok->previous, files);
2931                     if (dui.undefined.find(macro.name()) == dui.undefined.end()) {
2932                         std::map<TokenString, Macro>::iterator it = macros.find(macro.name());
2933                         if (it == macros.end())
2934                             macros.insert(std::pair<TokenString, Macro>(macro.name(), macro));
2935                         else
2936                             it->second = macro;
2937                     }
2938                 } catch (const std::runtime_error &) {
2939                     if (outputList) {
2940                         simplecpp::Output err(files);
2941                         err.type = Output::SYNTAX_ERROR;
2942                         err.location = rawtok->location;
2943                         err.msg = "Failed to parse #define";
2944                         outputList->push_back(err);
2945                     }
2946                     output.clear();
2947                     return;
2948                 }
2949             } else if (ifstates.top() == TRUE && rawtok->str() == INCLUDE) {
2950                 TokenList inc1(files);
2951                 for (const Token *inctok = rawtok->next; sameline(rawtok,inctok); inctok = inctok->next) {
2952                     if (!inctok->comment)
2953                         inc1.push_back(new Token(*inctok));
2954                 }
2955                 TokenList inc2(files);
2956                 if (!inc1.empty() && inc1.cfront()->name) {
2957                     const Token *inctok = inc1.cfront();
2958                     if (!preprocessToken(inc2, &inctok, macros, files, outputList)) {
2959                         output.clear();
2960                         return;
2961                     }
2962                 } else {
2963                     inc2.takeTokens(inc1);
2964                 }
2965 
2966                 if (!inc2.empty() && inc2.cfront()->op == '<' && inc2.cback()->op == '>') {
2967                     TokenString hdr;
2968                     // TODO: Sometimes spaces must be added in the string
2969                     // Somehow preprocessToken etc must be told that the location should be source location not destination location
2970                     for (const Token *tok = inc2.cfront(); tok; tok = tok->next) {
2971                         hdr += tok->str();
2972                     }
2973                     inc2.clear();
2974                     inc2.push_back(new Token(hdr, inc1.cfront()->location));
2975                     inc2.front()->op = '<';
2976                 }
2977 
2978                 if (inc2.empty() || inc2.cfront()->str().size() <= 2U) {
2979                     if (outputList) {
2980                         simplecpp::Output err(files);
2981                         err.type = Output::SYNTAX_ERROR;
2982                         err.location = rawtok->location;
2983                         err.msg = "No header in #include";
2984                         outputList->push_back(err);
2985                     }
2986                     output.clear();
2987                     return;
2988                 }
2989 
2990                 const Token *inctok = inc2.cfront();
2991 
2992                 const bool systemheader = (inctok->op == '<');
2993                 const std::string header(realFilename(inctok->str().substr(1U, inctok->str().size() - 2U)));
2994                 std::string header2 = getFileName(filedata, rawtok->location.file(), header, dui, systemheader);
2995                 if (header2.empty()) {
2996                     // try to load file..
2997                     std::ifstream f;
2998                     header2 = openHeader(f, dui, rawtok->location.file(), header, systemheader);
2999                     if (f.is_open()) {
3000                         TokenList *tokens = new TokenList(f, files, header2, outputList);
3001                         filedata[header2] = tokens;
3002                     }
3003                 }
3004                 if (header2.empty()) {
3005                     if (outputList) {
3006                         simplecpp::Output out(files);
3007                         out.type = Output::MISSING_HEADER;
3008                         out.location = rawtok->location;
3009                         out.msg = "Header not found: " + inctok->str();
3010                         outputList->push_back(out);
3011                     }
3012                 } else if (includetokenstack.size() >= 400) {
3013                     if (outputList) {
3014                         simplecpp::Output out(files);
3015                         out.type = Output::INCLUDE_NESTED_TOO_DEEPLY;
3016                         out.location = rawtok->location;
3017                         out.msg = "#include nested too deeply";
3018                         outputList->push_back(out);
3019                     }
3020                 } else if (pragmaOnce.find(header2) == pragmaOnce.end()) {
3021                     includetokenstack.push(gotoNextLine(rawtok));
3022                     const TokenList *includetokens = filedata.find(header2)->second;
3023                     rawtok = includetokens ? includetokens->cfront() : NULL;
3024                     continue;
3025                 }
3026             } else if (rawtok->str() == IF || rawtok->str() == IFDEF || rawtok->str() == IFNDEF || rawtok->str() == ELIF) {
3027                 if (!sameline(rawtok,rawtok->next)) {
3028                     if (outputList) {
3029                         simplecpp::Output out(files);
3030                         out.type = Output::SYNTAX_ERROR;
3031                         out.location = rawtok->location;
3032                         out.msg = "Syntax error in #" + rawtok->str();
3033                         outputList->push_back(out);
3034                     }
3035                     output.clear();
3036                     return;
3037                 }
3038 
3039                 bool conditionIsTrue;
3040                 if (ifstates.top() == ALWAYS_FALSE || (ifstates.top() == ELSE_IS_TRUE && rawtok->str() != ELIF))
3041                     conditionIsTrue = false;
3042                 else if (rawtok->str() == IFDEF)
3043                     conditionIsTrue = (macros.find(rawtok->next->str()) != macros.end() || (hasInclude && rawtok->next->str() == HAS_INCLUDE));
3044                 else if (rawtok->str() == IFNDEF)
3045                     conditionIsTrue = (macros.find(rawtok->next->str()) == macros.end() && !(hasInclude && rawtok->next->str() == HAS_INCLUDE));
3046                 else { /*if (rawtok->str() == IF || rawtok->str() == ELIF)*/
3047                     TokenList expr(files);
3048                     for (const Token *tok = rawtok->next; tok && tok->location.sameline(rawtok->location); tok = tok->next) {
3049                         if (!tok->name) {
3050                             expr.push_back(new Token(*tok));
3051                             continue;
3052                         }
3053 
3054                         if (tok->str() == DEFINED) {
3055                             tok = tok->next;
3056                             const bool par = (tok && tok->op == '(');
3057                             if (par)
3058                                 tok = tok->next;
3059                             if (tok) {
3060                                 if (macros.find(tok->str()) != macros.end())
3061                                     expr.push_back(new Token("1", tok->location));
3062                                 else if (hasInclude && tok->str() == HAS_INCLUDE)
3063                                     expr.push_back(new Token("1", tok->location));
3064                                 else
3065                                     expr.push_back(new Token("0", tok->location));
3066                             }
3067                             if (par)
3068                                 tok = tok ? tok->next : NULL;
3069                             if (!tok || !sameline(rawtok,tok) || (par && tok->op != ')')) {
3070                                 if (outputList) {
3071                                     Output out(rawtok->location.files);
3072                                     out.type = Output::SYNTAX_ERROR;
3073                                     out.location = rawtok->location;
3074                                     out.msg = "failed to evaluate " + std::string(rawtok->str() == IF ? "#if" : "#elif") + " condition";
3075                                     outputList->push_back(out);
3076                                 }
3077                                 output.clear();
3078                                 return;
3079                             }
3080                             continue;
3081                         }
3082 
3083                         if (hasInclude && tok->str() == HAS_INCLUDE) {
3084                             tok = tok->next;
3085                             const bool par = (tok && tok->op == '(');
3086                             if (par)
3087                                 tok = tok->next;
3088                             if (tok) {
3089                                 const std::string &sourcefile = rawtok->location.file();
3090                                 const bool systemheader = (tok->str()[0] == '<');
3091                                 const std::string header(realFilename(tok->str().substr(1U, tok->str().size() - 2U)));
3092                                 std::ifstream f;
3093                                 const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader);
3094                                 expr.push_back(new Token(header2.empty() ? "0" : "1", tok->location));
3095                             }
3096                             if (par)
3097                                 tok = tok ? tok->next : NULL;
3098                             if (!tok || !sameline(rawtok,tok) || (par && tok->op != ')')) {
3099                                 if (outputList) {
3100                                     Output out(rawtok->location.files);
3101                                     out.type = Output::SYNTAX_ERROR;
3102                                     out.location = rawtok->location;
3103                                     out.msg = "failed to evaluate " + std::string(rawtok->str() == IF ? "#if" : "#elif") + " condition";
3104                                     outputList->push_back(out);
3105                                 }
3106                                 output.clear();
3107                                 return;
3108                             }
3109                             continue;
3110                         }
3111 
3112                         const Token *tmp = tok;
3113                         if (!preprocessToken(expr, &tmp, macros, files, outputList)) {
3114                             output.clear();
3115                             return;
3116                         }
3117                         if (!tmp)
3118                             break;
3119                         tok = tmp->previous;
3120                     }
3121                     try {
3122                         if (ifCond) {
3123                             std::string E;
3124                             for (const simplecpp::Token *tok = expr.cfront(); tok; tok = tok->next)
3125                                 E += (E.empty() ? "" : " ") + tok->str();
3126                             const long long result = evaluate(expr, sizeOfType);
3127                             conditionIsTrue = (result != 0);
3128                             ifCond->push_back(IfCond(rawtok->location, E, result));
3129                         } else {
3130                             const long long result = evaluate(expr, sizeOfType);
3131                             conditionIsTrue = (result != 0);
3132                         }
3133                     } catch (const std::exception &e) {
3134                         if (outputList) {
3135                             Output out(rawtok->location.files);
3136                             out.type = Output::SYNTAX_ERROR;
3137                             out.location = rawtok->location;
3138                             out.msg = "failed to evaluate " + std::string(rawtok->str() == IF ? "#if" : "#elif") + " condition";
3139                             if (e.what() && *e.what())
3140                                 out.msg += std::string(", ") + e.what();
3141                             outputList->push_back(out);
3142                         }
3143                         output.clear();
3144                         return;
3145                     }
3146                 }
3147 
3148                 if (rawtok->str() != ELIF) {
3149                     // push a new ifstate..
3150                     if (ifstates.top() != TRUE)
3151                         ifstates.push(ALWAYS_FALSE);
3152                     else
3153                         ifstates.push(conditionIsTrue ? TRUE : ELSE_IS_TRUE);
3154                 } else if (ifstates.top() == TRUE) {
3155                     ifstates.top() = ALWAYS_FALSE;
3156                 } else if (ifstates.top() == ELSE_IS_TRUE && conditionIsTrue) {
3157                     ifstates.top() = TRUE;
3158                 }
3159             } else if (rawtok->str() == ELSE) {
3160                 ifstates.top() = (ifstates.top() == ELSE_IS_TRUE) ? TRUE : ALWAYS_FALSE;
3161             } else if (rawtok->str() == ENDIF) {
3162                 ifstates.pop();
3163             } else if (rawtok->str() == UNDEF) {
3164                 if (ifstates.top() == TRUE) {
3165                     const Token *tok = rawtok->next;
3166                     while (sameline(rawtok,tok) && tok->comment)
3167                         tok = tok->next;
3168                     if (sameline(rawtok, tok))
3169                         macros.erase(tok->str());
3170                 }
3171             } else if (ifstates.top() == TRUE && rawtok->str() == PRAGMA && rawtok->next && rawtok->next->str() == ONCE && sameline(rawtok,rawtok->next)) {
3172                 pragmaOnce.insert(rawtok->location.file());
3173             }
3174             rawtok = gotoNextLine(rawtok);
3175             continue;
3176         }
3177 
3178         if (ifstates.top() != TRUE) {
3179             // drop code
3180             rawtok = gotoNextLine(rawtok);
3181             continue;
3182         }
3183 
3184         bool hash=false, hashhash=false;
3185         if (rawtok->op == '#' && sameline(rawtok,rawtok->next)) {
3186             if (rawtok->next->op != '#') {
3187                 hash = true;
3188                 rawtok = rawtok->next; // skip '#'
3189             } else if (sameline(rawtok,rawtok->next->next)) {
3190                 hashhash = true;
3191                 rawtok = rawtok->next->next; // skip '#' '#'
3192             }
3193         }
3194 
3195         const Location loc(rawtok->location);
3196         TokenList tokens(files);
3197 
3198         if (!preprocessToken(tokens, &rawtok, macros, files, outputList)) {
3199             output.clear();
3200             return;
3201         }
3202 
3203         if (hash || hashhash) {
3204             std::string s;
3205             for (const Token *hashtok = tokens.cfront(); hashtok; hashtok = hashtok->next)
3206                 s += hashtok->str();
3207             if (hash)
3208                 output.push_back(new Token('\"' + s + '\"', loc));
3209             else if (output.back())
3210                 output.back()->setstr(output.cback()->str() + s);
3211             else
3212                 output.push_back(new Token(s, loc));
3213         } else {
3214             output.takeTokens(tokens);
3215         }
3216     }
3217 
3218     if (macroUsage) {
3219         for (std::map<TokenString, simplecpp::Macro>::const_iterator macroIt = macros.begin(); macroIt != macros.end(); ++macroIt) {
3220             const Macro &macro = macroIt->second;
3221             const std::list<Location> &usage = macro.usage();
3222             for (std::list<Location>::const_iterator usageIt = usage.begin(); usageIt != usage.end(); ++usageIt) {
3223                 MacroUsage mu(usageIt->files, macro.valueDefinedInCode());
3224                 mu.macroName = macro.name();
3225                 mu.macroLocation = macro.defineLocation();
3226                 mu.useLocation = *usageIt;
3227                 macroUsage->push_back(mu);
3228             }
3229         }
3230     }
3231 }
3232 
cleanup(std::map<std::string,TokenList * > & filedata)3233 void simplecpp::cleanup(std::map<std::string, TokenList*> &filedata)
3234 {
3235     for (std::map<std::string, TokenList*>::iterator it = filedata.begin(); it != filedata.end(); ++it)
3236         delete it->second;
3237     filedata.clear();
3238 }
3239