// Copyright (c) 2018-2019, NVIDIA CORPORATION.  All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "preprocessor.h"
#include "characters.h"
#include "message.h"
#include "prescan.h"
#include "../common/idioms.h"
#include <algorithm>
#include <cinttypes>
#include <cstddef>
#include <ctime>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <sstream>
#include <utility>

namespace Fortran::parser {

Definition::Definition(
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
  : replacement_{Tokenize({}, repl, firstToken, tokens)} {}

Definition::Definition(const std::vector<std::string> &argNames,
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
    bool isVariadic)
  : isFunctionLike_{true},
    argumentCount_(argNames.size()), isVariadic_{isVariadic},
    replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}

Definition::Definition(const std::string &predefined, AllSources &sources)
  : isPredefined_{true}, replacement_{predefined,
                             sources.AddCompilerInsertion(predefined).start()} {
}

bool Definition::set_isDisabled(bool disable) {
  bool was{isDisabled_};
  isDisabled_ = disable;
  return was;
}

static bool IsLegalIdentifierStart(const CharBlock &cpl) {
  return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
}

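// Convert a macro replacement list into its internal form, in which each
// macro argument name is replaced by a two-character token "~A", "~B", &c.
// in argument order.  For example (illustrative only), the body of
//   #define SQR(x) ((x)*(x))
// is stored as the token sequence ((~A)*(~A)).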
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end());
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{token.TokenAt(firstToken + j)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        result.Put(it->second, token.GetTokenProvenance(j));
        continue;
      }
    }
    result.Put(token, firstToken + j, 1);
  }
  return result;
}

static std::size_t AfterLastNonBlank(const TokenSequence &tokens) {
  for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) {
    if (!tokens.TokenAt(j - 1).IsBlank()) {
      return j;
    }
  }
  return 0;
}

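// Convert the tokens of a macro argument into a double-quoted string for
// the '#' stringification operator; any embedded '"' or '\' characters are
// doubled.  E.g. (illustrative), the argument tokens  X Y  become the
// single token  "X Y" .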
static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence result;
  Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
  result.PutNextTokenChar('"', quoteProvenance);
  for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
    const CharBlock &token{tokens.TokenAt(j)};
    std::size_t bytes{token.size()};
    for (std::size_t k{0}; k < bytes; ++k) {
      char ch{token[k]};
      Provenance from{tokens.GetTokenProvenance(j, k)};
      if (ch == '"' || ch == '\\') {
        result.PutNextTokenChar(ch, from);
      }
      result.PutNextTokenChar(ch, from);
    }
  }
  result.PutNextTokenChar('"', quoteProvenance);
  result.CloseToken();
  return result;
}

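// Expand a function-like macro by substituting the actual argument token
// sequences into the stored replacement list.  Handles '#' (stringify the
// following argument), '##' (token pasting), __VA_ARGS__, and
// __VA_OPT__(...).  A rough sketch of the effect (illustrative only):
//   #define SHOW(x) #x        SHOW(a b)  ->  "a b"
//   #define CAT(a,b) a##b     CAT(fo,o)  ->  foo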
TokenSequence Definition::Apply(
    const std::vector<TokenSequence> &args, AllSources &allSources) {
  TokenSequence result;
  bool pasting{false};
  bool skipping{false};
  int parenthesesNesting{0};
  std::size_t tokens{replacement_.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    const CharBlock &token{replacement_.TokenAt(j)};
    std::size_t bytes{token.size()};
    if (skipping) {
      if (bytes == 1) {
        if (token[0] == '(') {
          ++parenthesesNesting;
        } else if (token[0] == ')') {
          skipping = --parenthesesNesting > 0;
        }
      }
      continue;
    }
    if (bytes == 2 && token[0] == '~') {
      std::size_t index = token[1] - 'A';
      if (index >= args.size()) {
        continue;
      }
      std::size_t afterLastNonBlank{AfterLastNonBlank(result)};
      if (afterLastNonBlank > 0 &&
          result.TokenAt(afterLastNonBlank - 1).ToString() == "#") {
        // stringifying
        while (result.SizeInTokens() >= afterLastNonBlank) {
          result.pop_back();
        }
        result.Put(Stringify(args[index], allSources));
      } else {
        std::size_t argTokens{args[index].SizeInTokens()};
        for (std::size_t k{0}; k < argTokens; ++k) {
          if (!pasting || !args[index].TokenAt(k).IsBlank()) {
            result.Put(args[index], k);
            pasting = false;
          }
        }
      }
    } else if (bytes == 2 && token[0] == '#' && token[1] == '#') {
      // Token pasting operator in body (not expanded argument); discard any
      // immediately preceding white space, then reopen the last token.
      while (!result.empty() &&
          result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
        result.pop_back();
      }
      if (!result.empty()) {
        result.ReopenLastToken();
        pasting = true;
      }
    } else if (pasting && token.IsBlank()) {
      // Delete whitespace immediately following ## in the body.
    } else if (bytes == 11 && isVariadic_ &&
        token.ToString() == "__VA_ARGS__") {
      Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')};
      for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
        if (k > argumentCount_) {
          result.Put(","s, commaProvenance);
        }
        result.Put(args[k]);
      }
    } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
        j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
        parenthesesNesting == 0) {
      parenthesesNesting = 1;
      skipping = args.size() == argumentCount_;
      ++j;
    } else {
      if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
        ++parenthesesNesting;
      } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
        if (--parenthesesNesting == 0) {
          skipping = false;
          continue;
        }
      }
      result.Put(replacement_, j);
    }
  }
  return result;
}

static std::string FormatTime(const std::time_t &now, const char *format) {
  char buffer[16];
  return {buffer,
      std::strftime(buffer, sizeof buffer, format, std::localtime(&now))};
}

Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
  // Capture current local date & time once now to avoid having the values
  // of __DATE__ or __TIME__ change during compilation.
  std::time_t now;
  std::time(&now);
  definitions_.emplace(SaveTokenAsName("__DATE__"s),  // e.g., "Jun 16 1904"
      Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
  definitions_.emplace(SaveTokenAsName("__TIME__"s),  // e.g., "23:59:60"
      Definition{FormatTime(now, "\"%T\""), allSources});
  // The values of these predefined macros depend on their invocation sites.
  definitions_.emplace(
      SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
  definitions_.emplace(
      SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
}

void Preprocessor::Define(std::string macro, std::string value) {
  definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
}

void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }

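// Apply all macro definitions to a token sequence, returning std::nullopt
// quickly when the input contains no defined macro name at all.  A
// function-like macro is replaced only when the next nonblank token after
// its name is '('; e.g. (illustrative), with
//   #define F(a) (a+1)
// the input  F(x) * F  becomes  (x+1) * F .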
std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, const Prescanner &prescanner) {
  // Do quick scan for any use of a defined name.
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j;
  for (j = 0; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        IsNameDefined(token)) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt;  // input contains nothing that would be replaced
  }
  TokenSequence result{input, 0, j};
  for (; j < tokens; ++j) {
    const CharBlock &token{input.TokenAt(j)};
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    auto it{definitions_.find(token)};
    if (it == definitions_.end()) {
      result.Put(input, j);
      continue;
    }
    Definition &def{it->second};
    if (def.isDisabled()) {
      result.Put(input, j);
      continue;
    }
    if (!def.isFunctionLike()) {
      if (def.isPredefined()) {
        std::string name{def.replacement().TokenAt(0).ToString()};
        std::string repl;
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::stringstream ss;
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
      }
      def.set_isDisabled(true);
      TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)};
      def.set_isDisabled(false);
      if (!replaced.empty()) {
        ProvenanceRange from{def.replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      continue;
    }
    // Possible function-like macro call.  Skip spaces and newlines to see
    // whether '(' is next.
    std::size_t k{j};
    bool leftParen{false};
    while (++k < tokens) {
      const CharBlock &lookAhead{input.TokenAt(k)};
      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
        break;
      }
    }
    if (!leftParen) {
      result.Put(input, j);
      continue;
    }
    std::vector<std::size_t> argStart{++k};
    for (int nesting{0}; k < tokens; ++k) {
      CharBlock token{input.TokenAt(k)};
      if (token.size() == 1) {
        char ch{token[0]};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
    }
    if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
      // Subtle: () is zero arguments, not one empty argument,
      // unless one argument was expected.
      argStart.clear();
    }
    if (k >= tokens || argStart.size() < def.argumentCount() ||
        (argStart.size() > def.argumentCount() && !def.isVariadic())) {
      result.Put(input, j);
      continue;
    }
    std::vector<TokenSequence> args;
    for (std::size_t n{0}; n < argStart.size(); ++n) {
      std::size_t at{argStart[n]};
      std::size_t count{
          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
      args.emplace_back(TokenSequence(input, at, count));
    }
    def.set_isDisabled(true);
    TokenSequence replaced{
        ReplaceMacros(def.Apply(args, allSources_), prescanner)};
    def.set_isDisabled(false);
    if (!replaced.empty()) {
      ProvenanceRange from{def.replacement().GetProvenanceRange()};
      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
      ProvenanceRange newRange{
          allSources_.AddMacroCall(from, use, replaced.ToString())};
      result.Put(replaced, newRange);
    }
    j = k;  // advance to the terminal ')'
  }
  return result;
}

TokenSequence Preprocessor::ReplaceMacros(
    const TokenSequence &tokens, const Prescanner &prescanner) {
  if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
    return std::move(*repl);
  }
  return tokens;
}

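// Process a preprocessor directive line: #define, #undef, #ifdef, #ifndef,
// #if, #elif, #else, #endif, #error, #warning, #comment, #note, and
// #include are handled below; #line is recognized and ignored.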
void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
  std::size_t tokens{dir.SizeInTokens()};
  std::size_t j{dir.SkipBlanks(0)};
  if (j == tokens) {
    return;
  }
  if (dir.TokenAt(j).ToString() != "#") {
    prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
    return;
  }
  j = dir.SkipBlanks(j + 1);
  while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
    --tokens;
  }
  if (j == tokens) {
    return;
  }
  if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
    return;  // treat like #line, ignore it
  }
  std::size_t dirOffset{j};
  std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
  j = dir.SkipBlanks(j + 1);
  CharBlock nameToken;
  if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
    nameToken = dir.TokenAt(j);
  }
  if (dirName == "line") {
    // #line is ignored
  } else if (dirName == "define") {
    if (nameToken.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#define: missing or invalid name"_err_en_US);
      return;
    }
    nameToken = SaveTokenAsName(nameToken);
    definitions_.erase(nameToken);
    if (++j < tokens && dir.TokenAt(j).size() == 1 &&
        dir.TokenAt(j)[0] == '(') {
      j = dir.SkipBlanks(j + 1);
      std::vector<std::string> argName;
      bool isVariadic{false};
      if (dir.TokenAt(j).ToString() != ")") {
        while (true) {
          std::string an{dir.TokenAt(j).ToString()};
          if (an == "...") {
            isVariadic = true;
          } else {
            if (an.empty() || !IsLegalIdentifierStart(an[0])) {
              prescanner->Say(dir.GetTokenProvenanceRange(j),
                  "#define: missing or invalid argument name"_err_en_US);
              return;
            }
            argName.push_back(an);
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          std::string punc{dir.TokenAt(j).ToString()};
          if (punc == ")") {
            break;
          }
          if (isVariadic || punc != ",") {
            prescanner->Say(dir.GetTokenProvenanceRange(j),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
        }
        if (std::set<std::string>(argName.begin(), argName.end()).size() !=
            argName.size()) {
          prescanner->Say(dir.GetTokenProvenance(dirOffset),
              "#define: argument names are not distinct"_err_en_US);
          return;
        }
      }
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(std::make_pair(
          nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
    } else {
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(
          std::make_pair(nameToken, Definition{dir, j, tokens - j}));
    }
  } else if (dirName == "undef") {
    if (nameToken.empty()) {
      prescanner->Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#undef: missing or invalid name"_err_en_US);
    } else {
      j = dir.SkipBlanks(j + 1);
      if (j != tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#undef: excess tokens at end of directive"_err_en_US);
      } else {
        definitions_.erase(nameToken);
      }
    }
  } else if (dirName == "ifdef" || dirName == "ifndef") {
    bool doThen{false};
    if (nameToken.empty()) {
      prescanner->Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#%s: missing name"_err_en_US, dirName);
    } else {
      j = dir.SkipBlanks(j + 1);
      if (j != tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#%s: excess tokens at end of directive"_en_US, dirName);
      }
      doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
    }
    if (doThen) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenance(dirOffset));
    }
  } else if (dirName == "if") {
    if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "else") {
    if (j != tokens) {
      prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#else: excess tokens at end of directive"_err_en_US);
    } else if (ifStack_.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
      SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "elif") {
    if (ifStack_.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
      SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "endif") {
    if (j != tokens) {
      prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#endif: excess tokens at end of directive"_err_en_US);
    } else if (ifStack_.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
    }
  } else if (dirName == "error") {
    prescanner->Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_err_en_US, dir.ToString());
  } else if (dirName == "warning" || dirName == "comment" ||
      dirName == "note") {
    prescanner->Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_en_US, dir.ToString());
  } else if (dirName == "include") {
    if (j == tokens) {
      prescanner->Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#include: missing name of file to include"_err_en_US);
      return;
    }
    std::string include;
    if (dir.TokenAt(j).ToString() == "<") {
      std::size_t k{j + 1};
      if (k >= tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#include: file name missing"_err_en_US);
        return;
      }
      while (k < tokens && dir.TokenAt(k) != ">") {
        ++k;
      }
      if (k >= tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#include: expected '>' at end of included file"_en_US);
      } else if (k + 1 < tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1),
            "#include: extra stuff ignored after '>'"_en_US);
      }
      TokenSequence braced{dir, j + 1, k - j - 1};
      include = ReplaceMacros(braced, *prescanner).ToString();
    } else if (j + 1 == tokens &&
        (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
        include.substr(include.size() - 1, 1) == "\"") {
      include = include.substr(1, include.size() - 2);
    } else {
      prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#include: expected name of file to include"_err_en_US);
      return;
    }
    if (include.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#include: empty include file name"_err_en_US);
      return;
    }
    std::stringstream error;
    const SourceFile *included{allSources_.Open(include, &error)};
    if (included == nullptr) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#include: %s"_err_en_US, error.str());
    } else if (included->bytes() > 0) {
      ProvenanceRange fileRange{
          allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
      Prescanner{*prescanner}
          .set_encoding(included->encoding())
          .Prescan(fileRange);
    }
  } else {
    prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
        "#%s: unknown or unimplemented directive"_err_en_US, dirName);
  }
}

CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  return {names_.back().data(), names_.back().size()};
}

bool Preprocessor::IsNameDefined(const CharBlock &token) {
  return definitions_.find(token) != definitions_.end();
}

static std::string GetDirectiveName(
    const TokenSequence &line, std::size_t *rest) {
  std::size_t tokens{line.SizeInTokens()};
  std::size_t j{line.SkipBlanks(0)};
  if (j == tokens || line.TokenAt(j).ToString() != "#") {
    *rest = tokens;
    return "";
  }
  j = line.SkipBlanks(j + 1);
  if (j == tokens) {
    *rest = tokens;
    return "";
  }
  *rest = line.SkipBlanks(j + 1);
  return ToLowerCaseLetters(line.TokenAt(j).ToString());
}

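// Skip over the lines of a disabled conditional region until the matching
// #endif (or, when an #else/#elif may still enable a branch, until one of
// those).  A nesting counter ensures that complete #if/#ifdef/#ifndef...
// #endif groups inside the skipped region are passed over intact.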
void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
    IsElseActive isElseActive, Prescanner *prescanner,
    ProvenanceRange provenanceRange) {
  int nesting{0};
  while (!prescanner->IsAtEnd()) {
    if (!prescanner->IsNextLinePreprocessorDirective()) {
      prescanner->NextLine();
      continue;
    }
    TokenSequence line{prescanner->TokenizePreprocessorDirective()};
    std::size_t rest{0};
    std::string dn{GetDirectiveName(line, &rest)};
    if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
      ++nesting;
    } else if (dn == "endif") {
      if (nesting-- == 0) {
        return;
      }
    } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
      if (dn == "else") {
        ifStack_.push(CanDeadElseAppear::No);
        return;
      }
      if (dn == "elif" &&
          IsIfPredicateTrue(
              line, rest, line.SizeInTokens() - rest, prescanner)) {
        ifStack_.push(CanDeadElseAppear::Yes);
        return;
      }
    }
  }
  prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
}

// Precedence level codes used here to accommodate mixed Fortran and C:
// 15: parentheses and constants, logical !, bitwise ~
// 14: unary + and -
// 13: **
// 12: *, /, % (modulus)
// 11: + and -
// 10: << and >>
//  9: bitwise &
//  8: bitwise ^
//  7: bitwise |
//  6: relations (.EQ., ==, &c.)
//  5: .NOT.
//  4: .AND., &&
//  3: .OR., ||
//  2: .EQV. and .NEQV. / .XOR.
//  1: ? :
//  0: ,
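// Parse and evaluate a #if/#elif controlling expression with a simple
// precedence-climbing recursive descent.  Using the table above, an
// expression such as  1 << 2 == 4  groups as  (1 << 2) == 4  because '<<'
// (level 10) binds more tightly than '==' (level 6); relational operators
// yield -1 for true and 0 for false.  (Illustrative example only.)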
static std::int64_t ExpressionValue(const TokenSequence &token,
    int minimumPrecedence, std::size_t *atToken,
    std::optional<Message> *error) {
  enum Operator {
    PARENS,
    CONST,
    NOTZERO,  // !
    COMPLEMENT,  // ~
    UPLUS,
    UMINUS,
    POWER,
    TIMES,
    DIVIDE,
    MODULUS,
    ADD,
    SUBTRACT,
    LEFTSHIFT,
    RIGHTSHIFT,
    BITAND,
    BITXOR,
    BITOR,
    LT,
    LE,
    EQ,
    NE,
    GE,
    GT,
    NOT,
    AND,
    OR,
    EQV,
    NEQV,
    SELECT,
    COMMA
  };
  static const int precedence[]{
      15, 15, 15, 15,  // (), constant, !, ~
      14, 14,  // unary +, -
      13, 12, 12, 12, 11, 11, 10, 10,  // **, *, /, %, +, -, <<, >>
      9, 8, 7,  // &, ^, |
      6, 6, 6, 6, 6, 6,  // relations .LT. to .GT.
      5, 4, 3, 2, 2,  // .NOT., .AND., .OR., .EQV., .NEQV.
      1, 0  // ?: and ,
  };
  static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
      11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};

  static std::map<std::string, enum Operator> opNameMap;
  if (opNameMap.empty()) {
    opNameMap["("] = PARENS;
    opNameMap["!"] = NOTZERO;
    opNameMap["~"] = COMPLEMENT;
    opNameMap["**"] = POWER;
    opNameMap["*"] = TIMES;
    opNameMap["/"] = DIVIDE;
    opNameMap["%"] = MODULUS;
    opNameMap["+"] = ADD;
    opNameMap["-"] = SUBTRACT;
    opNameMap["<<"] = LEFTSHIFT;
    opNameMap[">>"] = RIGHTSHIFT;
    opNameMap["&"] = BITAND;
    opNameMap["^"] = BITXOR;
    opNameMap["|"] = BITOR;
    opNameMap[".lt."] = opNameMap["<"] = LT;
    opNameMap[".le."] = opNameMap["<="] = LE;
    opNameMap[".eq."] = opNameMap["=="] = EQ;
    opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
    opNameMap[".ge."] = opNameMap[">="] = GE;
    opNameMap[".gt."] = opNameMap[">"] = GT;
    opNameMap[".not."] = NOT;
    opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
    opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
    opNameMap[".eqv."] = EQV;
    opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
    opNameMap["?"] = SELECT;
    opNameMap[","] = COMMA;
  }

  std::size_t tokens{token.SizeInTokens()};
  CHECK(tokens > 0);
  if (*atToken >= tokens) {
    *error =
        Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
    return 0;
  }

  // Parse and evaluate a primary or a unary operator and its operand.
  std::size_t opAt{*atToken};
  std::string t{token.TokenAt(opAt).ToString()};
  enum Operator op;
  std::int64_t left{0};
  if (t == "(") {
    op = PARENS;
  } else if (IsDecimalDigit(t[0])) {
    op = CONST;
    std::size_t consumed{0};
    left = std::stoll(t, &consumed, 0 /*base to be detected*/);
    if (consumed < t.size()) {
      *error = Message{token.GetTokenProvenanceRange(opAt),
          "Uninterpretable numeric constant '%s'"_err_en_US, t};
      return 0;
    }
  } else if (IsLegalIdentifierStart(t[0])) {
    // undefined macro name -> zero
    // TODO: BOZ constants?
    op = CONST;
  } else if (t == "+") {
    op = UPLUS;
  } else if (t == "-") {
    op = UMINUS;
  } else if (t == "." && *atToken + 2 < tokens &&
      ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
      token.TokenAt(*atToken + 2).ToString() == ".") {
    op = NOT;
    *atToken += 2;
  } else {
    auto it{opNameMap.find(t)};
    if (it != opNameMap.end()) {
      op = it->second;
    } else {
      *error = Message{token.GetTokenProvenanceRange(opAt),
          "operand expected in expression"_err_en_US};
      return 0;
    }
  }
  if (precedence[op] < minimumPrecedence) {
    *error = Message{token.GetTokenProvenanceRange(opAt),
        "operator precedence error"_err_en_US};
    return 0;
  }
  ++*atToken;
  if (op != CONST) {
    left = ExpressionValue(token, operandPrecedence[op], atToken, error);
    if (error->has_value()) {
      return 0;
    }
    switch (op) {
    case PARENS:
      if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
        ++*atToken;
        break;
      }
      if (*atToken >= tokens) {
        *error = Message{token.GetProvenanceRange(),
            "')' missing from expression"_err_en_US};
      } else {
        *error = Message{
            token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
      }
      return 0;
    case NOTZERO: left = !left; break;
    case COMPLEMENT: left = ~left; break;
    case UPLUS: break;
    case UMINUS: left = -left; break;
    case NOT: left = -!left; break;
    default: CRASH_NO_CASE;
    }
  }

  // Parse and evaluate binary operators and their second operands, if present.
  while (*atToken < tokens) {
    int advance{1};
    t = token.TokenAt(*atToken).ToString();
    if (t == "." && *atToken + 2 < tokens &&
        token.TokenAt(*atToken + 2).ToString() == ".") {
      t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
      advance = 3;
    }
    auto it{opNameMap.find(t)};
    if (it == opNameMap.end()) {
      break;
    }
    op = it->second;
    if (op < POWER || precedence[op] < minimumPrecedence) {
      break;
    }
    opAt = *atToken;
    *atToken += advance;

    std::int64_t right{
        ExpressionValue(token, operandPrecedence[op], atToken, error)};
    if (error->has_value()) {
      return 0;
    }

    switch (op) {
    case POWER:
      if (left == 0) {
        if (right < 0) {
          *error = Message{token.GetTokenProvenanceRange(opAt),
              "0 ** negative power"_err_en_US};
        }
      } else if (left != 1 && right != 1) {
        if (right <= 0) {
          left = !right;
        } else {
          std::int64_t power{1};
          for (; right > 0; --right) {
            if ((power * left) / left != power) {
              *error = Message{token.GetTokenProvenanceRange(opAt),
                  "overflow in exponentiation"_err_en_US};
              left = 1;
            }
            power *= left;
          }
          left = power;
        }
      }
      break;
    case TIMES:
      if (left != 0 && right != 0 && ((left * right) / left) != right) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in multiplication"_err_en_US};
      }
      left = left * right;
      break;
    case DIVIDE:
      if (right == 0) {
        *error = Message{
            token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
        left = 0;
      } else {
        left = left / right;
      }
      break;
    case MODULUS:
      if (right == 0) {
        *error = Message{
            token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
        left = 0;
      } else {
        left = left % right;
      }
      break;
    case ADD:
      if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in addition"_err_en_US};
      }
      left = left + right;
      break;
    case SUBTRACT:
      if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in subtraction"_err_en_US};
      }
      left = left - right;
      break;
    case LEFTSHIFT:
      if (right < 0 || right > 64) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "bad left shift count"_err_en_US};
      }
      left = right >= 64 ? 0 : left << right;
      break;
    case RIGHTSHIFT:
      if (right < 0 || right > 64) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "bad right shift count"_err_en_US};
      }
      left = right >= 64 ? 0 : left >> right;
      break;
    case BITAND:
    case AND: left = left & right; break;
    case BITXOR: left = left ^ right; break;
    case BITOR:
    case OR: left = left | right; break;
    case LT: left = -(left < right); break;
    case LE: left = -(left <= right); break;
    case EQ: left = -(left == right); break;
    case NE: left = -(left != right); break;
    case GE: left = -(left >= right); break;
    case GT: left = -(left > right); break;
    case EQV: left = -(!left == !right); break;
    case NEQV: left = -(!left != !right); break;
    case SELECT:
      if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "':' required in selection expression"_err_en_US};
        return 0;
      } else {
        ++*atToken;
        std::int64_t third{
            ExpressionValue(token, operandPrecedence[op], atToken, error)};
        left = left != 0 ? right : third;
      }
      break;
    case COMMA: left = right; break;
    default: CRASH_NO_CASE;
    }
  }
  return left;
}

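// Evaluate the controlling expression of an #if or #elif directive.
// Occurrences of "defined NAME" and "defined(NAME)" are rewritten to '1' or
// '0' before macro replacement takes place; any identifiers that remain
// after replacement evaluate as zero.  E.g. (illustrative),
//   #if defined(FOO) && BAR == 2
// is true only when FOO is defined and BAR expands to 2.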
bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
    std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
  TokenSequence expr1{expr, first, exprTokens};
  if (expr1.HasBlanks()) {
    expr1.RemoveBlanks();
  }
  TokenSequence expr2;
  for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
    if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
      CharBlock name;
      if (j + 3 < expr1.SizeInTokens() &&
          expr1.TokenAt(j + 1).ToString() == "(" &&
          expr1.TokenAt(j + 3).ToString() == ")") {
        name = expr1.TokenAt(j + 2);
        j += 3;
      } else if (j + 1 < expr1.SizeInTokens() &&
          IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
        name = expr1.TokenAt(++j);
      }
      if (!name.empty()) {
        char truth{IsNameDefined(name) ? '1' : '0'};
        expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
        continue;
      }
    }
    expr2.Put(expr1, j);
  }
  TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
  if (expr3.HasBlanks()) {
    expr3.RemoveBlanks();
  }
  if (expr3.empty()) {
    prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
    return false;
  }
  std::size_t atToken{0};
  std::optional<Message> error;
  bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
  if (error.has_value()) {
    prescanner->Say(std::move(*error));
  } else if (atToken < expr3.SizeInTokens() &&
      expr3.TokenAt(atToken).ToString() != "!") {
    prescanner->Say(expr3.GetIntervalProvenanceRange(
                        atToken, expr3.SizeInTokens() - atToken),
        atToken == 0 ? "could not parse any expression"_err_en_US
                     : "excess characters after expression"_err_en_US);
  }
  return result;
}
}  // namespace Fortran::parser