1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
Definition(const TokenSequence & repl,std::size_t firstToken,std::size_t tokens)27 Definition::Definition(
28     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29     : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
Definition(const std::vector<std::string> & argNames,const TokenSequence & repl,std::size_t firstToken,std::size_t tokens,bool isVariadic)31 Definition::Definition(const std::vector<std::string> &argNames,
32     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33     bool isVariadic)
34     : isFunctionLike_{true},
35       argumentCount_(argNames.size()), isVariadic_{isVariadic},
36       replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
Definition(const std::string & predefined,AllSources & sources)38 Definition::Definition(const std::string &predefined, AllSources &sources)
39     : isPredefined_{true},
40       replacement_{
41           predefined, sources.AddCompilerInsertion(predefined).start()} {}
42 
set_isDisabled(bool disable)43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
IsLegalIdentifierStart(const CharBlock & cpl)49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
// Copies `tokens` tokens of `token`, starting at index `firstToken`, into a
// new TokenSequence.  Every identifier that matches one of `argNames` is
// replaced by a two-character placeholder: '~' followed by a letter that
// encodes the argument's position ('A' for the first, 'B' for the second,
// and so on).  All other tokens are copied unchanged.
// The names in `argNames` must be distinct (enforced by CHECK).
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end());
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    // Absolute index into `token`; `j` alone is only an offset from
    // `firstToken`.
    const std::size_t at{firstToken + j};
    CharBlock tok{token.TokenAt(at)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        // The placeholder inherits the provenance of the argument name that
        // it replaces, which lives at index `at`, not at `j`.
        result.Put(it->second, token.GetTokenProvenance(at));
        continue;
      }
    }
    result.Put(token, at, 1);
  }
  return result;
}
75 
// Produces a single-token sequence holding the characters of every token in
// `tokens`, enclosed in double quotes.  Each '"' and '\' character of the
// input is emitted twice.  The enclosing quotes carry compiler-insertion
// provenance; every other character keeps its original provenance.
static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence result;
  Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
  result.PutNextTokenChar('"', quoteProvenance);
  const std::size_t tokenCount{tokens.SizeInTokens()};
  for (std::size_t t{0}; t < tokenCount; ++t) {
    const CharBlock &token{tokens.TokenAt(t)};
    const std::size_t length{token.size()};
    for (std::size_t at{0}; at < length; ++at) {
      const char ch{token[at]};
      Provenance from{tokens.GetTokenProvenance(t, at)};
      // Quote and backslash characters are doubled; all others appear once.
      const int copies{(ch == '"' || ch == '\\') ? 2 : 1};
      for (int c{0}; c < copies; ++c) {
        result.PutNextTokenChar(ch, from);
      }
    }
  }
  result.PutNextTokenChar('"', quoteProvenance);
  result.CloseToken();
  return result;
}
97 
IsTokenPasting(CharBlock opr)98 constexpr bool IsTokenPasting(CharBlock opr) {
99   return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
100 }
101 
AnyTokenPasting(const TokenSequence & text)102 static bool AnyTokenPasting(const TokenSequence &text) {
103   std::size_t tokens{text.SizeInTokens()};
104   for (std::size_t j{0}; j < tokens; ++j) {
105     if (IsTokenPasting(text.TokenAt(j))) {
106       return true;
107     }
108   }
109   return false;
110 }
111 
TokenPasting(TokenSequence && text)112 static TokenSequence TokenPasting(TokenSequence &&text) {
113   if (!AnyTokenPasting(text)) {
114     return std::move(text);
115   }
116   TokenSequence result;
117   std::size_t tokens{text.SizeInTokens()};
118   bool pasting{false};
119   for (std::size_t j{0}; j < tokens; ++j) {
120     if (IsTokenPasting(text.TokenAt(j))) {
121       if (!pasting) {
122         while (!result.empty() &&
123             result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
124           result.pop_back();
125         }
126         if (!result.empty()) {
127           result.ReopenLastToken();
128           pasting = true;
129         }
130       }
131     } else if (pasting && text.TokenAt(j).IsBlank()) {
132     } else {
133       result.Put(text, j, 1);
134       pasting = false;
135     }
136   }
137   return result;
138 }
139 
Apply(const std::vector<TokenSequence> & args,Prescanner & prescanner)140 TokenSequence Definition::Apply(
141     const std::vector<TokenSequence> &args, Prescanner &prescanner) {
142   TokenSequence result;
143   bool skipping{false};
144   int parenthesesNesting{0};
145   std::size_t tokens{replacement_.SizeInTokens()};
146   for (std::size_t j{0}; j < tokens; ++j) {
147     CharBlock token{replacement_.TokenAt(j)};
148     std::size_t bytes{token.size()};
149     if (skipping) {
150       if (bytes == 1) {
151         if (token[0] == '(') {
152           ++parenthesesNesting;
153         } else if (token[0] == ')') {
154           skipping = --parenthesesNesting > 0;
155         }
156       }
157       continue;
158     }
159     if (bytes == 2 && token[0] == '~') { // argument substitution
160       std::size_t index = token[1] - 'A';
161       if (index >= args.size()) {
162         continue;
163       }
164       std::size_t prev{j};
165       while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
166         --prev;
167       }
168       if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
169           replacement_.TokenAt(prev - 1)[0] ==
170               '#') { // stringify argument without macro replacement
171         std::size_t resultSize{result.SizeInTokens()};
172         while (resultSize > 0 && result.TokenAt(resultSize - 1).empty()) {
173           result.pop_back();
174         }
175         CHECK(resultSize > 0 &&
176             result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
177         result.pop_back();
178         result.Put(Stringify(args[index], prescanner.allSources()));
179       } else {
180         const TokenSequence *arg{&args[index]};
181         std::optional<TokenSequence> replaced;
182         // Don't replace macros in the actual argument if it is preceded or
183         // followed by the token-pasting operator ## in the replacement text.
184         if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
185           auto next{replacement_.SkipBlanks(j + 1)};
186           if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
187             // Apply macro replacement to the actual argument
188             replaced =
189                 prescanner.preprocessor().MacroReplacement(*arg, prescanner);
190             if (replaced) {
191               arg = &*replaced;
192             }
193           }
194         }
195         result.Put(DEREF(arg));
196       }
197     } else if (bytes == 11 && isVariadic_ &&
198         token.ToString() == "__VA_ARGS__") {
199       Provenance commaProvenance{
200           prescanner.preprocessor().allSources().CompilerInsertionProvenance(
201               ',')};
202       for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
203         if (k > argumentCount_) {
204           result.Put(","s, commaProvenance);
205         }
206         result.Put(args[k]);
207       }
208     } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
209         j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
210         parenthesesNesting == 0) {
211       parenthesesNesting = 1;
212       skipping = args.size() == argumentCount_;
213       ++j;
214     } else {
215       if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
216         ++parenthesesNesting;
217       } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
218         if (--parenthesesNesting == 0) {
219           skipping = false;
220           continue;
221         }
222       }
223       result.Put(replacement_, j);
224     }
225   }
226   return TokenPasting(std::move(result));
227 }
228 
// Formats the local-time representation of `now` with the strftime-style
// `format` and returns the text.  The result is limited to 15 characters;
// an empty string is returned when the formatted text does not fit or when
// `now` cannot be converted to local time.
static std::string FormatTime(const std::time_t &now, const char *format) {
  // std::localtime yields a null pointer on failure, which must not be
  // handed to std::strftime.
  const std::tm *local{std::localtime(&now)};
  if (!local) {
    return {};
  }
  char buffer[16];
  // std::strftime returns 0 when the result (plus terminator) does not fit.
  const std::size_t length{
      std::strftime(buffer, sizeof buffer, format, local)};
  return std::string(buffer, length);
}
234 
Preprocessor(AllSources & allSources)235 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {}
236 
DefineStandardMacros()237 void Preprocessor::DefineStandardMacros() {
238   // Capture current local date & time once now to avoid having the values
239   // of __DATE__ or __TIME__ change during compilation.
240   std::time_t now;
241   std::time(&now);
242   Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904"
243   Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60"
244   // The values of these predefined macros depend on their invocation sites.
245   Define("__FILE__"s, "__FILE__"s);
246   Define("__LINE__"s, "__LINE__"s);
247 }
248 
Define(std::string macro,std::string value)249 void Preprocessor::Define(std::string macro, std::string value) {
250   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
251 }
252 
Undefine(std::string macro)253 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
254 
// Performs macro replacement over `input`.  Returns std::nullopt when no
// macros are defined or when no token of `input` names a defined macro;
// otherwise returns a new token sequence in which object-like macros and
// well-formed calls to function-like macros have been expanded.  Replacement
// text is itself rescanned via ReplaceMacros(), with the macro being
// expanded temporarily disabled.
std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, Prescanner &prescanner) {
  // Do quick scan for any use of a defined name.
  if (definitions_.empty()) {
    return std::nullopt;
  }
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j;
  for (j = 0; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        IsNameDefined(token)) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt; // input contains nothing that would be replaced
  }
  // Everything before the first defined name is copied verbatim.
  TokenSequence result{input, 0, j};
  for (; j < tokens; ++j) {
    const CharBlock &token{input.TokenAt(j)};
    // Tokens that cannot be macro names are copied through unchanged.
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    auto it{definitions_.find(token)};
    if (it == definitions_.end()) {
      result.Put(input, j);
      continue;
    }
    Definition &def{it->second};
    // A macro that is currently being expanded is not expanded again.
    if (def.isDisabled()) {
      result.Put(input, j);
      continue;
    }
    if (!def.isFunctionLike()) {
      if (def.isPredefined()) {
        // __FILE__ and __LINE__ are evaluated at the point of use.
        std::string name{def.replacement().TokenAt(0).ToString()};
        std::string repl;
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::string buf;
          llvm::raw_string_ostream ss{buf};
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
      }
      // Object-like macro: rescan its replacement text with the macro
      // disabled, then apply token pasting.
      def.set_isDisabled(true);
      TokenSequence replaced{
          TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
      def.set_isDisabled(false);
      if (!replaced.empty()) {
        ProvenanceRange from{def.replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      continue;
    }
    // Possible function-like macro call.  Skip spaces and newlines to see
    // whether '(' is next.
    std::size_t k{j};
    bool leftParen{false};
    while (++k < tokens) {
      const CharBlock &lookAhead{input.TokenAt(k)};
      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
        break;
      }
    }
    if (!leftParen) {
      // Name of a function-like macro without a call: leave it alone.
      result.Put(input, j);
      continue;
    }
    // Record the index of the first token of each actual argument; arguments
    // are separated by commas that are not nested within parentheses.  On
    // exit from the loop, k is the index of the closing ')' (or `tokens` if
    // there is none).
    std::vector<std::size_t> argStart{++k};
    for (int nesting{0}; k < tokens; ++k) {
      CharBlock token{input.TokenAt(k)};
      if (token.size() == 1) {
        char ch{token[0]};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
    }
    if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
      // Subtle: () is zero arguments, not one empty argument,
      // unless one argument was expected.
      argStart.clear();
    }
    // An unterminated call or a mismatched argument count is not a macro
    // call; the name is copied through unchanged.
    if (k >= tokens || argStart.size() < def.argumentCount() ||
        (argStart.size() > def.argumentCount() && !def.isVariadic())) {
      result.Put(input, j);
      continue;
    }
    // Slice each actual argument out of the input; an argument ends just
    // before the comma that starts the next one, or before the ')' at k.
    std::vector<TokenSequence> args;
    for (std::size_t n{0}; n < argStart.size(); ++n) {
      std::size_t at{argStart[n]};
      std::size_t count{
          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
      args.emplace_back(TokenSequence(input, at, count));
    }
    def.set_isDisabled(true);
    TokenSequence replaced{
        ReplaceMacros(def.Apply(args, prescanner), prescanner)};
    def.set_isDisabled(false);
    if (!replaced.empty()) {
      ProvenanceRange from{def.replacement().GetProvenanceRange()};
      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
      ProvenanceRange newRange{
          allSources_.AddMacroCall(from, use, replaced.ToString())};
      result.Put(replaced, newRange);
    }
    j = k; // advance to the terminal ')'
  }
  return result;
}
388 
// Convenience wrapper around MacroReplacement(): returns the expanded
// sequence when any replacement took place, or a copy of `tokens` otherwise.
TokenSequence Preprocessor::ReplaceMacros(
    const TokenSequence &tokens, Prescanner &prescanner) {
  std::optional<TokenSequence> expanded{MacroReplacement(tokens, prescanner)};
  if (!expanded) {
    return tokens;
  }
  return std::move(*expanded);
}
396 
Directive(const TokenSequence & dir,Prescanner & prescanner)397 void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
398   std::size_t tokens{dir.SizeInTokens()};
399   std::size_t j{dir.SkipBlanks(0)};
400   if (j == tokens) {
401     return;
402   }
403   if (dir.TokenAt(j).ToString() != "#") {
404     prescanner.Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
405     return;
406   }
407   j = dir.SkipBlanks(j + 1);
408   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
409     --tokens;
410   }
411   if (j == tokens) {
412     return;
413   }
414   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
415     return; // treat like #line, ignore it
416   }
417   std::size_t dirOffset{j};
418   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
419   j = dir.SkipBlanks(j + 1);
420   CharBlock nameToken;
421   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
422     nameToken = dir.TokenAt(j);
423   }
424   if (dirName == "line") {
425     // #line is ignored
426   } else if (dirName == "define") {
427     if (nameToken.empty()) {
428       prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
429           "#define: missing or invalid name"_err_en_US);
430       return;
431     }
432     nameToken = SaveTokenAsName(nameToken);
433     definitions_.erase(nameToken);
434     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
435         dir.TokenAt(j)[0] == '(') {
436       j = dir.SkipBlanks(j + 1);
437       std::vector<std::string> argName;
438       bool isVariadic{false};
439       if (dir.TokenAt(j).ToString() != ")") {
440         while (true) {
441           std::string an{dir.TokenAt(j).ToString()};
442           if (an == "...") {
443             isVariadic = true;
444           } else {
445             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
446               prescanner.Say(dir.GetTokenProvenanceRange(j),
447                   "#define: missing or invalid argument name"_err_en_US);
448               return;
449             }
450             argName.push_back(an);
451           }
452           j = dir.SkipBlanks(j + 1);
453           if (j == tokens) {
454             prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
455                 "#define: malformed argument list"_err_en_US);
456             return;
457           }
458           std::string punc{dir.TokenAt(j).ToString()};
459           if (punc == ")") {
460             break;
461           }
462           if (isVariadic || punc != ",") {
463             prescanner.Say(dir.GetTokenProvenanceRange(j),
464                 "#define: malformed argument list"_err_en_US);
465             return;
466           }
467           j = dir.SkipBlanks(j + 1);
468           if (j == tokens) {
469             prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
470                 "#define: malformed argument list"_err_en_US);
471             return;
472           }
473         }
474         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
475             argName.size()) {
476           prescanner.Say(dir.GetTokenProvenance(dirOffset),
477               "#define: argument names are not distinct"_err_en_US);
478           return;
479         }
480       }
481       j = dir.SkipBlanks(j + 1);
482       definitions_.emplace(std::make_pair(
483           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
484     } else {
485       j = dir.SkipBlanks(j + 1);
486       definitions_.emplace(
487           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
488     }
489   } else if (dirName == "undef") {
490     if (nameToken.empty()) {
491       prescanner.Say(
492           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
493           "# missing or invalid name"_err_en_US);
494     } else {
495       if (dir.IsAnythingLeft(++j)) {
496         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
497             "#undef: excess tokens at end of directive"_en_US);
498       } else {
499         definitions_.erase(nameToken);
500       }
501     }
502   } else if (dirName == "ifdef" || dirName == "ifndef") {
503     bool doThen{false};
504     if (nameToken.empty()) {
505       prescanner.Say(
506           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
507           "#%s: missing name"_err_en_US, dirName);
508     } else {
509       if (dir.IsAnythingLeft(++j)) {
510         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
511             "#%s: excess tokens at end of directive"_en_US, dirName);
512       }
513       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
514     }
515     if (doThen) {
516       ifStack_.push(CanDeadElseAppear::Yes);
517     } else {
518       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
519           dir.GetTokenProvenance(dirOffset));
520     }
521   } else if (dirName == "if") {
522     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
523       ifStack_.push(CanDeadElseAppear::Yes);
524     } else {
525       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
526           dir.GetTokenProvenanceRange(dirOffset));
527     }
528   } else if (dirName == "else") {
529     if (dir.IsAnythingLeft(j)) {
530       prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
531           "#else: excess tokens at end of directive"_en_US);
532     } else if (ifStack_.empty()) {
533       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
534           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
535     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
536       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
537           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
538     } else {
539       ifStack_.pop();
540       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
541           dir.GetTokenProvenanceRange(dirOffset));
542     }
543   } else if (dirName == "elif") {
544     if (ifStack_.empty()) {
545       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
546           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
547     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
548       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
549           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
550     } else {
551       ifStack_.pop();
552       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
553           dir.GetTokenProvenanceRange(dirOffset));
554     }
555   } else if (dirName == "endif") {
556     if (dir.IsAnythingLeft(j)) {
557       prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
558           "#endif: excess tokens at end of directive"_en_US);
559     } else if (ifStack_.empty()) {
560       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
561           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
562     } else {
563       ifStack_.pop();
564     }
565   } else if (dirName == "error") {
566     prescanner.Say(
567         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
568         "%s"_err_en_US, dir.ToString());
569   } else if (dirName == "warning" || dirName == "comment" ||
570       dirName == "note") {
571     prescanner.Say(
572         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
573         "%s"_en_US, dir.ToString());
574   } else if (dirName == "include") {
575     if (j == tokens) {
576       prescanner.Say(
577           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
578           "#include: missing name of file to include"_err_en_US);
579       return;
580     }
581     std::string include;
582     std::optional<std::string> prependPath;
583     if (dir.TokenAt(j).ToString() == "<") { // #include <foo>
584       std::size_t k{j + 1};
585       if (k >= tokens) {
586         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
587             "#include: file name missing"_err_en_US);
588         return;
589       }
590       while (k < tokens && dir.TokenAt(k) != ">") {
591         ++k;
592       }
593       if (k >= tokens) {
594         prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
595             "#include: expected '>' at end of included file"_en_US);
596       }
597       TokenSequence braced{dir, j + 1, k - j - 1};
598       include = ReplaceMacros(braced, prescanner).ToString();
599       j = k;
600     } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
601         include.substr(include.size() - 1, 1) == "\"") { // #include "foo"
602       include = include.substr(1, include.size() - 2);
603       // #include "foo" starts search in directory of file containing
604       // the directive
605       auto prov{dir.GetTokenProvenanceRange(dirOffset).start()};
606       if (const auto *currentFile{allSources_.GetSourceFile(prov)}) {
607         prependPath = DirectoryName(currentFile->path());
608       }
609     } else {
610       prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
611           "#include: expected name of file to include"_err_en_US);
612       return;
613     }
614     if (include.empty()) {
615       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
616           "#include: empty include file name"_err_en_US);
617       return;
618     }
619     j = dir.SkipBlanks(j + 1);
620     if (j < tokens && dir.TokenAt(j).ToString() != "!") {
621       prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
622           "#include: extra stuff ignored after file name"_en_US);
623     }
624     std::string buf;
625     llvm::raw_string_ostream error{buf};
626     const SourceFile *included{
627         allSources_.Open(include, error, std::move(prependPath))};
628     if (!included) {
629       prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
630           "#include: %s"_err_en_US, error.str());
631     } else if (included->bytes() > 0) {
632       ProvenanceRange fileRange{
633           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
634       Prescanner{prescanner}
635           .set_encoding(included->encoding())
636           .Prescan(fileRange);
637     }
638   } else {
639     prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
640         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
641   }
642 }
643 
// Stores a copy of the text of `t` in names_ and returns a CharBlock that
// refers to that stored copy rather than to the characters of `t`.
CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  const std::string &saved{names_.back()};
  return {saved.data(), saved.size()};
}
648 
IsNameDefined(const CharBlock & token)649 bool Preprocessor::IsNameDefined(const CharBlock &token) {
650   return definitions_.find(token) != definitions_.end();
651 }
652 
GetDirectiveName(const TokenSequence & line,std::size_t * rest)653 static std::string GetDirectiveName(
654     const TokenSequence &line, std::size_t *rest) {
655   std::size_t tokens{line.SizeInTokens()};
656   std::size_t j{line.SkipBlanks(0)};
657   if (j == tokens || line.TokenAt(j).ToString() != "#") {
658     *rest = tokens;
659     return "";
660   }
661   j = line.SkipBlanks(j + 1);
662   if (j == tokens) {
663     *rest = tokens;
664     return "";
665   }
666   *rest = line.SkipBlanks(j + 1);
667   return ToLowerCaseLetters(line.TokenAt(j).ToString());
668 }
669 
SkipDisabledConditionalCode(const std::string & dirName,IsElseActive isElseActive,Prescanner & prescanner,ProvenanceRange provenanceRange)670 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
671     IsElseActive isElseActive, Prescanner &prescanner,
672     ProvenanceRange provenanceRange) {
673   int nesting{0};
674   while (!prescanner.IsAtEnd()) {
675     if (!prescanner.IsNextLinePreprocessorDirective()) {
676       prescanner.NextLine();
677       continue;
678     }
679     TokenSequence line{prescanner.TokenizePreprocessorDirective()};
680     std::size_t rest{0};
681     std::string dn{GetDirectiveName(line, &rest)};
682     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
683       ++nesting;
684     } else if (dn == "endif") {
685       if (nesting-- == 0) {
686         return;
687       }
688     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
689       if (dn == "else") {
690         ifStack_.push(CanDeadElseAppear::No);
691         return;
692       }
693       if (dn == "elif" &&
694           IsIfPredicateTrue(
695               line, rest, line.SizeInTokens() - rest, prescanner)) {
696         ifStack_.push(CanDeadElseAppear::Yes);
697         return;
698       }
699     }
700   }
701   prescanner.Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
702 }
703 
704 // Precedence level codes used here to accommodate mixed Fortran and C:
705 // 15: parentheses and constants, logical !, bitwise ~
706 // 14: unary + and -
707 // 13: **
708 // 12: *, /, % (modulus)
709 // 11: + and -
710 // 10: << and >>
711 //  9: bitwise &
712 //  8: bitwise ^
713 //  7: bitwise |
714 //  6: relations (.EQ., ==, &c.)
715 //  5: .NOT.
716 //  4: .AND., &&
717 //  3: .OR., ||
718 //  2: .EQV. and .NEQV. / .XOR.
719 //  1: ? :
720 //  0: ,
ExpressionValue(const TokenSequence & token,int minimumPrecedence,std::size_t * atToken,std::optional<Message> * error)721 static std::int64_t ExpressionValue(const TokenSequence &token,
722     int minimumPrecedence, std::size_t *atToken,
723     std::optional<Message> *error) {
724   enum Operator {
725     PARENS,
726     CONST,
727     NOTZERO, // !
728     COMPLEMENT, // ~
729     UPLUS,
730     UMINUS,
731     POWER,
732     TIMES,
733     DIVIDE,
734     MODULUS,
735     ADD,
736     SUBTRACT,
737     LEFTSHIFT,
738     RIGHTSHIFT,
739     BITAND,
740     BITXOR,
741     BITOR,
742     LT,
743     LE,
744     EQ,
745     NE,
746     GE,
747     GT,
748     NOT,
749     AND,
750     OR,
751     EQV,
752     NEQV,
753     SELECT,
754     COMMA
755   };
756   static const int precedence[]{
757       15, 15, 15, 15, // (), 6, !, ~
758       14, 14, // unary +, -
759       13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
760       9, 8, 7, // &, ^, |
761       6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
762       5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
763       1, 0 // ?: and ,
764   };
765   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
766       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
767 
768   static std::map<std::string, enum Operator> opNameMap;
769   if (opNameMap.empty()) {
770     opNameMap["("] = PARENS;
771     opNameMap["!"] = NOTZERO;
772     opNameMap["~"] = COMPLEMENT;
773     opNameMap["**"] = POWER;
774     opNameMap["*"] = TIMES;
775     opNameMap["/"] = DIVIDE;
776     opNameMap["%"] = MODULUS;
777     opNameMap["+"] = ADD;
778     opNameMap["-"] = SUBTRACT;
779     opNameMap["<<"] = LEFTSHIFT;
780     opNameMap[">>"] = RIGHTSHIFT;
781     opNameMap["&"] = BITAND;
782     opNameMap["^"] = BITXOR;
783     opNameMap["|"] = BITOR;
784     opNameMap[".lt."] = opNameMap["<"] = LT;
785     opNameMap[".le."] = opNameMap["<="] = LE;
786     opNameMap[".eq."] = opNameMap["=="] = EQ;
787     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
788     opNameMap[".ge."] = opNameMap[">="] = GE;
789     opNameMap[".gt."] = opNameMap[">"] = GT;
790     opNameMap[".not."] = NOT;
791     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
792     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
793     opNameMap[".eqv."] = EQV;
794     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
795     opNameMap["?"] = SELECT;
796     opNameMap[","] = COMMA;
797   }
798 
799   std::size_t tokens{token.SizeInTokens()};
800   CHECK(tokens > 0);
801   if (*atToken >= tokens) {
802     *error =
803         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
804     return 0;
805   }
806 
807   // Parse and evaluate a primary or a unary operator and its operand.
808   std::size_t opAt{*atToken};
809   std::string t{token.TokenAt(opAt).ToString()};
810   enum Operator op;
811   std::int64_t left{0};
812   if (t == "(") {
813     op = PARENS;
814   } else if (IsDecimalDigit(t[0])) {
815     op = CONST;
816     std::size_t consumed{0};
817     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
818     if (consumed < t.size()) {
819       *error = Message{token.GetTokenProvenanceRange(opAt),
820           "Uninterpretable numeric constant '%s'"_err_en_US, t};
821       return 0;
822     }
823   } else if (IsLegalIdentifierStart(t[0])) {
824     // undefined macro name -> zero
825     // TODO: BOZ constants?
826     op = CONST;
827   } else if (t == "+") {
828     op = UPLUS;
829   } else if (t == "-") {
830     op = UMINUS;
831   } else if (t == "." && *atToken + 2 < tokens &&
832       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
833       token.TokenAt(*atToken + 2).ToString() == ".") {
834     op = NOT;
835     *atToken += 2;
836   } else {
837     auto it{opNameMap.find(t)};
838     if (it != opNameMap.end()) {
839       op = it->second;
840     } else {
841       *error = Message{token.GetTokenProvenanceRange(opAt),
842           "operand expected in expression"_err_en_US};
843       return 0;
844     }
845   }
846   if (precedence[op] < minimumPrecedence) {
847     *error = Message{token.GetTokenProvenanceRange(opAt),
848         "operator precedence error"_err_en_US};
849     return 0;
850   }
851   ++*atToken;
852   if (op != CONST) {
853     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
854     if (*error) {
855       return 0;
856     }
857     switch (op) {
858     case PARENS:
859       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
860         ++*atToken;
861         break;
862       }
863       if (*atToken >= tokens) {
864         *error = Message{token.GetProvenanceRange(),
865             "')' missing from expression"_err_en_US};
866       } else {
867         *error = Message{
868             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
869       }
870       return 0;
871     case NOTZERO:
872       left = !left;
873       break;
874     case COMPLEMENT:
875       left = ~left;
876       break;
877     case UPLUS:
878       break;
879     case UMINUS:
880       left = -left;
881       break;
882     case NOT:
883       left = -!left;
884       break;
885     default:
886       CRASH_NO_CASE;
887     }
888   }
889 
890   // Parse and evaluate binary operators and their second operands, if present.
891   while (*atToken < tokens) {
892     int advance{1};
893     t = token.TokenAt(*atToken).ToString();
894     if (t == "." && *atToken + 2 < tokens &&
895         token.TokenAt(*atToken + 2).ToString() == ".") {
896       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
897       advance = 3;
898     }
899     auto it{opNameMap.find(t)};
900     if (it == opNameMap.end()) {
901       break;
902     }
903     op = it->second;
904     if (op < POWER || precedence[op] < minimumPrecedence) {
905       break;
906     }
907     opAt = *atToken;
908     *atToken += advance;
909 
910     std::int64_t right{
911         ExpressionValue(token, operandPrecedence[op], atToken, error)};
912     if (*error) {
913       return 0;
914     }
915 
916     switch (op) {
917     case POWER:
918       if (left == 0) {
919         if (right < 0) {
920           *error = Message{token.GetTokenProvenanceRange(opAt),
921               "0 ** negative power"_err_en_US};
922         }
923       } else if (left != 1 && right != 1) {
924         if (right <= 0) {
925           left = !right;
926         } else {
927           std::int64_t power{1};
928           for (; right > 0; --right) {
929             if ((power * left) / left != power) {
930               *error = Message{token.GetTokenProvenanceRange(opAt),
931                   "overflow in exponentation"_err_en_US};
932               left = 1;
933             }
934             power *= left;
935           }
936           left = power;
937         }
938       }
939       break;
940     case TIMES:
941       if (left != 0 && right != 0 && ((left * right) / left) != right) {
942         *error = Message{token.GetTokenProvenanceRange(opAt),
943             "overflow in multiplication"_err_en_US};
944       }
945       left = left * right;
946       break;
947     case DIVIDE:
948       if (right == 0) {
949         *error = Message{
950             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
951         left = 0;
952       } else {
953         left = left / right;
954       }
955       break;
956     case MODULUS:
957       if (right == 0) {
958         *error = Message{
959             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
960         left = 0;
961       } else {
962         left = left % right;
963       }
964       break;
965     case ADD:
966       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
967         *error = Message{token.GetTokenProvenanceRange(opAt),
968             "overflow in addition"_err_en_US};
969       }
970       left = left + right;
971       break;
972     case SUBTRACT:
973       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
974         *error = Message{token.GetTokenProvenanceRange(opAt),
975             "overflow in subtraction"_err_en_US};
976       }
977       left = left - right;
978       break;
979     case LEFTSHIFT:
980       if (right < 0 || right > 64) {
981         *error = Message{token.GetTokenProvenanceRange(opAt),
982             "bad left shift count"_err_en_US};
983       }
984       left = right >= 64 ? 0 : left << right;
985       break;
986     case RIGHTSHIFT:
987       if (right < 0 || right > 64) {
988         *error = Message{token.GetTokenProvenanceRange(opAt),
989             "bad right shift count"_err_en_US};
990       }
991       left = right >= 64 ? 0 : left >> right;
992       break;
993     case BITAND:
994     case AND:
995       left = left & right;
996       break;
997     case BITXOR:
998       left = left ^ right;
999       break;
1000     case BITOR:
1001     case OR:
1002       left = left | right;
1003       break;
1004     case LT:
1005       left = -(left < right);
1006       break;
1007     case LE:
1008       left = -(left <= right);
1009       break;
1010     case EQ:
1011       left = -(left == right);
1012       break;
1013     case NE:
1014       left = -(left != right);
1015       break;
1016     case GE:
1017       left = -(left >= right);
1018       break;
1019     case GT:
1020       left = -(left > right);
1021       break;
1022     case EQV:
1023       left = -(!left == !right);
1024       break;
1025     case NEQV:
1026       left = -(!left != !right);
1027       break;
1028     case SELECT:
1029       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
1030         *error = Message{token.GetTokenProvenanceRange(opAt),
1031             "':' required in selection expression"_err_en_US};
1032         return 0;
1033       } else {
1034         ++*atToken;
1035         std::int64_t third{
1036             ExpressionValue(token, operandPrecedence[op], atToken, error)};
1037         left = left != 0 ? right : third;
1038       }
1039       break;
1040     case COMMA:
1041       left = right;
1042       break;
1043     default:
1044       CRASH_NO_CASE;
1045     }
1046   }
1047   return left;
1048 }
1049 
IsIfPredicateTrue(const TokenSequence & expr,std::size_t first,std::size_t exprTokens,Prescanner & prescanner)1050 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1051     std::size_t first, std::size_t exprTokens, Prescanner &prescanner) {
1052   TokenSequence expr1{expr, first, exprTokens};
1053   if (expr1.HasBlanks()) {
1054     expr1.RemoveBlanks();
1055   }
1056   TokenSequence expr2;
1057   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1058     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1059       CharBlock name;
1060       if (j + 3 < expr1.SizeInTokens() &&
1061           expr1.TokenAt(j + 1).ToString() == "(" &&
1062           expr1.TokenAt(j + 3).ToString() == ")") {
1063         name = expr1.TokenAt(j + 2);
1064         j += 3;
1065       } else if (j + 1 < expr1.SizeInTokens() &&
1066           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1067         name = expr1.TokenAt(++j);
1068       }
1069       if (!name.empty()) {
1070         char truth{IsNameDefined(name) ? '1' : '0'};
1071         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1072         continue;
1073       }
1074     }
1075     expr2.Put(expr1, j);
1076   }
1077   TokenSequence expr3{ReplaceMacros(expr2, prescanner)};
1078   if (expr3.HasBlanks()) {
1079     expr3.RemoveBlanks();
1080   }
1081   if (expr3.empty()) {
1082     prescanner.Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1083     return false;
1084   }
1085   std::size_t atToken{0};
1086   std::optional<Message> error;
1087   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1088   if (error) {
1089     prescanner.Say(std::move(*error));
1090   } else if (atToken < expr3.SizeInTokens() &&
1091       expr3.TokenAt(atToken).ToString() != "!") {
1092     prescanner.Say(expr3.GetIntervalProvenanceRange(
1093                        atToken, expr3.SizeInTokens() - atToken),
1094         atToken == 0 ? "could not parse any expression"_err_en_US
1095                      : "excess characters after expression"_err_en_US);
1096   }
1097   return result;
1098 }
1099 } // namespace Fortran::parser
1100