1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24
25 namespace Fortran::parser {
26
// Constructs the definition of a keyword (object-like) macro.  The
// replacement text is tokens [firstToken, firstToken+tokens) of `repl`,
// tokenized with an empty argument-name list (no '~X' substitution
// sentinels are created).
Definition::Definition(
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
    : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30
// Constructs the definition of a function-like macro.  `argNames` are the
// formal parameter names; the replacement text (tokens [firstToken,
// firstToken+tokens) of `repl`) is tokenized so each parameter reference
// becomes an internal '~X' sentinel.  `isVariadic` is true when the
// parameter list ended with "...".
Definition::Definition(const std::vector<std::string> &argNames,
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
    bool isVariadic)
    : isFunctionLike_{true},
      argumentCount_(argNames.size()), isVariadic_{isVariadic},
      replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37
// Constructs a predefined macro (e.g. __FILE__, __LINE__) whose replacement
// text is registered as a compiler insertion so it has valid provenance.
Definition::Definition(const std::string &predefined, AllSources &sources)
    : isPredefined_{true},
      replacement_{
          predefined, sources.AddCompilerInsertion(predefined).start()} {}
42
set_isDisabled(bool disable)43 bool Definition::set_isDisabled(bool disable) {
44 bool was{isDisabled_};
45 isDisabled_ = disable;
46 return was;
47 }
48
IsLegalIdentifierStart(const CharBlock & cpl)49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50 return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52
// Converts a macro's replacement text into its internal stored form:
// each occurrence of a formal argument name is replaced by a
// two-character sentinel token ("~A", "~B", ...) so that Apply() can
// locate argument substitution sites without re-matching names.
// All other tokens are copied through unchanged.
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end()); // argument names must be distinct
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{token.TokenAt(firstToken + j)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        // Fix: the sentinel must carry the provenance of the token it
        // replaces, which is token (firstToken + j) of the directive,
        // not token j.
        result.Put(it->second, token.GetTokenProvenance(firstToken + j));
        continue;
      }
    }
    result.Put(token, firstToken + j, 1);
  }
  return result;
}
75
// Converts a token sequence into a single quoted-string token (the '#'
// stringification operator).  Embedded '"' and '\' characters are
// escaped by doubling; the surrounding quotes carry compiler-insertion
// provenance.
static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence result;
  Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
  result.PutNextTokenChar('"', quoteProvenance);
  std::size_t nTokens{tokens.SizeInTokens()};
  for (std::size_t j{0}; j < nTokens; ++j) {
    const CharBlock &token{tokens.TokenAt(j)};
    for (std::size_t k{0}, bytes{token.size()}; k < bytes; ++k) {
      char ch{token[k]};
      Provenance from{tokens.GetTokenProvenance(j, k)};
      int copies{ch == '"' || ch == '\\' ? 2 : 1};
      while (copies-- > 0) {
        result.PutNextTokenChar(ch, from);
      }
    }
  }
  result.PutNextTokenChar('"', quoteProvenance);
  result.CloseToken();
  return result;
}
97
IsTokenPasting(CharBlock opr)98 constexpr bool IsTokenPasting(CharBlock opr) {
99 return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
100 }
101
AnyTokenPasting(const TokenSequence & text)102 static bool AnyTokenPasting(const TokenSequence &text) {
103 std::size_t tokens{text.SizeInTokens()};
104 for (std::size_t j{0}; j < tokens; ++j) {
105 if (IsTokenPasting(text.TokenAt(j))) {
106 return true;
107 }
108 }
109 return false;
110 }
111
// Implements the '##' token-pasting operator: each "##" token, along with
// the blanks around it, is deleted, and the token emitted just before it
// is reopened so that the next non-blank token's characters are appended
// to it.  Returns the input unchanged (moved) when it contains no "##".
static TokenSequence TokenPasting(TokenSequence &&text) {
  if (!AnyTokenPasting(text)) {
    return std::move(text);
  }
  TokenSequence result;
  std::size_t tokens{text.SizeInTokens()};
  bool pasting{false}; // true while awaiting the right-hand operand of ##
  for (std::size_t j{0}; j < tokens; ++j) {
    if (IsTokenPasting(text.TokenAt(j))) {
      if (!pasting) {
        // Drop any blanks already emitted before the operator, then
        // reopen the preceding token so the next one is glued onto it.
        while (!result.empty() &&
            result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
          result.pop_back();
        }
        if (!result.empty()) {
          result.ReopenLastToken();
          pasting = true;
        }
      }
    } else if (pasting && text.TokenAt(j).IsBlank()) {
      // Discard blanks between "##" and its right-hand operand.
    } else {
      result.Put(text, j, 1);
      pasting = false;
    }
  }
  return result;
}
139
// Expands one invocation of a function-like macro: substitutes the actual
// arguments `args` into the stored replacement text, handling '#'
// stringification, suppression of argument pre-expansion next to '##',
// __VA_ARGS__, and __VA_OPT__(...).  Token pasting itself is performed
// on the final result.
TokenSequence Definition::Apply(
    const std::vector<TokenSequence> &args, Prescanner &prescanner) {
  TokenSequence result;
  bool skipping{false}; // inside an inactive __VA_OPT__(...) group
  int parenthesesNesting{0};
  std::size_t tokens{replacement_.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{replacement_.TokenAt(j)};
    std::size_t bytes{token.size()};
    if (skipping) {
      // Discard tokens of an inactive __VA_OPT__ group, tracking nested
      // parentheses so we stop at the group's closing ')'.
      if (bytes == 1) {
        if (token[0] == '(') {
          ++parenthesesNesting;
        } else if (token[0] == ')') {
          skipping = --parenthesesNesting > 0;
        }
      }
      continue;
    }
    if (bytes == 2 && token[0] == '~') { // argument substitution
      std::size_t index = token[1] - 'A';
      if (index >= args.size()) {
        continue; // no actual argument was supplied for this parameter
      }
      // Look back across blanks for a preceding '#' or '##'.
      std::size_t prev{j};
      while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
        --prev;
      }
      if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
          replacement_.TokenAt(prev - 1)[0] ==
              '#') { // stringify argument without macro replacement
        // Remove the '#' already copied into the result, then emit the
        // quoted form of the raw (unexpanded) argument.
        std::size_t resultSize{result.SizeInTokens()};
        while (resultSize > 0 && result.TokenAt(resultSize - 1).empty()) {
          result.pop_back();
        }
        CHECK(resultSize > 0 &&
            result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
        result.pop_back();
        result.Put(Stringify(args[index], prescanner.allSources()));
      } else {
        const TokenSequence *arg{&args[index]};
        std::optional<TokenSequence> replaced;
        // Don't replace macros in the actual argument if it is preceded or
        // followed by the token-pasting operator ## in the replacement text.
        if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
          auto next{replacement_.SkipBlanks(j + 1)};
          if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
            // Apply macro replacement to the actual argument
            replaced =
                prescanner.preprocessor().MacroReplacement(*arg, prescanner);
            if (replaced) {
              arg = &*replaced;
            }
          }
        }
        result.Put(DEREF(arg));
      }
    } else if (bytes == 11 && isVariadic_ &&
        token.ToString() == "__VA_ARGS__") {
      // Emit the trailing actual arguments, separated by inserted commas.
      Provenance commaProvenance{
          prescanner.preprocessor().allSources().CompilerInsertionProvenance(
              ',')};
      for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
        if (k > argumentCount_) {
          result.Put(","s, commaProvenance);
        }
        result.Put(args[k]);
      }
    } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
        j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
        parenthesesNesting == 0) {
      // __VA_OPT__(...): the group's contents survive only when variadic
      // arguments were actually supplied.
      parenthesesNesting = 1;
      skipping = args.size() == argumentCount_;
      ++j; // consume the '('
    } else {
      if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
        ++parenthesesNesting;
      } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
        if (--parenthesesNesting == 0) {
          skipping = false;
          continue; // do not emit the ')' that closes __VA_OPT__
        }
      }
      result.Put(replacement_, j);
    }
  }
  return TokenPasting(std::move(result));
}
228
// Renders `now` as local time using the given strftime format string.
// The result is limited to 15 characters; if the formatted text does not
// fit, strftime reports zero bytes and an empty string is returned.
static std::string FormatTime(const std::time_t &now, const char *format) {
  char buffer[16];
  std::size_t bytes{
      std::strftime(buffer, sizeof buffer, format, std::localtime(&now))};
  return std::string{buffer, bytes};
}
234
// Binds the preprocessor to the compilation's source manager.
Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {}
236
DefineStandardMacros()237 void Preprocessor::DefineStandardMacros() {
238 // Capture current local date & time once now to avoid having the values
239 // of __DATE__ or __TIME__ change during compilation.
240 std::time_t now;
241 std::time(&now);
242 Define("__DATE__"s, FormatTime(now, "\"%h %e %Y\"")); // e.g., "Jun 16 1904"
243 Define("__TIME__"s, FormatTime(now, "\"%T\"")); // e.g., "23:59:60"
244 // The values of these predefined macros depend on their invocation sites.
245 Define("__FILE__"s, "__FILE__"s);
246 Define("__LINE__"s, "__LINE__"s);
247 }
248
// Defines `macro` as a keyword macro whose replacement text is `value`;
// the name is copied into preprocessor-owned storage first.
void Preprocessor::Define(std::string macro, std::string value) {
  definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
}
252
// Removes any definition of `macro`; a no-op if it was not defined.
void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
254
// Performs macro replacement on a token sequence.  Returns std::nullopt
// when nothing in the input would be replaced, so the caller can keep
// using the original sequence without copying it.
std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, Prescanner &prescanner) {
  // Do quick scan for any use of a defined name.
  if (definitions_.empty()) {
    return std::nullopt;
  }
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j;
  for (j = 0; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        IsNameDefined(token)) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt; // input contains nothing that would be replaced
  }
  TokenSequence result{input, 0, j}; // copy the already-scanned prefix
  for (; j < tokens; ++j) {
    const CharBlock &token{input.TokenAt(j)};
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    auto it{definitions_.find(token)};
    if (it == definitions_.end()) {
      result.Put(input, j);
      continue;
    }
    Definition &def{it->second};
    if (def.isDisabled()) {
      // A macro is disabled while its own replacement is being rescanned;
      // this prevents unbounded recursive expansion.
      result.Put(input, j);
      continue;
    }
    if (!def.isFunctionLike()) {
      if (def.isPredefined()) {
        // __FILE__ and __LINE__ expand to values computed at the point
        // of use.
        std::string name{def.replacement().TokenAt(0).ToString()};
        std::string repl;
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::string buf;
          llvm::raw_string_ostream ss{buf};
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
      }
      // Keyword macro: expand, paste, and rescan its replacement with the
      // macro itself temporarily disabled.
      def.set_isDisabled(true);
      TokenSequence replaced{
          TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
      def.set_isDisabled(false);
      if (!replaced.empty()) {
        ProvenanceRange from{def.replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      continue;
    }
    // Possible function-like macro call. Skip spaces and newlines to see
    // whether '(' is next.
    std::size_t k{j};
    bool leftParen{false};
    while (++k < tokens) {
      const CharBlock &lookAhead{input.TokenAt(k)};
      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
        break;
      }
    }
    if (!leftParen) {
      // Not an invocation; emit the name unexpanded.
      result.Put(input, j);
      continue;
    }
    // Record the starting token index of each actual argument; commas at
    // parenthesis nesting level zero separate arguments.
    std::vector<std::size_t> argStart{++k};
    for (int nesting{0}; k < tokens; ++k) {
      CharBlock token{input.TokenAt(k)};
      if (token.size() == 1) {
        char ch{token[0]};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
    }
    if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
      // Subtle: () is zero arguments, not one empty argument,
      // unless one argument was expected.
      argStart.clear();
    }
    if (k >= tokens || argStart.size() < def.argumentCount() ||
        (argStart.size() > def.argumentCount() && !def.isVariadic())) {
      // Unterminated or arity-mismatched call; leave the name unexpanded.
      result.Put(input, j);
      continue;
    }
    std::vector<TokenSequence> args;
    for (std::size_t n{0}; n < argStart.size(); ++n) {
      std::size_t at{argStart[n]};
      std::size_t count{
          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
      args.emplace_back(TokenSequence(input, at, count));
    }
    // Expand the call, then rescan the expansion with this macro disabled.
    def.set_isDisabled(true);
    TokenSequence replaced{
        ReplaceMacros(def.Apply(args, prescanner), prescanner)};
    def.set_isDisabled(false);
    if (!replaced.empty()) {
      ProvenanceRange from{def.replacement().GetProvenanceRange()};
      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
      ProvenanceRange newRange{
          allSources_.AddMacroCall(from, use, replaced.ToString())};
      result.Put(replaced, newRange);
    }
    j = k; // advance to the terminal ')'
  }
  return result;
}
388
// Convenience wrapper over MacroReplacement(): always returns a sequence,
// yielding a copy of the input when no replacement was needed.
TokenSequence Preprocessor::ReplaceMacros(
    const TokenSequence &tokens, Prescanner &prescanner) {
  auto replaced{MacroReplacement(tokens, prescanner)};
  return replaced.has_value() ? std::move(*replaced) : tokens;
}
396
// Parses and executes one preprocessing directive line (the token
// sequence includes the leading '#').  Diagnostics are reported through
// `prescanner`.
void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
  std::size_t tokens{dir.SizeInTokens()};
  std::size_t j{dir.SkipBlanks(0)};
  if (j == tokens) {
    return; // blank line
  }
  if (dir.TokenAt(j).ToString() != "#") {
    prescanner.Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
    return;
  }
  j = dir.SkipBlanks(j + 1);
  // Ignore trailing blank tokens.
  while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
    --tokens;
  }
  if (j == tokens) {
    return; // '#' alone
  }
  if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
    return; // treat like #line, ignore it
  }
  std::size_t dirOffset{j};
  std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
  j = dir.SkipBlanks(j + 1);
  // Most directives take a name as their first operand.
  CharBlock nameToken;
  if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
    nameToken = dir.TokenAt(j);
  }
  if (dirName == "line") {
    // #line is ignored
  } else if (dirName == "define") {
    if (nameToken.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#define: missing or invalid name"_err_en_US);
      return;
    }
    // Redefinition simply replaces any previous definition.
    nameToken = SaveTokenAsName(nameToken);
    definitions_.erase(nameToken);
    if (++j < tokens && dir.TokenAt(j).size() == 1 &&
        dir.TokenAt(j)[0] == '(') {
      // Function-like macro: parse the formal argument list.
      j = dir.SkipBlanks(j + 1);
      std::vector<std::string> argName;
      bool isVariadic{false};
      if (dir.TokenAt(j).ToString() != ")") {
        while (true) {
          std::string an{dir.TokenAt(j).ToString()};
          if (an == "...") {
            isVariadic = true;
          } else {
            if (an.empty() || !IsLegalIdentifierStart(an[0])) {
              prescanner.Say(dir.GetTokenProvenanceRange(j),
                  "#define: missing or invalid argument name"_err_en_US);
              return;
            }
            argName.push_back(an);
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          std::string punc{dir.TokenAt(j).ToString()};
          if (punc == ")") {
            break;
          }
          // Nothing may follow "..." except the closing ')'.
          if (isVariadic || punc != ",") {
            prescanner.Say(dir.GetTokenProvenanceRange(j),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner.Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
        }
        if (std::set<std::string>(argName.begin(), argName.end()).size() !=
            argName.size()) {
          prescanner.Say(dir.GetTokenProvenance(dirOffset),
              "#define: argument names are not distinct"_err_en_US);
          return;
        }
      }
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(std::make_pair(
          nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
    } else {
      // Keyword macro: the rest of the line is the replacement text.
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(
          std::make_pair(nameToken, Definition{dir, j, tokens - j}));
    }
  } else if (dirName == "undef") {
    if (nameToken.empty()) {
      prescanner.Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "# missing or invalid name"_err_en_US);
    } else {
      if (dir.IsAnythingLeft(++j)) {
        prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#undef: excess tokens at end of directive"_en_US);
      } else {
        definitions_.erase(nameToken);
      }
    }
  } else if (dirName == "ifdef" || dirName == "ifndef") {
    bool doThen{false};
    if (nameToken.empty()) {
      prescanner.Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#%s: missing name"_err_en_US, dirName);
    } else {
      if (dir.IsAnythingLeft(++j)) {
        prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#%s: excess tokens at end of directive"_en_US, dirName);
      }
      doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
    }
    if (doThen) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenance(dirOffset));
    }
  } else if (dirName == "if") {
    if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "else") {
    if (dir.IsAnythingLeft(j)) {
      prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#else: excess tokens at end of directive"_en_US);
    } else if (ifStack_.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      // The live branch was taken; skip the #else branch.
      ifStack_.pop();
      SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "elif") {
    if (ifStack_.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      // A live branch was already taken; skip this #elif branch.
      ifStack_.pop();
      SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "endif") {
    if (dir.IsAnythingLeft(j)) {
      prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#endif: excess tokens at end of directive"_en_US);
    } else if (ifStack_.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
    }
  } else if (dirName == "error") {
    prescanner.Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_err_en_US, dir.ToString());
  } else if (dirName == "warning" || dirName == "comment" ||
      dirName == "note") {
    prescanner.Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_en_US, dir.ToString());
  } else if (dirName == "include") {
    if (j == tokens) {
      prescanner.Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#include: missing name of file to include"_err_en_US);
      return;
    }
    std::string include;
    std::optional<std::string> prependPath;
    if (dir.TokenAt(j).ToString() == "<") { // #include <foo>
      std::size_t k{j + 1};
      if (k >= tokens) {
        prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#include: file name missing"_err_en_US);
        return;
      }
      // Collect tokens up to the closing '>'.
      while (k < tokens && dir.TokenAt(k) != ">") {
        ++k;
      }
      if (k >= tokens) {
        prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#include: expected '>' at end of included file"_en_US);
      }
      // The bracketed name is subject to macro replacement.
      TokenSequence braced{dir, j + 1, k - j - 1};
      include = ReplaceMacros(braced, prescanner).ToString();
      j = k;
    } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
        include.substr(include.size() - 1, 1) == "\"") { // #include "foo"
      include = include.substr(1, include.size() - 2);
      // #include "foo" starts search in directory of file containing
      // the directive
      auto prov{dir.GetTokenProvenanceRange(dirOffset).start()};
      if (const auto *currentFile{allSources_.GetSourceFile(prov)}) {
        prependPath = DirectoryName(currentFile->path());
      }
    } else {
      prescanner.Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#include: expected name of file to include"_err_en_US);
      return;
    }
    if (include.empty()) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#include: empty include file name"_err_en_US);
      return;
    }
    j = dir.SkipBlanks(j + 1);
    if (j < tokens && dir.TokenAt(j).ToString() != "!") {
      prescanner.Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#include: extra stuff ignored after file name"_en_US);
    }
    std::string buf;
    llvm::raw_string_ostream error{buf};
    const SourceFile *included{
        allSources_.Open(include, error, std::move(prependPath))};
    if (!included) {
      prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
          "#include: %s"_err_en_US, error.str());
    } else if (included->bytes() > 0) {
      // Prescan the included file with a copy of the current prescanner.
      ProvenanceRange fileRange{
          allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
      Prescanner{prescanner}
          .set_encoding(included->encoding())
          .Prescan(fileRange);
    }
  } else {
    prescanner.Say(dir.GetTokenProvenanceRange(dirOffset),
        "#%s: unknown or unimplemented directive"_err_en_US, dirName);
  }
}
643
// Copies a token's characters into preprocessor-owned storage (names_)
// and returns a CharBlock referring to that stable copy, so that macro
// names outlive the source buffers they came from.
CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  return {names_.back().data(), names_.back().size()};
}
648
IsNameDefined(const CharBlock & token)649 bool Preprocessor::IsNameDefined(const CharBlock &token) {
650 return definitions_.find(token) != definitions_.end();
651 }
652
GetDirectiveName(const TokenSequence & line,std::size_t * rest)653 static std::string GetDirectiveName(
654 const TokenSequence &line, std::size_t *rest) {
655 std::size_t tokens{line.SizeInTokens()};
656 std::size_t j{line.SkipBlanks(0)};
657 if (j == tokens || line.TokenAt(j).ToString() != "#") {
658 *rest = tokens;
659 return "";
660 }
661 j = line.SkipBlanks(j + 1);
662 if (j == tokens) {
663 *rest = tokens;
664 return "";
665 }
666 *rest = line.SkipBlanks(j + 1);
667 return ToLowerCaseLetters(line.TokenAt(j).ToString());
668 }
669
// Consumes source lines in the dead branch of a conditional directive,
// honoring nested #if/#ifdef/#ifndef groups, until the matching #endif.
// When isElseActive is Yes, an #else (or an #elif whose predicate is
// true) at the outer level ends the skip and makes following code live.
// Reports an error if end of input is reached without a matching #endif.
void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
    IsElseActive isElseActive, Prescanner &prescanner,
    ProvenanceRange provenanceRange) {
  int nesting{0}; // depth of conditionals opened inside the dead code
  while (!prescanner.IsAtEnd()) {
    if (!prescanner.IsNextLinePreprocessorDirective()) {
      prescanner.NextLine();
      continue;
    }
    TokenSequence line{prescanner.TokenizePreprocessorDirective()};
    std::size_t rest{0};
    std::string dn{GetDirectiveName(line, &rest)};
    if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
      ++nesting;
    } else if (dn == "endif") {
      if (nesting-- == 0) {
        return; // closed the conditional being skipped
      }
    } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
      if (dn == "else") {
        // Code after #else becomes live; a later #else would be an error.
        ifStack_.push(CanDeadElseAppear::No);
        return;
      }
      if (dn == "elif" &&
          IsIfPredicateTrue(
              line, rest, line.SizeInTokens() - rest, prescanner)) {
        // A true #elif makes the following code live.
        ifStack_.push(CanDeadElseAppear::Yes);
        return;
      }
    }
  }
  prescanner.Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
}
703
704 // Precedence level codes used here to accommodate mixed Fortran and C:
705 // 15: parentheses and constants, logical !, bitwise ~
706 // 14: unary + and -
707 // 13: **
708 // 12: *, /, % (modulus)
709 // 11: + and -
710 // 10: << and >>
711 // 9: bitwise &
712 // 8: bitwise ^
713 // 7: bitwise |
714 // 6: relations (.EQ., ==, &c.)
715 // 5: .NOT.
716 // 4: .AND., &&
717 // 3: .OR., ||
718 // 2: .EQV. and .NEQV. / .XOR.
719 // 1: ? :
720 // 0: ,
ExpressionValue(const TokenSequence & token,int minimumPrecedence,std::size_t * atToken,std::optional<Message> * error)721 static std::int64_t ExpressionValue(const TokenSequence &token,
722 int minimumPrecedence, std::size_t *atToken,
723 std::optional<Message> *error) {
724 enum Operator {
725 PARENS,
726 CONST,
727 NOTZERO, // !
728 COMPLEMENT, // ~
729 UPLUS,
730 UMINUS,
731 POWER,
732 TIMES,
733 DIVIDE,
734 MODULUS,
735 ADD,
736 SUBTRACT,
737 LEFTSHIFT,
738 RIGHTSHIFT,
739 BITAND,
740 BITXOR,
741 BITOR,
742 LT,
743 LE,
744 EQ,
745 NE,
746 GE,
747 GT,
748 NOT,
749 AND,
750 OR,
751 EQV,
752 NEQV,
753 SELECT,
754 COMMA
755 };
756 static const int precedence[]{
757 15, 15, 15, 15, // (), 6, !, ~
758 14, 14, // unary +, -
759 13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
760 9, 8, 7, // &, ^, |
761 6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
762 5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
763 1, 0 // ?: and ,
764 };
765 static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
766 11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
767
768 static std::map<std::string, enum Operator> opNameMap;
769 if (opNameMap.empty()) {
770 opNameMap["("] = PARENS;
771 opNameMap["!"] = NOTZERO;
772 opNameMap["~"] = COMPLEMENT;
773 opNameMap["**"] = POWER;
774 opNameMap["*"] = TIMES;
775 opNameMap["/"] = DIVIDE;
776 opNameMap["%"] = MODULUS;
777 opNameMap["+"] = ADD;
778 opNameMap["-"] = SUBTRACT;
779 opNameMap["<<"] = LEFTSHIFT;
780 opNameMap[">>"] = RIGHTSHIFT;
781 opNameMap["&"] = BITAND;
782 opNameMap["^"] = BITXOR;
783 opNameMap["|"] = BITOR;
784 opNameMap[".lt."] = opNameMap["<"] = LT;
785 opNameMap[".le."] = opNameMap["<="] = LE;
786 opNameMap[".eq."] = opNameMap["=="] = EQ;
787 opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
788 opNameMap[".ge."] = opNameMap[">="] = GE;
789 opNameMap[".gt."] = opNameMap[">"] = GT;
790 opNameMap[".not."] = NOT;
791 opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
792 opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
793 opNameMap[".eqv."] = EQV;
794 opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
795 opNameMap["?"] = SELECT;
796 opNameMap[","] = COMMA;
797 }
798
799 std::size_t tokens{token.SizeInTokens()};
800 CHECK(tokens > 0);
801 if (*atToken >= tokens) {
802 *error =
803 Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
804 return 0;
805 }
806
807 // Parse and evaluate a primary or a unary operator and its operand.
808 std::size_t opAt{*atToken};
809 std::string t{token.TokenAt(opAt).ToString()};
810 enum Operator op;
811 std::int64_t left{0};
812 if (t == "(") {
813 op = PARENS;
814 } else if (IsDecimalDigit(t[0])) {
815 op = CONST;
816 std::size_t consumed{0};
817 left = std::stoll(t, &consumed, 0 /*base to be detected*/);
818 if (consumed < t.size()) {
819 *error = Message{token.GetTokenProvenanceRange(opAt),
820 "Uninterpretable numeric constant '%s'"_err_en_US, t};
821 return 0;
822 }
823 } else if (IsLegalIdentifierStart(t[0])) {
824 // undefined macro name -> zero
825 // TODO: BOZ constants?
826 op = CONST;
827 } else if (t == "+") {
828 op = UPLUS;
829 } else if (t == "-") {
830 op = UMINUS;
831 } else if (t == "." && *atToken + 2 < tokens &&
832 ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
833 token.TokenAt(*atToken + 2).ToString() == ".") {
834 op = NOT;
835 *atToken += 2;
836 } else {
837 auto it{opNameMap.find(t)};
838 if (it != opNameMap.end()) {
839 op = it->second;
840 } else {
841 *error = Message{token.GetTokenProvenanceRange(opAt),
842 "operand expected in expression"_err_en_US};
843 return 0;
844 }
845 }
846 if (precedence[op] < minimumPrecedence) {
847 *error = Message{token.GetTokenProvenanceRange(opAt),
848 "operator precedence error"_err_en_US};
849 return 0;
850 }
851 ++*atToken;
852 if (op != CONST) {
853 left = ExpressionValue(token, operandPrecedence[op], atToken, error);
854 if (*error) {
855 return 0;
856 }
857 switch (op) {
858 case PARENS:
859 if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
860 ++*atToken;
861 break;
862 }
863 if (*atToken >= tokens) {
864 *error = Message{token.GetProvenanceRange(),
865 "')' missing from expression"_err_en_US};
866 } else {
867 *error = Message{
868 token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
869 }
870 return 0;
871 case NOTZERO:
872 left = !left;
873 break;
874 case COMPLEMENT:
875 left = ~left;
876 break;
877 case UPLUS:
878 break;
879 case UMINUS:
880 left = -left;
881 break;
882 case NOT:
883 left = -!left;
884 break;
885 default:
886 CRASH_NO_CASE;
887 }
888 }
889
890 // Parse and evaluate binary operators and their second operands, if present.
891 while (*atToken < tokens) {
892 int advance{1};
893 t = token.TokenAt(*atToken).ToString();
894 if (t == "." && *atToken + 2 < tokens &&
895 token.TokenAt(*atToken + 2).ToString() == ".") {
896 t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
897 advance = 3;
898 }
899 auto it{opNameMap.find(t)};
900 if (it == opNameMap.end()) {
901 break;
902 }
903 op = it->second;
904 if (op < POWER || precedence[op] < minimumPrecedence) {
905 break;
906 }
907 opAt = *atToken;
908 *atToken += advance;
909
910 std::int64_t right{
911 ExpressionValue(token, operandPrecedence[op], atToken, error)};
912 if (*error) {
913 return 0;
914 }
915
916 switch (op) {
917 case POWER:
918 if (left == 0) {
919 if (right < 0) {
920 *error = Message{token.GetTokenProvenanceRange(opAt),
921 "0 ** negative power"_err_en_US};
922 }
923 } else if (left != 1 && right != 1) {
924 if (right <= 0) {
925 left = !right;
926 } else {
927 std::int64_t power{1};
928 for (; right > 0; --right) {
929 if ((power * left) / left != power) {
930 *error = Message{token.GetTokenProvenanceRange(opAt),
931 "overflow in exponentation"_err_en_US};
932 left = 1;
933 }
934 power *= left;
935 }
936 left = power;
937 }
938 }
939 break;
940 case TIMES:
941 if (left != 0 && right != 0 && ((left * right) / left) != right) {
942 *error = Message{token.GetTokenProvenanceRange(opAt),
943 "overflow in multiplication"_err_en_US};
944 }
945 left = left * right;
946 break;
947 case DIVIDE:
948 if (right == 0) {
949 *error = Message{
950 token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
951 left = 0;
952 } else {
953 left = left / right;
954 }
955 break;
956 case MODULUS:
957 if (right == 0) {
958 *error = Message{
959 token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
960 left = 0;
961 } else {
962 left = left % right;
963 }
964 break;
965 case ADD:
966 if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
967 *error = Message{token.GetTokenProvenanceRange(opAt),
968 "overflow in addition"_err_en_US};
969 }
970 left = left + right;
971 break;
972 case SUBTRACT:
973 if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
974 *error = Message{token.GetTokenProvenanceRange(opAt),
975 "overflow in subtraction"_err_en_US};
976 }
977 left = left - right;
978 break;
979 case LEFTSHIFT:
980 if (right < 0 || right > 64) {
981 *error = Message{token.GetTokenProvenanceRange(opAt),
982 "bad left shift count"_err_en_US};
983 }
984 left = right >= 64 ? 0 : left << right;
985 break;
986 case RIGHTSHIFT:
987 if (right < 0 || right > 64) {
988 *error = Message{token.GetTokenProvenanceRange(opAt),
989 "bad right shift count"_err_en_US};
990 }
991 left = right >= 64 ? 0 : left >> right;
992 break;
993 case BITAND:
994 case AND:
995 left = left & right;
996 break;
997 case BITXOR:
998 left = left ^ right;
999 break;
1000 case BITOR:
1001 case OR:
1002 left = left | right;
1003 break;
1004 case LT:
1005 left = -(left < right);
1006 break;
1007 case LE:
1008 left = -(left <= right);
1009 break;
1010 case EQ:
1011 left = -(left == right);
1012 break;
1013 case NE:
1014 left = -(left != right);
1015 break;
1016 case GE:
1017 left = -(left >= right);
1018 break;
1019 case GT:
1020 left = -(left > right);
1021 break;
1022 case EQV:
1023 left = -(!left == !right);
1024 break;
1025 case NEQV:
1026 left = -(!left != !right);
1027 break;
1028 case SELECT:
1029 if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
1030 *error = Message{token.GetTokenProvenanceRange(opAt),
1031 "':' required in selection expression"_err_en_US};
1032 return 0;
1033 } else {
1034 ++*atToken;
1035 std::int64_t third{
1036 ExpressionValue(token, operandPrecedence[op], atToken, error)};
1037 left = left != 0 ? right : third;
1038 }
1039 break;
1040 case COMMA:
1041 left = right;
1042 break;
1043 default:
1044 CRASH_NO_CASE;
1045 }
1046 }
1047 return left;
1048 }
1049
IsIfPredicateTrue(const TokenSequence & expr,std::size_t first,std::size_t exprTokens,Prescanner & prescanner)1050 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1051 std::size_t first, std::size_t exprTokens, Prescanner &prescanner) {
1052 TokenSequence expr1{expr, first, exprTokens};
1053 if (expr1.HasBlanks()) {
1054 expr1.RemoveBlanks();
1055 }
1056 TokenSequence expr2;
1057 for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1058 if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1059 CharBlock name;
1060 if (j + 3 < expr1.SizeInTokens() &&
1061 expr1.TokenAt(j + 1).ToString() == "(" &&
1062 expr1.TokenAt(j + 3).ToString() == ")") {
1063 name = expr1.TokenAt(j + 2);
1064 j += 3;
1065 } else if (j + 1 < expr1.SizeInTokens() &&
1066 IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1067 name = expr1.TokenAt(++j);
1068 }
1069 if (!name.empty()) {
1070 char truth{IsNameDefined(name) ? '1' : '0'};
1071 expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1072 continue;
1073 }
1074 }
1075 expr2.Put(expr1, j);
1076 }
1077 TokenSequence expr3{ReplaceMacros(expr2, prescanner)};
1078 if (expr3.HasBlanks()) {
1079 expr3.RemoveBlanks();
1080 }
1081 if (expr3.empty()) {
1082 prescanner.Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1083 return false;
1084 }
1085 std::size_t atToken{0};
1086 std::optional<Message> error;
1087 bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1088 if (error) {
1089 prescanner.Say(std::move(*error));
1090 } else if (atToken < expr3.SizeInTokens() &&
1091 expr3.TokenAt(atToken).ToString() != "!") {
1092 prescanner.Say(expr3.GetIntervalProvenanceRange(
1093 atToken, expr3.SizeInTokens() - atToken),
1094 atToken == 0 ? "could not parse any expression"_err_en_US
1095 : "excess characters after expression"_err_en_US);
1096 }
1097 return result;
1098 }
1099 } // namespace Fortran::parser
1100