1 // Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "preprocessor.h"
16 #include "characters.h"
17 #include "message.h"
18 #include "prescan.h"
19 #include "../common/idioms.h"
20 #include <algorithm>
21 #include <cinttypes>
22 #include <cstddef>
23 #include <ctime>
24 #include <map>
25 #include <memory>
26 #include <optional>
27 #include <set>
28 #include <sstream>
29 #include <utility>
31 namespace Fortran::parser {
Definition(const TokenSequence & repl,std::size_t firstToken,std::size_t tokens)33 Definition::Definition(
34 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
35 : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
Definition(const std::vector<std::string> & argNames,const TokenSequence & repl,std::size_t firstToken,std::size_t tokens,bool isVariadic)37 Definition::Definition(const std::vector<std::string> &argNames,
38 const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
39 bool isVariadic)
40 : isFunctionLike_{true},
41 argumentCount_(argNames.size()), isVariadic_{isVariadic},
42 replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
Definition(const std::string & predefined,AllSources & sources)44 Definition::Definition(const std::string &predefined, AllSources &sources)
45 : isPredefined_{true}, replacement_{predefined,
46 sources.AddCompilerInsertion(predefined).start()} {
47 }
set_isDisabled(bool disable)49 bool Definition::set_isDisabled(bool disable) {
50 bool was{isDisabled_};
51 isDisabled_ = disable;
52 return was;
53 }
IsLegalIdentifierStart(const CharBlock & cpl)55 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
56 return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
57 }
// Converts `tokens` tokens of `token`, beginning at index `firstToken`, into
// a macro replacement body.  Every identifier that matches an entry of
// `argNames` is replaced with a two-character placeholder: '~' followed by
// a letter ('A' for the first argument name, 'B' for the second, &c.).
// Definition::Apply() later recognizes these placeholders.  All other
// tokens are copied through unchanged.  The names in `argNames` must be
// distinct.
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end());
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    const std::size_t at{firstToken + j};
    CharBlock tok{token.TokenAt(at)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        // The placeholder stands in for the token at index `at`, so it must
        // carry that token's provenance (not that of token `j`, which is
        // some earlier token of the directive whenever firstToken > 0).
        result.Put(it->second, token.GetTokenProvenance(at));
        continue;
      }
    }
    result.Put(token, at, 1);
  }
  return result;
}
AfterLastNonBlank(const TokenSequence & tokens)82 static std::size_t AfterLastNonBlank(const TokenSequence &tokens) {
83 for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) {
84 if (!tokens.TokenAt(j - 1).IsBlank()) {
85 return j;
86 }
87 }
88 return 0;
89 }
// Implements the '#' stringification operator: returns a single token that
// holds all of the characters of `tokens` between double quotes.  Each
// double quote or backslash in the input is emitted twice.  The enclosing
// quotes get a compiler-insertion provenance; every other character keeps
// the provenance that it had in `tokens`.
static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence quoted;
  Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
  quoted.PutNextTokenChar('"', quoteProvenance);
  for (std::size_t t{0}; t < tokens.SizeInTokens(); ++t) {
    const CharBlock &text{tokens.TokenAt(t)};
    const std::size_t length{text.size()};
    for (std::size_t at{0}; at < length; ++at) {
      const char ch{text[at]};
      Provenance from{tokens.GetTokenProvenance(t, at)};
      const bool isSpecial{ch == '\\' || ch == '"'};
      if (isSpecial) {
        quoted.PutNextTokenChar(ch, from);  // emit special characters twice
      }
      quoted.PutNextTokenChar(ch, from);
    }
  }
  quoted.PutNextTokenChar('"', quoteProvenance);
  quoted.CloseToken();
  return quoted;
}
Apply(const std::vector<TokenSequence> & args,AllSources & allSources)113 TokenSequence Definition::Apply(
114 const std::vector<TokenSequence> &args, AllSources &allSources) {
115 TokenSequence result;
116 bool pasting{false};
117 bool skipping{false};
118 int parenthesesNesting{0};
119 std::size_t tokens{replacement_.SizeInTokens()};
120 for (std::size_t j{0}; j < tokens; ++j) {
121 const CharBlock &token{replacement_.TokenAt(j)};
122 std::size_t bytes{token.size()};
123 if (skipping) {
124 if (bytes == 1) {
125 if (token[0] == '(') {
126 ++parenthesesNesting;
127 } else if (token[0] == ')') {
128 skipping = --parenthesesNesting > 0;
129 }
130 }
131 continue;
132 }
133 if (bytes == 2 && token[0] == '~') {
134 std::size_t index = token[1] - 'A';
135 if (index >= args.size()) {
136 continue;
137 }
138 std::size_t afterLastNonBlank{AfterLastNonBlank(result)};
139 if (afterLastNonBlank > 0 &&
140 result.TokenAt(afterLastNonBlank - 1).ToString() == "#") {
141 // stringifying
142 while (result.SizeInTokens() >= afterLastNonBlank) {
143 result.pop_back();
144 }
145 result.Put(Stringify(args[index], allSources));
146 } else {
147 std::size_t argTokens{args[index].SizeInTokens()};
148 for (std::size_t k{0}; k < argTokens; ++k) {
149 if (!pasting || !args[index].TokenAt(k).IsBlank()) {
150 result.Put(args[index], k);
151 pasting = false;
152 }
153 }
154 }
155 } else if (bytes == 2 && token[0] == '#' && token[1] == '#') {
156 // Token pasting operator in body (not expanded argument); discard any
157 // immediately preceding white space, then reopen the last token.
158 while (!result.empty() &&
159 result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
160 result.pop_back();
161 }
162 if (!result.empty()) {
163 result.ReopenLastToken();
164 pasting = true;
165 }
166 } else if (pasting && token.IsBlank()) {
167 // Delete whitespace immediately following ## in the body.
168 } else if (bytes == 11 && isVariadic_ &&
169 token.ToString() == "__VA_ARGS__") {
170 Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')};
171 for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
172 if (k > argumentCount_) {
173 result.Put(","s, commaProvenance);
174 }
175 result.Put(args[k]);
176 }
177 } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
178 j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
179 parenthesesNesting == 0) {
180 parenthesesNesting = 1;
181 skipping = args.size() == argumentCount_;
182 ++j;
183 } else {
184 if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
185 ++parenthesesNesting;
186 } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
187 if (--parenthesesNesting == 0) {
188 skipping = false;
189 continue;
190 }
191 }
192 result.Put(replacement_, j);
193 }
194 }
195 return result;
196 }
// Formats the local-time representation of `now` according to the
// std::strftime() conversion string `format`.  Returns an empty string if
// the time cannot be converted to local time or if the formatted text
// (plus its terminating NUL) does not fit in the internal buffer.
static std::string FormatTime(const std::time_t &now, const char *format) {
  const std::tm *local{std::localtime(&now)};
  if (local == nullptr) {
    return {};  // std::strftime() must not be handed a null pointer
  }
  // Sized generously: std::strftime() returns 0 and leaves the buffer
  // contents unspecified when the result does not fit.
  char buffer[64];
  const std::size_t length{
      std::strftime(buffer, sizeof buffer, format, local)};
  return std::string(buffer, length);
}
Preprocessor(AllSources & allSources)204 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
205 // Capture current local date & time once now to avoid having the values
206 // of __DATE__ or __TIME__ change during compilation.
207 std::time_t now;
208 std::time(&now);
209 definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904"
210 Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
211 definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60"
212 Definition{FormatTime(now, "\"%T\""), allSources});
213 // The values of these predefined macros depend on their invocation sites.
214 definitions_.emplace(
215 SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
216 definitions_.emplace(
217 SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
218 }
Define(std::string macro,std::string value)220 void Preprocessor::Define(std::string macro, std::string value) {
221 definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
222 }
Undefine(std::string macro)224 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
MacroReplacement(const TokenSequence & input,const Prescanner & prescanner)226 std::optional<TokenSequence> Preprocessor::MacroReplacement(
227 const TokenSequence &input, const Prescanner &prescanner) {
228 // Do quick scan for any use of a defined name.
229 std::size_t tokens{input.SizeInTokens()};
230 std::size_t j;
231 for (j = 0; j < tokens; ++j) {
232 CharBlock token{input.TokenAt(j)};
233 if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
234 IsNameDefined(token)) {
235 break;
236 }
237 }
238 if (j == tokens) {
239 return std::nullopt; // input contains nothing that would be replaced
240 }
241 TokenSequence result{input, 0, j};
242 for (; j < tokens; ++j) {
243 const CharBlock &token{input.TokenAt(j)};
244 if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
245 result.Put(input, j);
246 continue;
247 }
248 auto it{definitions_.find(token)};
249 if (it == definitions_.end()) {
250 result.Put(input, j);
251 continue;
252 }
253 Definition &def{it->second};
254 if (def.isDisabled()) {
255 result.Put(input, j);
256 continue;
257 }
258 if (!def.isFunctionLike()) {
259 if (def.isPredefined()) {
260 std::string name{def.replacement().TokenAt(0).ToString()};
261 std::string repl;
262 if (name == "__FILE__") {
263 repl = "\""s +
264 allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
265 } else if (name == "__LINE__") {
266 std::stringstream ss;
267 ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
268 repl = ss.str();
269 }
270 if (!repl.empty()) {
271 ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
272 ProvenanceRange call{allSources_.AddMacroCall(
273 insert, input.GetTokenProvenanceRange(j), repl)};
274 result.Put(repl, call.start());
275 continue;
276 }
277 }
278 def.set_isDisabled(true);
279 TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)};
280 def.set_isDisabled(false);
281 if (!replaced.empty()) {
282 ProvenanceRange from{def.replacement().GetProvenanceRange()};
283 ProvenanceRange use{input.GetTokenProvenanceRange(j)};
284 ProvenanceRange newRange{
285 allSources_.AddMacroCall(from, use, replaced.ToString())};
286 result.Put(replaced, newRange);
287 }
288 continue;
289 }
290 // Possible function-like macro call. Skip spaces and newlines to see
291 // whether '(' is next.
292 std::size_t k{j};
293 bool leftParen{false};
294 while (++k < tokens) {
295 const CharBlock &lookAhead{input.TokenAt(k)};
296 if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
297 leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
298 break;
299 }
300 }
301 if (!leftParen) {
302 result.Put(input, j);
303 continue;
304 }
305 std::vector<std::size_t> argStart{++k};
306 for (int nesting{0}; k < tokens; ++k) {
307 CharBlock token{input.TokenAt(k)};
308 if (token.size() == 1) {
309 char ch{token[0]};
310 if (ch == '(') {
311 ++nesting;
312 } else if (ch == ')') {
313 if (nesting == 0) {
314 break;
315 }
316 --nesting;
317 } else if (ch == ',' && nesting == 0) {
318 argStart.push_back(k + 1);
319 }
320 }
321 }
322 if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
323 // Subtle: () is zero arguments, not one empty argument,
324 // unless one argument was expected.
325 argStart.clear();
326 }
327 if (k >= tokens || argStart.size() < def.argumentCount() ||
328 (argStart.size() > def.argumentCount() && !def.isVariadic())) {
329 result.Put(input, j);
330 continue;
331 }
332 std::vector<TokenSequence> args;
333 for (std::size_t n{0}; n < argStart.size(); ++n) {
334 std::size_t at{argStart[n]};
335 std::size_t count{
336 (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
337 args.emplace_back(TokenSequence(input, at, count));
338 }
339 def.set_isDisabled(true);
340 TokenSequence replaced{
341 ReplaceMacros(def.Apply(args, allSources_), prescanner)};
342 def.set_isDisabled(false);
343 if (!replaced.empty()) {
344 ProvenanceRange from{def.replacement().GetProvenanceRange()};
345 ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
346 ProvenanceRange newRange{
347 allSources_.AddMacroCall(from, use, replaced.ToString())};
348 result.Put(replaced, newRange);
349 }
350 j = k; // advance to the terminal ')'
351 }
352 return result;
353 }
// Returns `tokens` with macros replaced; when MacroReplacement() finds
// nothing to replace, a copy of `tokens` itself is returned.
TokenSequence Preprocessor::ReplaceMacros(
    const TokenSequence &tokens, const Prescanner &prescanner) {
  std::optional<TokenSequence> replaced{MacroReplacement(tokens, prescanner)};
  if (!replaced.has_value()) {
    return tokens;
  }
  return std::move(*replaced);
}
Directive(const TokenSequence & dir,Prescanner * prescanner)363 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
364 std::size_t tokens{dir.SizeInTokens()};
365 std::size_t j{dir.SkipBlanks(0)};
366 if (j == tokens) {
367 return;
368 }
369 if (dir.TokenAt(j).ToString() != "#") {
370 prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
371 return;
372 }
373 j = dir.SkipBlanks(j + 1);
374 while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
375 --tokens;
376 }
377 if (j == tokens) {
378 return;
379 }
380 if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
381 return; // treat like #line, ignore it
382 }
383 std::size_t dirOffset{j};
384 std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
385 j = dir.SkipBlanks(j + 1);
386 CharBlock nameToken;
387 if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
388 nameToken = dir.TokenAt(j);
389 }
390 if (dirName == "line") {
391 // #line is ignored
392 } else if (dirName == "define") {
393 if (nameToken.empty()) {
394 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
395 "#define: missing or invalid name"_err_en_US);
396 return;
397 }
398 nameToken = SaveTokenAsName(nameToken);
399 definitions_.erase(nameToken);
400 if (++j < tokens && dir.TokenAt(j).size() == 1 &&
401 dir.TokenAt(j)[0] == '(') {
402 j = dir.SkipBlanks(j + 1);
403 std::vector<std::string> argName;
404 bool isVariadic{false};
405 if (dir.TokenAt(j).ToString() != ")") {
406 while (true) {
407 std::string an{dir.TokenAt(j).ToString()};
408 if (an == "...") {
409 isVariadic = true;
410 } else {
411 if (an.empty() || !IsLegalIdentifierStart(an[0])) {
412 prescanner->Say(dir.GetTokenProvenanceRange(j),
413 "#define: missing or invalid argument name"_err_en_US);
414 return;
415 }
416 argName.push_back(an);
417 }
418 j = dir.SkipBlanks(j + 1);
419 if (j == tokens) {
420 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
421 "#define: malformed argument list"_err_en_US);
422 return;
423 }
424 std::string punc{dir.TokenAt(j).ToString()};
425 if (punc == ")") {
426 break;
427 }
428 if (isVariadic || punc != ",") {
429 prescanner->Say(dir.GetTokenProvenanceRange(j),
430 "#define: malformed argument list"_err_en_US);
431 return;
432 }
433 j = dir.SkipBlanks(j + 1);
434 if (j == tokens) {
435 prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
436 "#define: malformed argument list"_err_en_US);
437 return;
438 }
439 }
440 if (std::set<std::string>(argName.begin(), argName.end()).size() !=
441 argName.size()) {
442 prescanner->Say(dir.GetTokenProvenance(dirOffset),
443 "#define: argument names are not distinct"_err_en_US);
444 return;
445 }
446 }
447 j = dir.SkipBlanks(j + 1);
448 definitions_.emplace(std::make_pair(
449 nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
450 } else {
451 j = dir.SkipBlanks(j + 1);
452 definitions_.emplace(
453 std::make_pair(nameToken, Definition{dir, j, tokens - j}));
454 }
455 } else if (dirName == "undef") {
456 if (nameToken.empty()) {
457 prescanner->Say(
458 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
459 "# missing or invalid name"_err_en_US);
460 } else {
461 j = dir.SkipBlanks(j + 1);
462 if (j != tokens) {
463 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
464 "#undef: excess tokens at end of directive"_err_en_US);
465 } else {
466 definitions_.erase(nameToken);
467 }
468 }
469 } else if (dirName == "ifdef" || dirName == "ifndef") {
470 bool doThen{false};
471 if (nameToken.empty()) {
472 prescanner->Say(
473 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
474 "#%s: missing name"_err_en_US, dirName);
475 } else {
476 j = dir.SkipBlanks(j + 1);
477 if (j != tokens) {
478 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
479 "#%s: excess tokens at end of directive"_en_US, dirName);
480 }
481 doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
482 }
483 if (doThen) {
484 ifStack_.push(CanDeadElseAppear::Yes);
485 } else {
486 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
487 dir.GetTokenProvenance(dirOffset));
488 }
489 } else if (dirName == "if") {
490 if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
491 ifStack_.push(CanDeadElseAppear::Yes);
492 } else {
493 SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
494 dir.GetTokenProvenanceRange(dirOffset));
495 }
496 } else if (dirName == "else") {
497 if (j != tokens) {
498 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
499 "#else: excess tokens at end of directive"_err_en_US);
500 } else if (ifStack_.empty()) {
501 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
502 "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
503 } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
504 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
505 "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
506 } else {
507 ifStack_.pop();
508 SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
509 dir.GetTokenProvenanceRange(dirOffset));
510 }
511 } else if (dirName == "elif") {
512 if (ifStack_.empty()) {
513 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
514 "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
515 } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
516 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
517 "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
518 } else {
519 ifStack_.pop();
520 SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
521 dir.GetTokenProvenanceRange(dirOffset));
522 }
523 } else if (dirName == "endif") {
524 if (j != tokens) {
525 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
526 "#endif: excess tokens at end of directive"_err_en_US);
527 } else if (ifStack_.empty()) {
528 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
529 "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
530 } else {
531 ifStack_.pop();
532 }
533 } else if (dirName == "error") {
534 prescanner->Say(
535 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
536 "%s"_err_en_US, dir.ToString());
537 } else if (dirName == "warning" || dirName == "comment" ||
538 dirName == "note") {
539 prescanner->Say(
540 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
541 "%s"_en_US, dir.ToString());
542 } else if (dirName == "include") {
543 if (j == tokens) {
544 prescanner->Say(
545 dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
546 "#include: missing name of file to include"_err_en_US);
547 return;
548 }
549 std::string include;
550 if (dir.TokenAt(j).ToString() == "<") {
551 std::size_t k{j + 1};
552 if (k >= tokens) {
553 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
554 "#include: file name missing"_err_en_US);
555 return;
556 }
557 while (k < tokens && dir.TokenAt(k) != ">") {
558 ++k;
559 }
560 if (k >= tokens) {
561 prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
562 "#include: expected '>' at end of included file"_en_US);
563 } else if (k + 1 < tokens) {
564 prescanner->Say(dir.GetIntervalProvenanceRange(k + 1, tokens - k - 1),
565 "#include: extra stuff ignored after '>'"_en_US);
566 }
567 TokenSequence braced{dir, j + 1, k - j - 1};
568 include = ReplaceMacros(braced, *prescanner).ToString();
569 } else if (j + 1 == tokens &&
570 (include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
571 include.substr(include.size() - 1, 1) == "\"") {
572 include = include.substr(1, include.size() - 2);
573 } else {
574 prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
575 "#include: expected name of file to include"_err_en_US);
576 return;
577 }
578 if (include.empty()) {
579 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
580 "#include: empty include file name"_err_en_US);
581 return;
582 }
583 std::stringstream error;
584 const SourceFile *included{allSources_.Open(include, &error)};
585 if (included == nullptr) {
586 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
587 "#include: %s"_err_en_US, error.str());
588 } else if (included->bytes() > 0) {
589 ProvenanceRange fileRange{
590 allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
591 Prescanner{*prescanner}
592 .set_encoding(included->encoding())
593 .Prescan(fileRange);
594 }
595 } else {
596 prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
597 "#%s: unknown or unimplemented directive"_err_en_US, dirName);
598 }
599 }
// Appends a copy of the text of token `t` to names_ and returns a CharBlock
// that refers to that stored copy rather than to the original characters.
CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  std::string &saved{names_.back()};
  return {saved.data(), saved.size()};
}
IsNameDefined(const CharBlock & token)606 bool Preprocessor::IsNameDefined(const CharBlock &token) {
607 return definitions_.find(token) != definitions_.end();
608 }
GetDirectiveName(const TokenSequence & line,std::size_t * rest)610 static std::string GetDirectiveName(
611 const TokenSequence &line, std::size_t *rest) {
612 std::size_t tokens{line.SizeInTokens()};
613 std::size_t j{line.SkipBlanks(0)};
614 if (j == tokens || line.TokenAt(j).ToString() != "#") {
615 *rest = tokens;
616 return "";
617 }
618 j = line.SkipBlanks(j + 1);
619 if (j == tokens) {
620 *rest = tokens;
621 return "";
622 }
623 *rest = line.SkipBlanks(j + 1);
624 return ToLowerCaseLetters(line.TokenAt(j).ToString());
625 }
SkipDisabledConditionalCode(const std::string & dirName,IsElseActive isElseActive,Prescanner * prescanner,ProvenanceRange provenanceRange)627 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
628 IsElseActive isElseActive, Prescanner *prescanner,
629 ProvenanceRange provenanceRange) {
630 int nesting{0};
631 while (!prescanner->IsAtEnd()) {
632 if (!prescanner->IsNextLinePreprocessorDirective()) {
633 prescanner->NextLine();
634 continue;
635 }
636 TokenSequence line{prescanner->TokenizePreprocessorDirective()};
637 std::size_t rest{0};
638 std::string dn{GetDirectiveName(line, &rest)};
639 if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
640 ++nesting;
641 } else if (dn == "endif") {
642 if (nesting-- == 0) {
643 return;
644 }
645 } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
646 if (dn == "else") {
647 ifStack_.push(CanDeadElseAppear::No);
648 return;
649 }
650 if (dn == "elif" &&
651 IsIfPredicateTrue(
652 line, rest, line.SizeInTokens() - rest, prescanner)) {
653 ifStack_.push(CanDeadElseAppear::Yes);
654 return;
655 }
656 }
657 }
658 prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
659 }
661 // Precedence level codes used here to accommodate mixed Fortran and C:
662 // 15: parentheses and constants, logical !, bitwise ~
663 // 14: unary + and -
664 // 13: **
665 // 12: *, /, % (modulus)
666 // 11: + and -
667 // 10: << and >>
668 // 9: bitwise &
669 // 8: bitwise ^
670 // 7: bitwise |
671 // 6: relations (.EQ., ==, &c.)
672 // 5: .NOT.
673 // 4: .AND., &&
674 // 3: .OR., ||
675 // 2: .EQV. and .NEQV. / .XOR.
676 // 1: ? :
677 // 0: ,
ExpressionValue(const TokenSequence & token,int minimumPrecedence,std::size_t * atToken,std::optional<Message> * error)678 static std::int64_t ExpressionValue(const TokenSequence &token,
679 int minimumPrecedence, std::size_t *atToken,
680 std::optional<Message> *error) {
681 enum Operator {
683 CONST,
684 NOTZERO, // !
685 COMPLEMENT, // ~
686 UPLUS,
688 POWER,
689 TIMES,
692 ADD,
698 BITOR,
699 LT,
700 LE,
701 EQ,
702 NE,
703 GE,
704 GT,
705 NOT,
706 AND,
707 OR,
708 EQV,
709 NEQV,
712 };
713 static const int precedence[]{
714 15, 15, 15, 15, // (), 6, !, ~
715 14, 14, // unary +, -
716 13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
717 9, 8, 7, // &, ^, |
718 6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
719 5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
720 1, 0 // ?: and ,
721 };
722 static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
723 11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
725 static std::map<std::string, enum Operator> opNameMap;
726 if (opNameMap.empty()) {
727 opNameMap["("] = PARENS;
728 opNameMap["!"] = NOTZERO;
729 opNameMap["~"] = COMPLEMENT;
730 opNameMap["**"] = POWER;
731 opNameMap["*"] = TIMES;
732 opNameMap["/"] = DIVIDE;
733 opNameMap["%"] = MODULUS;
734 opNameMap["+"] = ADD;
735 opNameMap["-"] = SUBTRACT;
736 opNameMap["<<"] = LEFTSHIFT;
737 opNameMap[">>"] = RIGHTSHIFT;
738 opNameMap["&"] = BITAND;
739 opNameMap["^"] = BITXOR;
740 opNameMap["|"] = BITOR;
741 opNameMap[".lt."] = opNameMap["<"] = LT;
742 opNameMap[".le."] = opNameMap["<="] = LE;
743 opNameMap[".eq."] = opNameMap["=="] = EQ;
744 opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
745 opNameMap[".ge."] = opNameMap[">="] = GE;
746 opNameMap[".gt."] = opNameMap[">"] = GT;
747 opNameMap[".not."] = NOT;
748 opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
749 opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
750 opNameMap[".eqv."] = EQV;
751 opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
752 opNameMap["?"] = SELECT;
753 opNameMap[","] = COMMA;
754 }
756 std::size_t tokens{token.SizeInTokens()};
757 CHECK(tokens > 0);
758 if (*atToken >= tokens) {
759 *error =
760 Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
761 return 0;
762 }
764 // Parse and evaluate a primary or a unary operator and its operand.
765 std::size_t opAt{*atToken};
766 std::string t{token.TokenAt(opAt).ToString()};
767 enum Operator op;
768 std::int64_t left{0};
769 if (t == "(") {
770 op = PARENS;
771 } else if (IsDecimalDigit(t[0])) {
772 op = CONST;
773 std::size_t consumed{0};
774 left = std::stoll(t, &consumed, 0 /*base to be detected*/);
775 if (consumed < t.size()) {
776 *error = Message{token.GetTokenProvenanceRange(opAt),
777 "Uninterpretable numeric constant '%s'"_err_en_US, t};
778 return 0;
779 }
780 } else if (IsLegalIdentifierStart(t[0])) {
781 // undefined macro name -> zero
782 // TODO: BOZ constants?
783 op = CONST;
784 } else if (t == "+") {
785 op = UPLUS;
786 } else if (t == "-") {
787 op = UMINUS;
788 } else if (t == "." && *atToken + 2 < tokens &&
789 ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
790 token.TokenAt(*atToken + 2).ToString() == ".") {
791 op = NOT;
792 *atToken += 2;
793 } else {
794 auto it{opNameMap.find(t)};
795 if (it != opNameMap.end()) {
796 op = it->second;
797 } else {
798 *error = Message{token.GetTokenProvenanceRange(opAt),
799 "operand expected in expression"_err_en_US};
800 return 0;
801 }
802 }
803 if (precedence[op] < minimumPrecedence) {
804 *error = Message{token.GetTokenProvenanceRange(opAt),
805 "operator precedence error"_err_en_US};
806 return 0;
807 }
808 ++*atToken;
809 if (op != CONST) {
810 left = ExpressionValue(token, operandPrecedence[op], atToken, error);
811 if (error->has_value()) {
812 return 0;
813 }
814 switch (op) {
815 case PARENS:
816 if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
817 ++*atToken;
818 break;
819 }
820 if (*atToken >= tokens) {
821 *error = Message{token.GetProvenanceRange(),
822 "')' missing from expression"_err_en_US};
823 } else {
824 *error = Message{
825 token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
826 }
827 return 0;
828 case NOTZERO: left = !left; break;
829 case COMPLEMENT: left = ~left; break;
830 case UPLUS: break;
831 case UMINUS: left = -left; break;
832 case NOT: left = -!left; break;
833 default: CRASH_NO_CASE;
834 }
835 }
837 // Parse and evaluate binary operators and their second operands, if present.
838 while (*atToken < tokens) {
839 int advance{1};
840 t = token.TokenAt(*atToken).ToString();
841 if (t == "." && *atToken + 2 < tokens &&
842 token.TokenAt(*atToken + 2).ToString() == ".") {
843 t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
844 advance = 3;
845 }
846 auto it{opNameMap.find(t)};
847 if (it == opNameMap.end()) {
848 break;
849 }
850 op = it->second;
851 if (op < POWER || precedence[op] < minimumPrecedence) {
852 break;
853 }
854 opAt = *atToken;
855 *atToken += advance;
857 std::int64_t right{
858 ExpressionValue(token, operandPrecedence[op], atToken, error)};
859 if (error->has_value()) {
860 return 0;
861 }
863 switch (op) {
864 case POWER:
865 if (left == 0) {
866 if (right < 0) {
867 *error = Message{token.GetTokenProvenanceRange(opAt),
868 "0 ** negative power"_err_en_US};
869 }
870 } else if (left != 1 && right != 1) {
871 if (right <= 0) {
872 left = !right;
873 } else {
874 std::int64_t power{1};
875 for (; right > 0; --right) {
876 if ((power * left) / left != power) {
877 *error = Message{token.GetTokenProvenanceRange(opAt),
878 "overflow in exponentation"_err_en_US};
879 left = 1;
880 }
881 power *= left;
882 }
883 left = power;
884 }
885 }
886 break;
887 case TIMES:
888 if (left != 0 && right != 0 && ((left * right) / left) != right) {
889 *error = Message{token.GetTokenProvenanceRange(opAt),
890 "overflow in multiplication"_err_en_US};
891 }
892 left = left * right;
893 break;
894 case DIVIDE:
895 if (right == 0) {
896 *error = Message{
897 token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
898 left = 0;
899 } else {
900 left = left / right;
901 }
902 break;
903 case MODULUS:
904 if (right == 0) {
905 *error = Message{
906 token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
907 left = 0;
908 } else {
909 left = left % right;
910 }
911 break;
912 case ADD:
913 if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
914 *error = Message{token.GetTokenProvenanceRange(opAt),
915 "overflow in addition"_err_en_US};
916 }
917 left = left + right;
918 break;
919 case SUBTRACT:
920 if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
921 *error = Message{token.GetTokenProvenanceRange(opAt),
922 "overflow in subtraction"_err_en_US};
923 }
924 left = left - right;
925 break;
926 case LEFTSHIFT:
927 if (right < 0 || right > 64) {
928 *error = Message{token.GetTokenProvenanceRange(opAt),
929 "bad left shift count"_err_en_US};
930 }
931 left = right >= 64 ? 0 : left << right;
932 break;
933 case RIGHTSHIFT:
934 if (right < 0 || right > 64) {
935 *error = Message{token.GetTokenProvenanceRange(opAt),
936 "bad right shift count"_err_en_US};
937 }
938 left = right >= 64 ? 0 : left >> right;
939 break;
940 case BITAND:
941 case AND: left = left & right; break;
942 case BITXOR: left = left ^ right; break;
943 case BITOR:
944 case OR: left = left | right; break;
945 case LT: left = -(left < right); break;
946 case LE: left = -(left <= right); break;
947 case EQ: left = -(left == right); break;
948 case NE: left = -(left != right); break;
949 case GE: left = -(left >= right); break;
950 case GT: left = -(left > right); break;
951 case EQV: left = -(!left == !right); break;
952 case NEQV: left = -(!left != !right); break;
953 case SELECT:
954 if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
955 *error = Message{token.GetTokenProvenanceRange(opAt),
956 "':' required in selection expression"_err_en_US};
957 return 0;
958 } else {
959 ++*atToken;
960 std::int64_t third{
961 ExpressionValue(token, operandPrecedence[op], atToken, error)};
962 left = left != 0 ? right : third;
963 }
964 break;
965 case COMMA: left = right; break;
966 default: CRASH_NO_CASE;
967 }
968 }
969 return left;
970 }
IsIfPredicateTrue(const TokenSequence & expr,std::size_t first,std::size_t exprTokens,Prescanner * prescanner)972 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
973 std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
974 TokenSequence expr1{expr, first, exprTokens};
975 if (expr1.HasBlanks()) {
976 expr1.RemoveBlanks();
977 }
978 TokenSequence expr2;
979 for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
980 if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
981 CharBlock name;
982 if (j + 3 < expr1.SizeInTokens() &&
983 expr1.TokenAt(j + 1).ToString() == "(" &&
984 expr1.TokenAt(j + 3).ToString() == ")") {
985 name = expr1.TokenAt(j + 2);
986 j += 3;
987 } else if (j + 1 < expr1.SizeInTokens() &&
988 IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
989 name = expr1.TokenAt(j++);
990 }
991 if (!name.empty()) {
992 char truth{IsNameDefined(name) ? '1' : '0'};
993 expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
994 continue;
995 }
996 }
997 expr2.Put(expr1, j);
998 }
999 TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
1000 if (expr3.HasBlanks()) {
1001 expr3.RemoveBlanks();
1002 }
1003 if (expr3.empty()) {
1004 prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1005 return false;
1006 }
1007 std::size_t atToken{0};
1008 std::optional<Message> error;
1009 bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1010 if (error.has_value()) {
1011 prescanner->Say(std::move(*error));
1012 } else if (atToken < expr3.SizeInTokens() &&
1013 expr3.TokenAt(atToken).ToString() != "!") {
1014 prescanner->Say(expr3.GetIntervalProvenanceRange(
1015 atToken, expr3.SizeInTokens() - atToken),
1016 atToken == 0 ? "could not parse any expression"_err_en_US
1017 : "excess characters after expression"_err_en_US);
1018 }
1019 return result;
1020 }
1021 }