1 // Copyright (c) 2018-2019, NVIDIA CORPORATION.  All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef FORTRAN_PARSER_PRESCAN_H_
16 #define FORTRAN_PARSER_PRESCAN_H_
17 
18 // Defines a fast Fortran source prescanning phase that implements some
19 // character-level features of the language that can be inefficient to
20 // support directly in a backtracking parser.  This phase handles Fortran
21 // line continuation, comment removal, card image margins, padding out
22 // fixed form character literals on truncated card images, file
23 // inclusion, and driving the Fortran source preprocessor.
24 
25 #include "characters.h"
26 #include "features.h"
27 #include "message.h"
28 #include "provenance.h"
29 #include "token-sequence.h"
30 #include <bitset>
31 #include <optional>
32 #include <string>
33 #include <unordered_set>
34 
35 namespace Fortran::parser {
36 
37 class Messages;
38 class Preprocessor;
39 
40 class Prescanner {
41 public:
42   Prescanner(
43       Messages &, CookedSource &, Preprocessor &, LanguageFeatureControl);
44   Prescanner(const Prescanner &);
45 
messages()46   Messages &messages() const { return messages_; }
47 
set_fixedForm(bool yes)48   Prescanner &set_fixedForm(bool yes) {
49     inFixedForm_ = yes;
50     return *this;
51   }
set_encoding(Encoding code)52   Prescanner &set_encoding(Encoding code) {
53     encoding_ = code;
54     return *this;
55   }
set_fixedFormColumnLimit(int limit)56   Prescanner &set_fixedFormColumnLimit(int limit) {
57     fixedFormColumnLimit_ = limit;
58     return *this;
59   }
60 
61   Prescanner &AddCompilerDirectiveSentinel(const std::string &);
62 
63   void Prescan(ProvenanceRange);
64   void Statement();
65   void NextLine();
66 
67   // Callbacks for use by Preprocessor.
IsAtEnd()68   bool IsAtEnd() const { return nextLine_ >= limit_; }
69   bool IsNextLinePreprocessorDirective() const;
70   TokenSequence TokenizePreprocessorDirective();
GetCurrentProvenance()71   Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
72 
Say(A &&...a)73   template<typename... A> Message &Say(A &&... a) {
74     Message &m{messages_.Say(std::forward<A>(a)...)};
75     std::optional<ProvenanceRange> range{m.GetProvenanceRange(cooked_)};
76     CHECK(!range.has_value() || cooked_.IsValid(*range));
77     return m;
78   }
79 
80 private:
81   struct LineClassification {
82     enum class Kind {
83       Comment,
84       ConditionalCompilationDirective,
85       IncludeDirective,  // #include
86       DefinitionDirective,  // #define & #undef
87       PreprocessorDirective,
88       IncludeLine,  // Fortran INCLUDE
89       CompilerDirective,
90       Source
91     };
92     LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr)
93       : kind{k}, payloadOffset{po}, sentinel{s} {}
94     LineClassification(LineClassification &&) = default;
95     Kind kind;
96     std::size_t payloadOffset;  // byte offset of content
97     const char *sentinel;  // if it's a compiler directive
98   };
99 
BeginSourceLine(const char * at)100   void BeginSourceLine(const char *at) {
101     at_ = at;
102     column_ = 1;
103     tabInCurrentLine_ = false;
104     slashInCurrentLine_ = false;
105     preventHollerith_ = false;
106     delimiterNesting_ = 0;
107   }
108 
BeginSourceLineAndAdvance()109   void BeginSourceLineAndAdvance() {
110     BeginSourceLine(nextLine_);
111     NextLine();
112   }
113 
GetProvenance(const char * sourceChar)114   Provenance GetProvenance(const char *sourceChar) const {
115     return startProvenance_ + (sourceChar - start_);
116   }
117 
GetProvenanceRange(const char * first,const char * afterLast)118   ProvenanceRange GetProvenanceRange(
119       const char *first, const char *afterLast) const {
120     std::size_t bytes = afterLast - first;
121     return {startProvenance_ + (first - start_), bytes};
122   }
123 
EmitChar(TokenSequence & tokens,char ch)124   void EmitChar(TokenSequence &tokens, char ch) {
125     tokens.PutNextTokenChar(ch, GetCurrentProvenance());
126   }
127 
EmitInsertedChar(TokenSequence & tokens,char ch)128   void EmitInsertedChar(TokenSequence &tokens, char ch) {
129     Provenance provenance{cooked_.allSources().CompilerInsertionProvenance(ch)};
130     tokens.PutNextTokenChar(ch, provenance);
131   }
132 
EmitCharAndAdvance(TokenSequence & tokens,char ch)133   char EmitCharAndAdvance(TokenSequence &tokens, char ch) {
134     EmitChar(tokens, ch);
135     NextChar();
136     return *at_;
137   }
138 
InCompilerDirective()139   bool InCompilerDirective() const { return directiveSentinel_ != nullptr; }
InFixedFormSource()140   bool InFixedFormSource() const {
141     return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
142   }
143 
IsCComment(const char * p)144   bool IsCComment(const char *p) const {
145     return p[0] == '/' && p[1] == '*' &&
146         (inPreprocessorDirective_ ||
147             (!inCharLiteral_ &&
148                 features_.IsEnabled(LanguageFeature::ClassicCComments)));
149   }
150 
151   void LabelField(TokenSequence &, int outCol = 1);
152   void SkipToEndOfLine();
153   bool MustSkipToEndOfLine() const;
154   void NextChar();
155   void SkipCComments();
156   void SkipSpaces();
157   static const char *SkipWhiteSpace(const char *);
158   const char *SkipWhiteSpaceAndCComments(const char *) const;
159   const char *SkipCComment(const char *) const;
160   bool NextToken(TokenSequence &);
161   bool ExponentAndKind(TokenSequence &);
162   void QuotedCharacterLiteral(TokenSequence &, const char *start);
163   void Hollerith(TokenSequence &, int count, const char *start);
164   bool PadOutCharacterLiteral(TokenSequence &);
165   bool SkipCommentLine(bool afterAmpersand);
166   bool IsFixedFormCommentLine(const char *) const;
167   const char *IsFreeFormComment(const char *) const;
168   std::optional<std::size_t> IsIncludeLine(const char *) const;
169   void FortranInclude(const char *quote);
170   const char *IsPreprocessorDirectiveLine(const char *) const;
171   const char *FixedFormContinuationLine(bool mightNeedSpace);
172   const char *FreeFormContinuationLine(bool ampersand);
173   bool FixedFormContinuation(bool mightNeedSpace);
174   bool FreeFormContinuation();
175   bool Continuation(bool mightNeedFixedFormSpace);
176   std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
177       const char *) const;
178   std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
179       const char *) const;
180   const char *IsCompilerDirectiveSentinel(const char *) const;
181   LineClassification ClassifyLine(const char *) const;
182   void SourceFormChange(std::string &&);
183 
184   Messages &messages_;
185   CookedSource &cooked_;
186   Preprocessor &preprocessor_;
187   LanguageFeatureControl features_;
188   bool inFixedForm_{false};
189   int fixedFormColumnLimit_{72};
190   Encoding encoding_{Encoding::UTF_8};
191   int delimiterNesting_{0};
192   int prescannerNesting_{0};
193 
194   Provenance startProvenance_;
195   const char *start_{nullptr};  // beginning of current source file content
196   const char *limit_{nullptr};  // first address after end of current source
197   const char *nextLine_{nullptr};  // next line to process; <= limit_
198   const char *directiveSentinel_{nullptr};  // current compiler directive
199 
200   // This data members are state for processing the source line containing
201   // "at_", which goes to up to the newline character before "nextLine_".
202   const char *at_{nullptr};  // next character to process; < nextLine_
203   int column_{1};  // card image column position of next character
204   bool tabInCurrentLine_{false};
205   bool slashInCurrentLine_{false};
206   bool preventHollerith_{false};
207   bool inCharLiteral_{false};
208   bool inPreprocessorDirective_{false};
209 
210   // In some edge cases of compiler directive continuation lines, it
211   // is necessary to treat the line break as a space character by
212   // setting this flag, which is cleared by EmitChar().
213   bool insertASpace_{false};
214 
215   // When a free form continuation marker (&) appears at the end of a line
216   // before a INCLUDE or #include, we delete it and omit the newline, so
217   // that the first line of the included file is truly a continuation of
218   // the line before.  Also used when the & appears at the end of the last
219   // line in an include file.
220   bool omitNewline_{false};
221   bool skipLeadingAmpersand_{false};
222 
223   const Provenance spaceProvenance_{
224       cooked_.allSources().CompilerInsertionProvenance(' ')};
225   const Provenance backslashProvenance_{
226       cooked_.allSources().CompilerInsertionProvenance('\\')};
227   const ProvenanceRange sixSpaceProvenance_{
228       cooked_.allSources().AddCompilerInsertion("      "s)};
229 
230   // To avoid probing the set of active compiler directive sentinel strings
231   // on every comment line, they're checked first with a cheap Bloom filter.
232   static const int prime1{1019}, prime2{1021};
233   std::bitset<prime2> compilerDirectiveBloomFilter_;  // 128 bytes
234   std::unordered_set<std::string> compilerDirectiveSentinels_;
235 };
236 }
237 #endif  // FORTRAN_PARSER_PRESCAN_H_
238