1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_PARSER_PRESCAN_H_
10 #define FORTRAN_PARSER_PRESCAN_H_
11 
12 // Defines a fast Fortran source prescanning phase that implements some
13 // character-level features of the language that can be inefficient to
14 // support directly in a backtracking parser.  This phase handles Fortran
15 // line continuation, comment removal, card image margins, padding out
16 // fixed form character literals on truncated card images, file
17 // inclusion, and driving the Fortran source preprocessor.
18 
19 #include "token-sequence.h"
20 #include "flang/Common/Fortran-features.h"
21 #include "flang/Parser/characters.h"
22 #include "flang/Parser/message.h"
23 #include "flang/Parser/provenance.h"
24 #include <bitset>
25 #include <optional>
26 #include <string>
27 #include <unordered_set>
28 
29 namespace Fortran::parser {
30 
31 class Messages;
32 class Preprocessor;
33 
34 class Prescanner {
35 public:
36   Prescanner(Messages &, CookedSource &, Preprocessor &,
37       common::LanguageFeatureControl);
38   Prescanner(const Prescanner &);
39 
messages()40   Messages &messages() const { return messages_; }
41 
set_fixedForm(bool yes)42   Prescanner &set_fixedForm(bool yes) {
43     inFixedForm_ = yes;
44     return *this;
45   }
set_encoding(Encoding code)46   Prescanner &set_encoding(Encoding code) {
47     encoding_ = code;
48     return *this;
49   }
set_fixedFormColumnLimit(int limit)50   Prescanner &set_fixedFormColumnLimit(int limit) {
51     fixedFormColumnLimit_ = limit;
52     return *this;
53   }
54 
55   Prescanner &AddCompilerDirectiveSentinel(const std::string &);
56 
57   void Prescan(ProvenanceRange);
58   void Statement();
59   void NextLine();
60 
61   // Callbacks for use by Preprocessor.
IsAtEnd()62   bool IsAtEnd() const { return nextLine_ >= limit_; }
63   bool IsNextLinePreprocessorDirective() const;
64   TokenSequence TokenizePreprocessorDirective();
GetCurrentProvenance()65   Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
66 
Say(A &&...a)67   template <typename... A> Message &Say(A &&... a) {
68     Message &m{messages_.Say(std::forward<A>(a)...)};
69     std::optional<ProvenanceRange> range{m.GetProvenanceRange(cooked_)};
70     CHECK(!range || cooked_.IsValid(*range));
71     return m;
72   }
73 
74 private:
75   struct LineClassification {
76     enum class Kind {
77       Comment,
78       ConditionalCompilationDirective,
79       IncludeDirective, // #include
80       DefinitionDirective, // #define & #undef
81       PreprocessorDirective,
82       IncludeLine, // Fortran INCLUDE
83       CompilerDirective,
84       Source
85     };
86     LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr)
87         : kind{k}, payloadOffset{po}, sentinel{s} {}
88     LineClassification(LineClassification &&) = default;
89     Kind kind;
90     std::size_t payloadOffset; // byte offset of content
91     const char *sentinel; // if it's a compiler directive
92   };
93 
BeginSourceLine(const char * at)94   void BeginSourceLine(const char *at) {
95     at_ = at;
96     column_ = 1;
97     tabInCurrentLine_ = false;
98     slashInCurrentLine_ = false;
99     preventHollerith_ = false;
100     delimiterNesting_ = 0;
101   }
102 
BeginSourceLineAndAdvance()103   void BeginSourceLineAndAdvance() {
104     BeginSourceLine(nextLine_);
105     NextLine();
106   }
107 
GetProvenance(const char * sourceChar)108   Provenance GetProvenance(const char *sourceChar) const {
109     return startProvenance_ + (sourceChar - start_);
110   }
111 
GetProvenanceRange(const char * first,const char * afterLast)112   ProvenanceRange GetProvenanceRange(
113       const char *first, const char *afterLast) const {
114     std::size_t bytes = afterLast - first;
115     return {startProvenance_ + (first - start_), bytes};
116   }
117 
EmitChar(TokenSequence & tokens,char ch)118   void EmitChar(TokenSequence &tokens, char ch) {
119     tokens.PutNextTokenChar(ch, GetCurrentProvenance());
120   }
121 
EmitInsertedChar(TokenSequence & tokens,char ch)122   void EmitInsertedChar(TokenSequence &tokens, char ch) {
123     Provenance provenance{cooked_.allSources().CompilerInsertionProvenance(ch)};
124     tokens.PutNextTokenChar(ch, provenance);
125   }
126 
EmitCharAndAdvance(TokenSequence & tokens,char ch)127   char EmitCharAndAdvance(TokenSequence &tokens, char ch) {
128     EmitChar(tokens, ch);
129     NextChar();
130     return *at_;
131   }
132 
InCompilerDirective()133   bool InCompilerDirective() const { return directiveSentinel_ != nullptr; }
InFixedFormSource()134   bool InFixedFormSource() const {
135     return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
136   }
137 
IsCComment(const char * p)138   bool IsCComment(const char *p) const {
139     return p[0] == '/' && p[1] == '*' &&
140         (inPreprocessorDirective_ ||
141             (!inCharLiteral_ &&
142                 features_.IsEnabled(
143                     common::LanguageFeature::ClassicCComments)));
144   }
145 
146   void LabelField(TokenSequence &, int outCol = 1);
147   void SkipToEndOfLine();
148   bool MustSkipToEndOfLine() const;
149   void NextChar();
150   void SkipToNextSignificantCharacter();
151   void SkipCComments();
152   void SkipSpaces();
153   static const char *SkipWhiteSpace(const char *);
154   const char *SkipWhiteSpaceAndCComments(const char *) const;
155   const char *SkipCComment(const char *) const;
156   bool NextToken(TokenSequence &);
157   bool ExponentAndKind(TokenSequence &);
158   void QuotedCharacterLiteral(TokenSequence &, const char *start);
159   void Hollerith(TokenSequence &, int count, const char *start);
160   bool PadOutCharacterLiteral(TokenSequence &);
161   bool SkipCommentLine(bool afterAmpersand);
162   bool IsFixedFormCommentLine(const char *) const;
163   const char *IsFreeFormComment(const char *) const;
164   std::optional<std::size_t> IsIncludeLine(const char *) const;
165   void FortranInclude(const char *quote);
166   const char *IsPreprocessorDirectiveLine(const char *) const;
167   const char *FixedFormContinuationLine(bool mightNeedSpace);
168   const char *FreeFormContinuationLine(bool ampersand);
169   bool FixedFormContinuation(bool mightNeedSpace);
170   bool FreeFormContinuation();
171   bool Continuation(bool mightNeedFixedFormSpace);
172   std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
173       const char *) const;
174   std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
175       const char *) const;
176   const char *IsCompilerDirectiveSentinel(const char *) const;
177   LineClassification ClassifyLine(const char *) const;
178   void SourceFormChange(std::string &&);
179 
180   Messages &messages_;
181   CookedSource &cooked_;
182   Preprocessor &preprocessor_;
183   common::LanguageFeatureControl features_;
184   bool inFixedForm_{false};
185   int fixedFormColumnLimit_{72};
186   Encoding encoding_{Encoding::UTF_8};
187   int delimiterNesting_{0};
188   int prescannerNesting_{0};
189 
190   Provenance startProvenance_;
191   const char *start_{nullptr}; // beginning of current source file content
192   const char *limit_{nullptr}; // first address after end of current source
193   const char *nextLine_{nullptr}; // next line to process; <= limit_
194   const char *directiveSentinel_{nullptr}; // current compiler directive
195 
196   // This data members are state for processing the source line containing
197   // "at_", which goes to up to the newline character before "nextLine_".
198   const char *at_{nullptr}; // next character to process; < nextLine_
199   int column_{1}; // card image column position of next character
200   bool tabInCurrentLine_{false};
201   bool slashInCurrentLine_{false};
202   bool preventHollerith_{false};
203   bool inCharLiteral_{false};
204   bool inPreprocessorDirective_{false};
205 
206   // In some edge cases of compiler directive continuation lines, it
207   // is necessary to treat the line break as a space character by
208   // setting this flag, which is cleared by EmitChar().
209   bool insertASpace_{false};
210 
211   // When a free form continuation marker (&) appears at the end of a line
212   // before a INCLUDE or #include, we delete it and omit the newline, so
213   // that the first line of the included file is truly a continuation of
214   // the line before.  Also used when the & appears at the end of the last
215   // line in an include file.
216   bool omitNewline_{false};
217   bool skipLeadingAmpersand_{false};
218 
219   const Provenance spaceProvenance_{
220       cooked_.allSources().CompilerInsertionProvenance(' ')};
221   const Provenance backslashProvenance_{
222       cooked_.allSources().CompilerInsertionProvenance('\\')};
223   const ProvenanceRange sixSpaceProvenance_{
224       cooked_.allSources().AddCompilerInsertion("      "s)};
225 
226   // To avoid probing the set of active compiler directive sentinel strings
227   // on every comment line, they're checked first with a cheap Bloom filter.
228   static const int prime1{1019}, prime2{1021};
229   std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
230   std::unordered_set<std::string> compilerDirectiveSentinels_;
231 };
232 } // namespace Fortran::parser
233 #endif // FORTRAN_PARSER_PRESCAN_H_
234