1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef FORTRAN_PARSER_PRESCAN_H_ 10 #define FORTRAN_PARSER_PRESCAN_H_ 11 12 // Defines a fast Fortran source prescanning phase that implements some 13 // character-level features of the language that can be inefficient to 14 // support directly in a backtracking parser. This phase handles Fortran 15 // line continuation, comment removal, card image margins, padding out 16 // fixed form character literals on truncated card images, file 17 // inclusion, and driving the Fortran source preprocessor. 18 19 #include "token-sequence.h" 20 #include "flang/Common/Fortran-features.h" 21 #include "flang/Parser/characters.h" 22 #include "flang/Parser/message.h" 23 #include "flang/Parser/provenance.h" 24 #include <bitset> 25 #include <optional> 26 #include <string> 27 #include <unordered_set> 28 29 namespace Fortran::parser { 30 31 class Messages; 32 class Preprocessor; 33 34 class Prescanner { 35 public: 36 Prescanner(Messages &, CookedSource &, Preprocessor &, 37 common::LanguageFeatureControl); 38 Prescanner(const Prescanner &); 39 messages()40 Messages &messages() const { return messages_; } 41 set_fixedForm(bool yes)42 Prescanner &set_fixedForm(bool yes) { 43 inFixedForm_ = yes; 44 return *this; 45 } set_encoding(Encoding code)46 Prescanner &set_encoding(Encoding code) { 47 encoding_ = code; 48 return *this; 49 } set_fixedFormColumnLimit(int limit)50 Prescanner &set_fixedFormColumnLimit(int limit) { 51 fixedFormColumnLimit_ = limit; 52 return *this; 53 } 54 55 Prescanner &AddCompilerDirectiveSentinel(const std::string &); 56 57 void Prescan(ProvenanceRange); 58 void Statement(); 59 void NextLine(); 60 61 // Callbacks for use by Preprocessor. IsAtEnd()62 bool IsAtEnd() const { return nextLine_ >= limit_; } 63 bool IsNextLinePreprocessorDirective() const; 64 TokenSequence TokenizePreprocessorDirective(); GetCurrentProvenance()65 Provenance GetCurrentProvenance() const { return GetProvenance(at_); } 66 Say(A &&...a)67 template <typename... A> Message &Say(A &&... a) { 68 Message &m{messages_.Say(std::forward<A>(a)...)}; 69 std::optional<ProvenanceRange> range{m.GetProvenanceRange(cooked_)}; 70 CHECK(!range || cooked_.IsValid(*range)); 71 return m; 72 } 73 74 private: 75 struct LineClassification { 76 enum class Kind { 77 Comment, 78 ConditionalCompilationDirective, 79 IncludeDirective, // #include 80 DefinitionDirective, // #define & #undef 81 PreprocessorDirective, 82 IncludeLine, // Fortran INCLUDE 83 CompilerDirective, 84 Source 85 }; 86 LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr) 87 : kind{k}, payloadOffset{po}, sentinel{s} {} 88 LineClassification(LineClassification &&) = default; 89 Kind kind; 90 std::size_t payloadOffset; // byte offset of content 91 const char *sentinel; // if it's a compiler directive 92 }; 93 BeginSourceLine(const char * at)94 void BeginSourceLine(const char *at) { 95 at_ = at; 96 column_ = 1; 97 tabInCurrentLine_ = false; 98 slashInCurrentLine_ = false; 99 preventHollerith_ = false; 100 delimiterNesting_ = 0; 101 } 102 BeginSourceLineAndAdvance()103 void BeginSourceLineAndAdvance() { 104 BeginSourceLine(nextLine_); 105 NextLine(); 106 } 107 GetProvenance(const char * sourceChar)108 Provenance GetProvenance(const char *sourceChar) const { 109 return startProvenance_ + (sourceChar - start_); 110 } 111 GetProvenanceRange(const char * first,const char * afterLast)112 ProvenanceRange GetProvenanceRange( 113 const char *first, const char *afterLast) const { 114 std::size_t bytes = afterLast - first; 115 return {startProvenance_ + (first - start_), bytes}; 116 } 117 EmitChar(TokenSequence & tokens,char ch)118 void EmitChar(TokenSequence &tokens, char ch) { 119 tokens.PutNextTokenChar(ch, GetCurrentProvenance()); 120 } 121 EmitInsertedChar(TokenSequence & tokens,char ch)122 void EmitInsertedChar(TokenSequence &tokens, char ch) { 123 Provenance provenance{cooked_.allSources().CompilerInsertionProvenance(ch)}; 124 tokens.PutNextTokenChar(ch, provenance); 125 } 126 EmitCharAndAdvance(TokenSequence & tokens,char ch)127 char EmitCharAndAdvance(TokenSequence &tokens, char ch) { 128 EmitChar(tokens, ch); 129 NextChar(); 130 return *at_; 131 } 132 InCompilerDirective()133 bool InCompilerDirective() const { return directiveSentinel_ != nullptr; } InFixedFormSource()134 bool InFixedFormSource() const { 135 return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective(); 136 } 137 IsCComment(const char * p)138 bool IsCComment(const char *p) const { 139 return p[0] == '/' && p[1] == '*' && 140 (inPreprocessorDirective_ || 141 (!inCharLiteral_ && 142 features_.IsEnabled( 143 common::LanguageFeature::ClassicCComments))); 144 } 145 146 void LabelField(TokenSequence &, int outCol = 1); 147 void SkipToEndOfLine(); 148 bool MustSkipToEndOfLine() const; 149 void NextChar(); 150 void SkipToNextSignificantCharacter(); 151 void SkipCComments(); 152 void SkipSpaces(); 153 static const char *SkipWhiteSpace(const char *); 154 const char *SkipWhiteSpaceAndCComments(const char *) const; 155 const char *SkipCComment(const char *) const; 156 bool NextToken(TokenSequence &); 157 bool ExponentAndKind(TokenSequence &); 158 void QuotedCharacterLiteral(TokenSequence &, const char *start); 159 void Hollerith(TokenSequence &, int count, const char *start); 160 bool PadOutCharacterLiteral(TokenSequence &); 161 bool SkipCommentLine(bool afterAmpersand); 162 bool IsFixedFormCommentLine(const char *) const; 163 const char *IsFreeFormComment(const char *) const; 164 std::optional<std::size_t> IsIncludeLine(const char *) const; 165 void FortranInclude(const char *quote); 166 const char *IsPreprocessorDirectiveLine(const char *) const; 167 const char *FixedFormContinuationLine(bool mightNeedSpace); 168 const char *FreeFormContinuationLine(bool ampersand); 169 bool FixedFormContinuation(bool mightNeedSpace); 170 bool FreeFormContinuation(); 171 bool Continuation(bool mightNeedFixedFormSpace); 172 std::optional<LineClassification> IsFixedFormCompilerDirectiveLine( 173 const char *) const; 174 std::optional<LineClassification> IsFreeFormCompilerDirectiveLine( 175 const char *) const; 176 const char *IsCompilerDirectiveSentinel(const char *) const; 177 LineClassification ClassifyLine(const char *) const; 178 void SourceFormChange(std::string &&); 179 180 Messages &messages_; 181 CookedSource &cooked_; 182 Preprocessor &preprocessor_; 183 common::LanguageFeatureControl features_; 184 bool inFixedForm_{false}; 185 int fixedFormColumnLimit_{72}; 186 Encoding encoding_{Encoding::UTF_8}; 187 int delimiterNesting_{0}; 188 int prescannerNesting_{0}; 189 190 Provenance startProvenance_; 191 const char *start_{nullptr}; // beginning of current source file content 192 const char *limit_{nullptr}; // first address after end of current source 193 const char *nextLine_{nullptr}; // next line to process; <= limit_ 194 const char *directiveSentinel_{nullptr}; // current compiler directive 195 196 // This data members are state for processing the source line containing 197 // "at_", which goes to up to the newline character before "nextLine_". 198 const char *at_{nullptr}; // next character to process; < nextLine_ 199 int column_{1}; // card image column position of next character 200 bool tabInCurrentLine_{false}; 201 bool slashInCurrentLine_{false}; 202 bool preventHollerith_{false}; 203 bool inCharLiteral_{false}; 204 bool inPreprocessorDirective_{false}; 205 206 // In some edge cases of compiler directive continuation lines, it 207 // is necessary to treat the line break as a space character by 208 // setting this flag, which is cleared by EmitChar(). 209 bool insertASpace_{false}; 210 211 // When a free form continuation marker (&) appears at the end of a line 212 // before a INCLUDE or #include, we delete it and omit the newline, so 213 // that the first line of the included file is truly a continuation of 214 // the line before. Also used when the & appears at the end of the last 215 // line in an include file. 216 bool omitNewline_{false}; 217 bool skipLeadingAmpersand_{false}; 218 219 const Provenance spaceProvenance_{ 220 cooked_.allSources().CompilerInsertionProvenance(' ')}; 221 const Provenance backslashProvenance_{ 222 cooked_.allSources().CompilerInsertionProvenance('\\')}; 223 const ProvenanceRange sixSpaceProvenance_{ 224 cooked_.allSources().AddCompilerInsertion(" "s)}; 225 226 // To avoid probing the set of active compiler directive sentinel strings 227 // on every comment line, they're checked first with a cheap Bloom filter. 228 static const int prime1{1019}, prime2{1021}; 229 std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes 230 std::unordered_set<std::string> compilerDirectiveSentinels_; 231 }; 232 } // namespace Fortran::parser 233 #endif // FORTRAN_PARSER_PRESCAN_H_ 234