1 // Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef FORTRAN_PARSER_PRESCAN_H_ 16 #define FORTRAN_PARSER_PRESCAN_H_ 17 18 // Defines a fast Fortran source prescanning phase that implements some 19 // character-level features of the language that can be inefficient to 20 // support directly in a backtracking parser. This phase handles Fortran 21 // line continuation, comment removal, card image margins, padding out 22 // fixed form character literals on truncated card images, file 23 // inclusion, and driving the Fortran source preprocessor. 24 25 #include "characters.h" 26 #include "features.h" 27 #include "message.h" 28 #include "provenance.h" 29 #include "token-sequence.h" 30 #include <bitset> 31 #include <optional> 32 #include <string> 33 #include <unordered_set> 34 35 namespace Fortran::parser { 36 37 class Messages; 38 class Preprocessor; 39 40 class Prescanner { 41 public: 42 Prescanner( 43 Messages &, CookedSource &, Preprocessor &, LanguageFeatureControl); 44 Prescanner(const Prescanner &); 45 messages()46 Messages &messages() const { return messages_; } 47 set_fixedForm(bool yes)48 Prescanner &set_fixedForm(bool yes) { 49 inFixedForm_ = yes; 50 return *this; 51 } set_encoding(Encoding code)52 Prescanner &set_encoding(Encoding code) { 53 encoding_ = code; 54 return *this; 55 } set_fixedFormColumnLimit(int limit)56 Prescanner &set_fixedFormColumnLimit(int limit) { 57 fixedFormColumnLimit_ = limit; 58 return *this; 59 } 60 61 Prescanner &AddCompilerDirectiveSentinel(const std::string &); 62 63 void Prescan(ProvenanceRange); 64 void Statement(); 65 void NextLine(); 66 67 // Callbacks for use by Preprocessor. IsAtEnd()68 bool IsAtEnd() const { return nextLine_ >= limit_; } 69 bool IsNextLinePreprocessorDirective() const; 70 TokenSequence TokenizePreprocessorDirective(); GetCurrentProvenance()71 Provenance GetCurrentProvenance() const { return GetProvenance(at_); } 72 Say(A &&...a)73 template<typename... A> Message &Say(A &&... a) { 74 Message &m{messages_.Say(std::forward<A>(a)...)}; 75 std::optional<ProvenanceRange> range{m.GetProvenanceRange(cooked_)}; 76 CHECK(!range.has_value() || cooked_.IsValid(*range)); 77 return m; 78 } 79 80 private: 81 struct LineClassification { 82 enum class Kind { 83 Comment, 84 ConditionalCompilationDirective, 85 IncludeDirective, // #include 86 DefinitionDirective, // #define & #undef 87 PreprocessorDirective, 88 IncludeLine, // Fortran INCLUDE 89 CompilerDirective, 90 Source 91 }; 92 LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr) 93 : kind{k}, payloadOffset{po}, sentinel{s} {} 94 LineClassification(LineClassification &&) = default; 95 Kind kind; 96 std::size_t payloadOffset; // byte offset of content 97 const char *sentinel; // if it's a compiler directive 98 }; 99 BeginSourceLine(const char * at)100 void BeginSourceLine(const char *at) { 101 at_ = at; 102 column_ = 1; 103 tabInCurrentLine_ = false; 104 slashInCurrentLine_ = false; 105 preventHollerith_ = false; 106 delimiterNesting_ = 0; 107 } 108 BeginSourceLineAndAdvance()109 void BeginSourceLineAndAdvance() { 110 BeginSourceLine(nextLine_); 111 NextLine(); 112 } 113 GetProvenance(const char * sourceChar)114 Provenance GetProvenance(const char *sourceChar) const { 115 return startProvenance_ + (sourceChar - start_); 116 } 117 GetProvenanceRange(const char * first,const char * afterLast)118 ProvenanceRange GetProvenanceRange( 119 const char *first, const char *afterLast) const { 120 std::size_t bytes = afterLast - first; 121 return {startProvenance_ + (first - start_), bytes}; 122 } 123 EmitChar(TokenSequence & tokens,char ch)124 void EmitChar(TokenSequence &tokens, char ch) { 125 tokens.PutNextTokenChar(ch, GetCurrentProvenance()); 126 } 127 EmitInsertedChar(TokenSequence & tokens,char ch)128 void EmitInsertedChar(TokenSequence &tokens, char ch) { 129 Provenance provenance{cooked_.allSources().CompilerInsertionProvenance(ch)}; 130 tokens.PutNextTokenChar(ch, provenance); 131 } 132 EmitCharAndAdvance(TokenSequence & tokens,char ch)133 char EmitCharAndAdvance(TokenSequence &tokens, char ch) { 134 EmitChar(tokens, ch); 135 NextChar(); 136 return *at_; 137 } 138 InCompilerDirective()139 bool InCompilerDirective() const { return directiveSentinel_ != nullptr; } InFixedFormSource()140 bool InFixedFormSource() const { 141 return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective(); 142 } 143 IsCComment(const char * p)144 bool IsCComment(const char *p) const { 145 return p[0] == '/' && p[1] == '*' && 146 (inPreprocessorDirective_ || 147 (!inCharLiteral_ && 148 features_.IsEnabled(LanguageFeature::ClassicCComments))); 149 } 150 151 void LabelField(TokenSequence &, int outCol = 1); 152 void SkipToEndOfLine(); 153 bool MustSkipToEndOfLine() const; 154 void NextChar(); 155 void SkipCComments(); 156 void SkipSpaces(); 157 static const char *SkipWhiteSpace(const char *); 158 const char *SkipWhiteSpaceAndCComments(const char *) const; 159 const char *SkipCComment(const char *) const; 160 bool NextToken(TokenSequence &); 161 bool ExponentAndKind(TokenSequence &); 162 void QuotedCharacterLiteral(TokenSequence &, const char *start); 163 void Hollerith(TokenSequence &, int count, const char *start); 164 bool PadOutCharacterLiteral(TokenSequence &); 165 bool SkipCommentLine(bool afterAmpersand); 166 bool IsFixedFormCommentLine(const char *) const; 167 const char *IsFreeFormComment(const char *) const; 168 std::optional<std::size_t> IsIncludeLine(const char *) const; 169 void FortranInclude(const char *quote); 170 const char *IsPreprocessorDirectiveLine(const char *) const; 171 const char *FixedFormContinuationLine(bool mightNeedSpace); 172 const char *FreeFormContinuationLine(bool ampersand); 173 bool FixedFormContinuation(bool mightNeedSpace); 174 bool FreeFormContinuation(); 175 bool Continuation(bool mightNeedFixedFormSpace); 176 std::optional<LineClassification> IsFixedFormCompilerDirectiveLine( 177 const char *) const; 178 std::optional<LineClassification> IsFreeFormCompilerDirectiveLine( 179 const char *) const; 180 const char *IsCompilerDirectiveSentinel(const char *) const; 181 LineClassification ClassifyLine(const char *) const; 182 void SourceFormChange(std::string &&); 183 184 Messages &messages_; 185 CookedSource &cooked_; 186 Preprocessor &preprocessor_; 187 LanguageFeatureControl features_; 188 bool inFixedForm_{false}; 189 int fixedFormColumnLimit_{72}; 190 Encoding encoding_{Encoding::UTF_8}; 191 int delimiterNesting_{0}; 192 int prescannerNesting_{0}; 193 194 Provenance startProvenance_; 195 const char *start_{nullptr}; // beginning of current source file content 196 const char *limit_{nullptr}; // first address after end of current source 197 const char *nextLine_{nullptr}; // next line to process; <= limit_ 198 const char *directiveSentinel_{nullptr}; // current compiler directive 199 200 // This data members are state for processing the source line containing 201 // "at_", which goes to up to the newline character before "nextLine_". 202 const char *at_{nullptr}; // next character to process; < nextLine_ 203 int column_{1}; // card image column position of next character 204 bool tabInCurrentLine_{false}; 205 bool slashInCurrentLine_{false}; 206 bool preventHollerith_{false}; 207 bool inCharLiteral_{false}; 208 bool inPreprocessorDirective_{false}; 209 210 // In some edge cases of compiler directive continuation lines, it 211 // is necessary to treat the line break as a space character by 212 // setting this flag, which is cleared by EmitChar(). 213 bool insertASpace_{false}; 214 215 // When a free form continuation marker (&) appears at the end of a line 216 // before a INCLUDE or #include, we delete it and omit the newline, so 217 // that the first line of the included file is truly a continuation of 218 // the line before. Also used when the & appears at the end of the last 219 // line in an include file. 220 bool omitNewline_{false}; 221 bool skipLeadingAmpersand_{false}; 222 223 const Provenance spaceProvenance_{ 224 cooked_.allSources().CompilerInsertionProvenance(' ')}; 225 const Provenance backslashProvenance_{ 226 cooked_.allSources().CompilerInsertionProvenance('\\')}; 227 const ProvenanceRange sixSpaceProvenance_{ 228 cooked_.allSources().AddCompilerInsertion(" "s)}; 229 230 // To avoid probing the set of active compiler directive sentinel strings 231 // on every comment line, they're checked first with a cheap Bloom filter. 232 static const int prime1{1019}, prime2{1021}; 233 std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes 234 std::unordered_set<std::string> compilerDirectiveSentinels_; 235 }; 236 } 237 #endif // FORTRAN_PARSER_PRESCAN_H_ 238