1 // Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef FORTRAN_PARSER_PROVENANCE_H_ 16 #define FORTRAN_PARSER_PROVENANCE_H_ 17 18 #include "char-block.h" 19 #include "char-buffer.h" 20 #include "characters.h" 21 #include "source.h" 22 #include "../common/idioms.h" 23 #include "../common/interval.h" 24 #include <cstddef> 25 #include <map> 26 #include <memory> 27 #include <optional> 28 #include <ostream> 29 #include <sstream> 30 #include <string> 31 #include <utility> 32 #include <variant> 33 #include <vector> 34 35 namespace Fortran::parser { 36 37 // Each character in the contiguous source stream built by the 38 // prescanner corresponds to a particular character in a source file, 39 // include file, macro expansion, or compiler-inserted padding. 40 // The location of this original character to which a parsable character 41 // corresponds is its provenance. 42 // 43 // Provenances are offsets into an (unmaterialized) marshaling of the 44 // entire contents of all the original source files, include files, macro 45 // expansions, &c. for each visit to each source. These origins of the 46 // original source characters constitute a forest whose roots are 47 // the original source files named on the compiler's command line. 48 // Given a Provenance, we can find the tree node that contains it in time 49 // O(log(# of origins)), and describe the position precisely by walking 50 // up the tree. (It would be possible, via a time/space trade-off, to 51 // cap the time by the use of an intermediate table that would be indexed 52 // by the upper bits of an offset, but that does not appear to be 53 // necessary.) 54 55 class AllSources; 56 57 class Provenance { 58 public: Provenance()59 Provenance() {} Provenance(std::size_t offset)60 Provenance(std::size_t offset) : offset_{offset} { CHECK(offset > 0); } 61 Provenance(const Provenance &that) = default; 62 Provenance(Provenance &&that) = default; 63 Provenance &operator=(const Provenance &that) = default; 64 Provenance &operator=(Provenance &&that) = default; 65 offset()66 std::size_t offset() const { return offset_; } 67 68 Provenance operator+(ptrdiff_t n) const { 69 CHECK(n > -static_cast<ptrdiff_t>(offset_)); 70 return {offset_ + static_cast<std::size_t>(n)}; 71 } 72 Provenance operator+(std::size_t n) const { return {offset_ + n}; } 73 std::size_t operator-(Provenance that) const { 74 CHECK(that <= *this); 75 return offset_ - that.offset_; 76 } 77 bool operator<(Provenance that) const { return offset_ < that.offset_; } 78 bool operator<=(Provenance that) const { return !(that < *this); } 79 bool operator==(Provenance that) const { return offset_ == that.offset_; } 80 bool operator!=(Provenance that) const { return !(*this == that); } 81 82 private: 83 std::size_t offset_{0}; 84 }; 85 86 using ProvenanceRange = common::Interval<Provenance>; 87 88 // Maps contiguous ranges of byte offsets in original source files to 89 // contiguous ranges in the cooked character stream; essentially a 90 // partial inversion of OffsetToProvenanceMappings (below). 91 // Used for implementing the first step of mapping an identifier 92 // selected in a code editor to one of its declarative statements. 93 class ProvenanceRangeToOffsetMappings { 94 public: 95 ProvenanceRangeToOffsetMappings(); 96 ~ProvenanceRangeToOffsetMappings(); empty()97 bool empty() const { return map_.empty(); } 98 void Put(ProvenanceRange, std::size_t offset); 99 std::optional<std::size_t> Map(ProvenanceRange) const; 100 std::ostream &Dump(std::ostream &) const; 101 102 private: 103 // A comparison function object for use in std::multimap<Compare=>. 104 // Intersecting intervals will effectively compare equal, not being 105 // either < nor >= each other. 106 struct WhollyPrecedes { 107 bool operator()(ProvenanceRange, ProvenanceRange) const; 108 }; 109 110 std::multimap<ProvenanceRange, std::size_t, WhollyPrecedes> map_; 111 }; 112 113 // Maps 0-based local offsets in some contiguous range (e.g., a token 114 // sequence) to their provenances. Lookup time is on the order of 115 // O(log(#of intervals with contiguous provenances)). As mentioned 116 // above, this time could be capped via a time/space trade-off. 117 class OffsetToProvenanceMappings { 118 public: OffsetToProvenanceMappings()119 OffsetToProvenanceMappings() {} 120 void clear(); 121 void swap(OffsetToProvenanceMappings &); 122 void shrink_to_fit(); 123 std::size_t SizeInBytes() const; 124 void Put(ProvenanceRange); 125 void Put(const OffsetToProvenanceMappings &); 126 ProvenanceRange Map(std::size_t at) const; 127 void RemoveLastBytes(std::size_t); 128 ProvenanceRangeToOffsetMappings Invert(const AllSources &) const; 129 std::ostream &Dump(std::ostream &) const; 130 131 private: 132 struct ContiguousProvenanceMapping { 133 std::size_t start; 134 ProvenanceRange range; 135 }; 136 137 // Elements appear in ascending order of distinct .start values; 138 // their .range values are disjoint and not necessarily adjacent. 139 std::vector<ContiguousProvenanceMapping> provenanceMap_; 140 }; 141 142 // A singleton AllSources instance for the whole compilation 143 // is shared by reference. 144 class AllSources { 145 public: 146 AllSources(); 147 ~AllSources(); 148 size()149 std::size_t size() const { return range_.size(); } 150 const char &operator[](Provenance) const; encoding()151 Encoding encoding() const { return encoding_; } set_encoding(Encoding e)152 AllSources &set_encoding(Encoding e) { 153 encoding_ = e; 154 return *this; 155 } 156 157 void PushSearchPathDirectory(std::string); 158 std::string PopSearchPathDirectory(); 159 const SourceFile *Open(std::string path, std::stringstream *error); 160 const SourceFile *ReadStandardInput(std::stringstream *error); 161 162 ProvenanceRange AddIncludedFile( 163 const SourceFile &, ProvenanceRange, bool isModule = false); 164 ProvenanceRange AddMacroCall( 165 ProvenanceRange def, ProvenanceRange use, const std::string &expansion); 166 ProvenanceRange AddCompilerInsertion(std::string); 167 IsValid(Provenance at)168 bool IsValid(Provenance at) const { return range_.Contains(at); } IsValid(ProvenanceRange range)169 bool IsValid(ProvenanceRange range) const { 170 return range.size() > 0 && range_.Contains(range); 171 } 172 void EmitMessage(std::ostream &, const std::optional<ProvenanceRange> &, 173 const std::string &message, bool echoSourceLine = false) const; 174 const SourceFile *GetSourceFile( 175 Provenance, std::size_t *offset = nullptr) const; 176 std::optional<SourcePosition> GetSourcePosition(Provenance) const; 177 std::optional<ProvenanceRange> GetFirstFileProvenance() const; 178 std::string GetPath(Provenance) const; // __FILE__ 179 int GetLineNumber(Provenance) const; // __LINE__ 180 Provenance CompilerInsertionProvenance(char ch); 181 Provenance CompilerInsertionProvenance(const char *, std::size_t); 182 ProvenanceRange IntersectionWithSourceFiles(ProvenanceRange) const; 183 std::ostream &Dump(std::ostream &) const; 184 185 private: 186 struct Inclusion { 187 const SourceFile &source; 188 bool isModule{false}; 189 }; 190 struct Macro { 191 ProvenanceRange definition; 192 std::string expansion; 193 }; 194 struct CompilerInsertion { 195 std::string text; 196 }; 197 198 struct Origin { 199 Origin(ProvenanceRange, const SourceFile &); 200 Origin(ProvenanceRange, const SourceFile &, ProvenanceRange, 201 bool isModule = false); 202 Origin(ProvenanceRange, ProvenanceRange def, ProvenanceRange use, 203 const std::string &expansion); 204 Origin(ProvenanceRange, const std::string &); 205 206 const char &operator[](std::size_t) const; 207 208 std::variant<Inclusion, Macro, CompilerInsertion> u; 209 ProvenanceRange covers, replaces; 210 }; 211 212 const Origin &MapToOrigin(Provenance) const; 213 214 // Elements are in ascending & contiguous order of .covers. 215 std::vector<Origin> origin_; 216 ProvenanceRange range_; 217 std::map<char, Provenance> compilerInsertionProvenance_; 218 std::vector<std::unique_ptr<SourceFile>> ownedSourceFiles_; 219 std::vector<std::string> searchPath_; 220 Encoding encoding_{Encoding::UTF_8}; 221 }; 222 223 class CookedSource { 224 public: 225 explicit CookedSource(AllSources &); 226 ~CookedSource(); 227 allSources()228 AllSources &allSources() { return allSources_; } allSources()229 const AllSources &allSources() const { return allSources_; } data()230 const std::string &data() const { return data_; } 231 IsValid(const char * p)232 bool IsValid(const char *p) const { 233 return p >= &data_.front() && p <= &data_.back() + 1; 234 } IsValid(CharBlock range)235 bool IsValid(CharBlock range) const { 236 return !range.empty() && IsValid(range.begin()) && IsValid(range.end() - 1); 237 } IsValid(ProvenanceRange r)238 bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); } 239 240 std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const; 241 std::optional<CharBlock> GetCharBlockFromLineAndColumns( 242 int line, int startColumn, int endColumn) const; 243 std::optional<std::pair<SourcePosition, SourcePosition>> 244 GetSourcePositionRange(CharBlock) const; 245 std::optional<CharBlock> GetCharBlock(ProvenanceRange) const; 246 247 // The result of a Put() is the offset that the new data 248 // will have in the eventually marshaled contiguous buffer. Put(const char * data,std::size_t bytes)249 std::size_t Put(const char *data, std::size_t bytes) { 250 return buffer_.Put(data, bytes); 251 } Put(const std::string & s)252 std::size_t Put(const std::string &s) { return buffer_.Put(s); } Put(char ch)253 std::size_t Put(char ch) { return buffer_.Put(&ch, 1); } Put(char ch,Provenance p)254 std::size_t Put(char ch, Provenance p) { 255 provenanceMap_.Put(ProvenanceRange{p, 1}); 256 return buffer_.Put(&ch, 1); 257 } 258 PutProvenance(Provenance p)259 void PutProvenance(Provenance p) { provenanceMap_.Put(ProvenanceRange{p}); } PutProvenance(ProvenanceRange pr)260 void PutProvenance(ProvenanceRange pr) { provenanceMap_.Put(pr); } PutProvenanceMappings(const OffsetToProvenanceMappings & pm)261 void PutProvenanceMappings(const OffsetToProvenanceMappings &pm) { 262 provenanceMap_.Put(pm); 263 } 264 265 void Marshal(); // marshals text into one contiguous block 266 void CompileProvenanceRangeToOffsetMappings(); AcquireData()267 std::string AcquireData() { return std::move(data_); } 268 std::ostream &Dump(std::ostream &) const; 269 270 private: 271 AllSources &allSources_; 272 CharBuffer buffer_; // before Marshal() 273 std::string data_; // all of it, prescanned and preprocessed 274 OffsetToProvenanceMappings provenanceMap_; 275 ProvenanceRangeToOffsetMappings invertedMap_; 276 }; 277 } 278 #endif // FORTRAN_PARSER_PROVENANCE_H_ 279