1 // Copyright (c) 2018-2019, NVIDIA CORPORATION.  All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef FORTRAN_PARSER_PROVENANCE_H_
16 #define FORTRAN_PARSER_PROVENANCE_H_
17 
18 #include "char-block.h"
19 #include "char-buffer.h"
20 #include "characters.h"
21 #include "source.h"
22 #include "../common/idioms.h"
23 #include "../common/interval.h"
24 #include <cstddef>
25 #include <map>
26 #include <memory>
27 #include <optional>
28 #include <ostream>
29 #include <sstream>
30 #include <string>
31 #include <utility>
32 #include <variant>
33 #include <vector>
34 
35 namespace Fortran::parser {
36 
37 // Each character in the contiguous source stream built by the
38 // prescanner corresponds to a particular character in a source file,
39 // include file, macro expansion, or compiler-inserted padding.
40 // The location of this original character to which a parsable character
41 // corresponds is its provenance.
42 //
43 // Provenances are offsets into an (unmaterialized) marshaling of the
44 // entire contents of all the original source files, include files, macro
45 // expansions, &c. for each visit to each source.  These origins of the
46 // original source characters constitute a forest whose roots are
47 // the original source files named on the compiler's command line.
48 // Given a Provenance, we can find the tree node that contains it in time
49 // O(log(# of origins)), and describe the position precisely by walking
50 // up the tree.  (It would be possible, via a time/space trade-off, to
51 // cap the time by the use of an intermediate table that would be indexed
52 // by the upper bits of an offset, but that does not appear to be
53 // necessary.)
54 
55 class AllSources;
56 
57 class Provenance {
58 public:
Provenance()59   Provenance() {}
Provenance(std::size_t offset)60   Provenance(std::size_t offset) : offset_{offset} { CHECK(offset > 0); }
61   Provenance(const Provenance &that) = default;
62   Provenance(Provenance &&that) = default;
63   Provenance &operator=(const Provenance &that) = default;
64   Provenance &operator=(Provenance &&that) = default;
65 
offset()66   std::size_t offset() const { return offset_; }
67 
68   Provenance operator+(ptrdiff_t n) const {
69     CHECK(n > -static_cast<ptrdiff_t>(offset_));
70     return {offset_ + static_cast<std::size_t>(n)};
71   }
72   Provenance operator+(std::size_t n) const { return {offset_ + n}; }
73   std::size_t operator-(Provenance that) const {
74     CHECK(that <= *this);
75     return offset_ - that.offset_;
76   }
77   bool operator<(Provenance that) const { return offset_ < that.offset_; }
78   bool operator<=(Provenance that) const { return !(that < *this); }
79   bool operator==(Provenance that) const { return offset_ == that.offset_; }
80   bool operator!=(Provenance that) const { return !(*this == that); }
81 
82 private:
83   std::size_t offset_{0};
84 };
85 
86 using ProvenanceRange = common::Interval<Provenance>;
87 
88 // Maps contiguous ranges of byte offsets in original source files to
89 // contiguous ranges in the cooked character stream; essentially a
90 // partial inversion of OffsetToProvenanceMappings (below).
91 // Used for implementing the first step of mapping an identifier
92 // selected in a code editor to one of its declarative statements.
93 class ProvenanceRangeToOffsetMappings {
94 public:
95   ProvenanceRangeToOffsetMappings();
96   ~ProvenanceRangeToOffsetMappings();
empty()97   bool empty() const { return map_.empty(); }
98   void Put(ProvenanceRange, std::size_t offset);
99   std::optional<std::size_t> Map(ProvenanceRange) const;
100   std::ostream &Dump(std::ostream &) const;
101 
102 private:
103   // A comparison function object for use in std::multimap<Compare=>.
104   // Intersecting intervals will effectively compare equal, not being
105   // either < nor >= each other.
106   struct WhollyPrecedes {
107     bool operator()(ProvenanceRange, ProvenanceRange) const;
108   };
109 
110   std::multimap<ProvenanceRange, std::size_t, WhollyPrecedes> map_;
111 };
112 
113 // Maps 0-based local offsets in some contiguous range (e.g., a token
114 // sequence) to their provenances.  Lookup time is on the order of
115 // O(log(#of intervals with contiguous provenances)).  As mentioned
116 // above, this time could be capped via a time/space trade-off.
117 class OffsetToProvenanceMappings {
118 public:
OffsetToProvenanceMappings()119   OffsetToProvenanceMappings() {}
120   void clear();
121   void swap(OffsetToProvenanceMappings &);
122   void shrink_to_fit();
123   std::size_t SizeInBytes() const;
124   void Put(ProvenanceRange);
125   void Put(const OffsetToProvenanceMappings &);
126   ProvenanceRange Map(std::size_t at) const;
127   void RemoveLastBytes(std::size_t);
128   ProvenanceRangeToOffsetMappings Invert(const AllSources &) const;
129   std::ostream &Dump(std::ostream &) const;
130 
131 private:
132   struct ContiguousProvenanceMapping {
133     std::size_t start;
134     ProvenanceRange range;
135   };
136 
137   // Elements appear in ascending order of distinct .start values;
138   // their .range values are disjoint and not necessarily adjacent.
139   std::vector<ContiguousProvenanceMapping> provenanceMap_;
140 };
141 
142 // A singleton AllSources instance for the whole compilation
143 // is shared by reference.
144 class AllSources {
145 public:
146   AllSources();
147   ~AllSources();
148 
size()149   std::size_t size() const { return range_.size(); }
150   const char &operator[](Provenance) const;
encoding()151   Encoding encoding() const { return encoding_; }
set_encoding(Encoding e)152   AllSources &set_encoding(Encoding e) {
153     encoding_ = e;
154     return *this;
155   }
156 
157   void PushSearchPathDirectory(std::string);
158   std::string PopSearchPathDirectory();
159   const SourceFile *Open(std::string path, std::stringstream *error);
160   const SourceFile *ReadStandardInput(std::stringstream *error);
161 
162   ProvenanceRange AddIncludedFile(
163       const SourceFile &, ProvenanceRange, bool isModule = false);
164   ProvenanceRange AddMacroCall(
165       ProvenanceRange def, ProvenanceRange use, const std::string &expansion);
166   ProvenanceRange AddCompilerInsertion(std::string);
167 
IsValid(Provenance at)168   bool IsValid(Provenance at) const { return range_.Contains(at); }
IsValid(ProvenanceRange range)169   bool IsValid(ProvenanceRange range) const {
170     return range.size() > 0 && range_.Contains(range);
171   }
172   void EmitMessage(std::ostream &, const std::optional<ProvenanceRange> &,
173       const std::string &message, bool echoSourceLine = false) const;
174   const SourceFile *GetSourceFile(
175       Provenance, std::size_t *offset = nullptr) const;
176   std::optional<SourcePosition> GetSourcePosition(Provenance) const;
177   std::optional<ProvenanceRange> GetFirstFileProvenance() const;
178   std::string GetPath(Provenance) const;  // __FILE__
179   int GetLineNumber(Provenance) const;  // __LINE__
180   Provenance CompilerInsertionProvenance(char ch);
181   Provenance CompilerInsertionProvenance(const char *, std::size_t);
182   ProvenanceRange IntersectionWithSourceFiles(ProvenanceRange) const;
183   std::ostream &Dump(std::ostream &) const;
184 
185 private:
186   struct Inclusion {
187     const SourceFile &source;
188     bool isModule{false};
189   };
190   struct Macro {
191     ProvenanceRange definition;
192     std::string expansion;
193   };
194   struct CompilerInsertion {
195     std::string text;
196   };
197 
198   struct Origin {
199     Origin(ProvenanceRange, const SourceFile &);
200     Origin(ProvenanceRange, const SourceFile &, ProvenanceRange,
201         bool isModule = false);
202     Origin(ProvenanceRange, ProvenanceRange def, ProvenanceRange use,
203         const std::string &expansion);
204     Origin(ProvenanceRange, const std::string &);
205 
206     const char &operator[](std::size_t) const;
207 
208     std::variant<Inclusion, Macro, CompilerInsertion> u;
209     ProvenanceRange covers, replaces;
210   };
211 
212   const Origin &MapToOrigin(Provenance) const;
213 
214   // Elements are in ascending & contiguous order of .covers.
215   std::vector<Origin> origin_;
216   ProvenanceRange range_;
217   std::map<char, Provenance> compilerInsertionProvenance_;
218   std::vector<std::unique_ptr<SourceFile>> ownedSourceFiles_;
219   std::vector<std::string> searchPath_;
220   Encoding encoding_{Encoding::UTF_8};
221 };
222 
223 class CookedSource {
224 public:
225   explicit CookedSource(AllSources &);
226   ~CookedSource();
227 
allSources()228   AllSources &allSources() { return allSources_; }
allSources()229   const AllSources &allSources() const { return allSources_; }
data()230   const std::string &data() const { return data_; }
231 
IsValid(const char * p)232   bool IsValid(const char *p) const {
233     return p >= &data_.front() && p <= &data_.back() + 1;
234   }
IsValid(CharBlock range)235   bool IsValid(CharBlock range) const {
236     return !range.empty() && IsValid(range.begin()) && IsValid(range.end() - 1);
237   }
IsValid(ProvenanceRange r)238   bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); }
239 
240   std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const;
241   std::optional<CharBlock> GetCharBlockFromLineAndColumns(
242       int line, int startColumn, int endColumn) const;
243   std::optional<std::pair<SourcePosition, SourcePosition>>
244       GetSourcePositionRange(CharBlock) const;
245   std::optional<CharBlock> GetCharBlock(ProvenanceRange) const;
246 
247   // The result of a Put() is the offset that the new data
248   // will have in the eventually marshaled contiguous buffer.
Put(const char * data,std::size_t bytes)249   std::size_t Put(const char *data, std::size_t bytes) {
250     return buffer_.Put(data, bytes);
251   }
Put(const std::string & s)252   std::size_t Put(const std::string &s) { return buffer_.Put(s); }
Put(char ch)253   std::size_t Put(char ch) { return buffer_.Put(&ch, 1); }
Put(char ch,Provenance p)254   std::size_t Put(char ch, Provenance p) {
255     provenanceMap_.Put(ProvenanceRange{p, 1});
256     return buffer_.Put(&ch, 1);
257   }
258 
PutProvenance(Provenance p)259   void PutProvenance(Provenance p) { provenanceMap_.Put(ProvenanceRange{p}); }
PutProvenance(ProvenanceRange pr)260   void PutProvenance(ProvenanceRange pr) { provenanceMap_.Put(pr); }
PutProvenanceMappings(const OffsetToProvenanceMappings & pm)261   void PutProvenanceMappings(const OffsetToProvenanceMappings &pm) {
262     provenanceMap_.Put(pm);
263   }
264 
265   void Marshal();  // marshals text into one contiguous block
266   void CompileProvenanceRangeToOffsetMappings();
AcquireData()267   std::string AcquireData() { return std::move(data_); }
268   std::ostream &Dump(std::ostream &) const;
269 
270 private:
271   AllSources &allSources_;
272   CharBuffer buffer_;  // before Marshal()
273   std::string data_;  // all of it, prescanned and preprocessed
274   OffsetToProvenanceMappings provenanceMap_;
275   ProvenanceRangeToOffsetMappings invertedMap_;
276 };
277 }
278 #endif  // FORTRAN_PARSER_PROVENANCE_H_
279