1 //===-- lib/Parser/token-sequence.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "token-sequence.h"
10 #include "flang/Parser/characters.h"
11 #include "flang/Parser/message.h"
12 #include "llvm/Support/raw_ostream.h"
13 
14 namespace Fortran::parser {
15 
operator =(TokenSequence && that)16 TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
17   clear();
18   swap(that);
19   return *this;
20 }
21 
clear()22 void TokenSequence::clear() {
23   start_.clear();
24   nextStart_ = 0;
25   char_.clear();
26   provenances_.clear();
27 }
28 
pop_back()29 void TokenSequence::pop_back() {
30   CHECK(!start_.empty());
31   CHECK(nextStart_ > start_.back());
32   std::size_t bytes{nextStart_ - start_.back()};
33   nextStart_ = start_.back();
34   start_.pop_back();
35   char_.resize(nextStart_);
36   provenances_.RemoveLastBytes(bytes);
37 }
38 
// Release excess capacity held by the underlying buffers.
void TokenSequence::shrink_to_fit() {
  start_.shrink_to_fit();
  char_.shrink_to_fit();
  provenances_.shrink_to_fit();
}
44 
swap(TokenSequence & that)45 void TokenSequence::swap(TokenSequence &that) {
46   start_.swap(that.start_);
47   std::swap(nextStart_, that.nextStart_);
48   char_.swap(that.char_);
49   provenances_.swap(that.provenances_);
50 }
51 
SkipBlanks(std::size_t at) const52 std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
53   std::size_t tokens{start_.size()};
54   for (; at < tokens; ++at) {
55     if (!TokenAt(at).IsBlank()) {
56       return at;
57     }
58   }
59   return tokens; // even if at > tokens
60 }
61 
62 // C-style /*comments*/ are removed from preprocessing directive
63 // token sequences by the prescanner, but not C++ or Fortran
64 // free-form line-ending comments (//...  and !...) because
65 // ignoring them is directive-specific.
IsAnythingLeft(std::size_t at) const66 bool TokenSequence::IsAnythingLeft(std::size_t at) const {
67   std::size_t tokens{start_.size()};
68   for (; at < tokens; ++at) {
69     auto tok{TokenAt(at)};
70     const char *end{tok.end()};
71     for (const char *p{tok.begin()}; p < end; ++p) {
72       switch (*p) {
73       case '/':
74         return p + 1 >= end || p[1] != '/';
75       case '!':
76         return false;
77       case ' ':
78         break;
79       default:
80         return true;
81       }
82     }
83   }
84   return false;
85 }
86 
Put(const TokenSequence & that)87 void TokenSequence::Put(const TokenSequence &that) {
88   if (nextStart_ < char_.size()) {
89     start_.push_back(nextStart_);
90   }
91   int offset = char_.size();
92   for (int st : that.start_) {
93     start_.push_back(st + offset);
94   }
95   char_.insert(char_.end(), that.char_.begin(), that.char_.end());
96   nextStart_ = char_.size();
97   provenances_.Put(that.provenances_);
98 }
99 
Put(const TokenSequence & that,ProvenanceRange range)100 void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
101   std::size_t offset{0};
102   std::size_t tokens{that.SizeInTokens()};
103   for (std::size_t j{0}; j < tokens; ++j) {
104     CharBlock tok{that.TokenAt(j)};
105     Put(tok, range.OffsetMember(offset));
106     offset += tok.size();
107   }
108   CHECK(offset == range.size());
109 }
110 
// Append |tokens| tokens of |that| starting at token index |at|,
// copying each character's provenance from |that|'s mappings.
void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      // A token's characters may span multiple provenance ranges;
      // when the current range is used up, map the next character's
      // position to fetch the following range.  The default-constructed
      // range is empty, so the first character always maps.
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}
128 
Put(const char * s,std::size_t bytes,Provenance provenance)129 void TokenSequence::Put(
130     const char *s, std::size_t bytes, Provenance provenance) {
131   for (std::size_t j{0}; j < bytes; ++j) {
132     PutNextTokenChar(s[j], provenance + j);
133   }
134   CloseToken();
135 }
136 
Put(const CharBlock & t,Provenance provenance)137 void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
138   Put(&t[0], t.size(), provenance);
139 }
140 
// Append the characters of a std::string as a single token.
void TokenSequence::Put(const std::string &s, Provenance provenance) {
  Put(s.data(), s.size(), provenance);
}
144 
// Append the accumulated contents of a raw_string_ostream as a single
// token; str() flushes the stream and yields its backing string.
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}
148 
// Lower-case the sequence in place, token by token, taking care not to
// modify the payloads of character literals or Hollerith constants.
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    // [j, nextStart) delimits the current token's characters.
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    j = nextStart;
    if (IsDecimalDigit(*p)) {
      // Token begins with digits: could be a plain numeric literal,
      // a Hollerith constant, a kind-prefixed character literal, or
      // a real literal with an exponent.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
        // all digits: nothing to lower-case
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith: normalize the marker only, leave payload intact
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (limit[-1] == '\'' || limit[-1] == '"') {
      // Token ends with a quote: some form of character literal.
      if (*p == limit[-1]) {
        // Character literal without prefix
      } else if (p[1] == limit[-1]) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        // Lower-case only up to the opening quote.
        for (; *p != limit[-1]; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      // Ordinary token: lower-case it entirely.
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}
196 
HasBlanks(std::size_t firstChar) const197 bool TokenSequence::HasBlanks(std::size_t firstChar) const {
198   std::size_t tokens{SizeInTokens()};
199   for (std::size_t j{0}; j < tokens; ++j) {
200     if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
201       return true;
202     }
203   }
204   return false;
205 }
206 
HasRedundantBlanks(std::size_t firstChar) const207 bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
208   std::size_t tokens{SizeInTokens()};
209   bool lastWasBlank{false};
210   for (std::size_t j{0}; j < tokens; ++j) {
211     bool isBlank{TokenAt(j).IsBlank()};
212     if (isBlank && lastWasBlank && start_[j] >= firstChar) {
213       return true;
214     }
215     lastWasBlank = isBlank;
216   }
217   return false;
218 }
219 
RemoveBlanks(std::size_t firstChar)220 TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
221   std::size_t tokens{SizeInTokens()};
222   TokenSequence result;
223   for (std::size_t j{0}; j < tokens; ++j) {
224     if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
225       result.Put(*this, j);
226     }
227   }
228   swap(result);
229   return *this;
230 }
231 
RemoveRedundantBlanks(std::size_t firstChar)232 TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
233   std::size_t tokens{SizeInTokens()};
234   TokenSequence result;
235   bool lastWasBlank{false};
236   for (std::size_t j{0}; j < tokens; ++j) {
237     bool isBlank{TokenAt(j).IsBlank()};
238     if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
239       result.Put(*this, j);
240     }
241     lastWasBlank = isBlank;
242   }
243   swap(result);
244   return *this;
245 }
246 
// Truncate the sequence at a '!' comment token.  When |skipFirst| is
// true, the first '!' token encountered is left alone (so a directive
// sentinel survives) and only a subsequent one triggers clipping.
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          // NOTE(review): this copies j-1 tokens (indices 0 .. j-2),
          // i.e. it also drops the token immediately before the '!' —
          // presumably a blank separator; confirm that a non-blank
          // token directly adjacent to '!' is not lost unintentionally.
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
265 
Emit(CookedSource & cooked) const266 void TokenSequence::Emit(CookedSource &cooked) const {
267   cooked.Put(&char_[0], char_.size());
268   cooked.PutProvenanceMappings(provenances_);
269 }
270 
Dump(llvm::raw_ostream & o) const271 llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const {
272   o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
273     << nextStart_ << '\n';
274   for (std::size_t j{0}; j < start_.size(); ++j) {
275     o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
276       << "'\n";
277   }
278   return o;
279 }
280 
GetCharProvenance(std::size_t offset) const281 Provenance TokenSequence::GetCharProvenance(std::size_t offset) const {
282   ProvenanceRange range{provenances_.Map(offset)};
283   return range.start();
284 }
285 
// Provenance of character |offset| within token |token|.
Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  return GetCharProvenance(start_[token] + offset);
}
290 
GetTokenProvenanceRange(std::size_t token,std::size_t offset) const291 ProvenanceRange TokenSequence::GetTokenProvenanceRange(
292     std::size_t token, std::size_t offset) const {
293   ProvenanceRange range{provenances_.Map(start_[token] + offset)};
294   return range.Prefix(TokenBytes(token) - offset);
295 }
296 
// Provenance range covering |tokens| tokens starting at |token|.
// The range grows only while each successive token's provenance is the
// immediate successor of the range so far; it stops at the first
// discontinuity, so the result may cover fewer tokens than requested.
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}
308 
// Provenance range of the entire token sequence.
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}
312 
CheckBadFortranCharacters(Messages & messages) const313 const TokenSequence &TokenSequence::CheckBadFortranCharacters(
314     Messages &messages) const {
315   std::size_t tokens{SizeInTokens()};
316   for (std::size_t j{0}; j < tokens; ++j) {
317     CharBlock token{TokenAt(j)};
318     char ch{token.FirstNonBlank()};
319     if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
320       if (ch == '!' && j == 0) {
321         // allow in !dir$
322       } else if (ch < ' ' || ch >= '\x7f') {
323         messages.Say(GetTokenProvenanceRange(j),
324             "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
325       } else {
326         messages.Say(GetTokenProvenanceRange(j),
327             "bad character ('%c') in Fortran token"_err_en_US, ch);
328       }
329     }
330   }
331   return *this;
332 }
333 
CheckBadParentheses(Messages & messages) const334 const TokenSequence &TokenSequence::CheckBadParentheses(
335     Messages &messages) const {
336   // First, a quick pass with no allocation for the common case
337   int nesting{0};
338   std::size_t tokens{SizeInTokens()};
339   for (std::size_t j{0}; j < tokens; ++j) {
340     CharBlock token{TokenAt(j)};
341     char ch{token.FirstNonBlank()};
342     if (ch == '(') {
343       ++nesting;
344     } else if (ch == ')') {
345       --nesting;
346     }
347   }
348   if (nesting != 0) {
349     // There's an error; diagnose it
350     std::vector<std::size_t> stack;
351     for (std::size_t j{0}; j < tokens; ++j) {
352       CharBlock token{TokenAt(j)};
353       char ch{token.FirstNonBlank()};
354       if (ch == '(') {
355         stack.push_back(j);
356       } else if (ch == ')') {
357         if (stack.empty()) {
358           messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
359           return *this;
360         }
361         stack.pop_back();
362       }
363     }
364     CHECK(!stack.empty());
365     messages.Say(
366         GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
367   }
368   return *this;
369 }
370 } // namespace Fortran::parser
371