1 //===-- lib/Parser/token-sequence.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "token-sequence.h"
10 #include "flang/Parser/characters.h"
11 #include "flang/Parser/message.h"
12 #include "llvm/Support/raw_ostream.h"
13
14 namespace Fortran::parser {
15
operator =(TokenSequence && that)16 TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
17 clear();
18 swap(that);
19 return *this;
20 }
21
clear()22 void TokenSequence::clear() {
23 start_.clear();
24 nextStart_ = 0;
25 char_.clear();
26 provenances_.clear();
27 }
28
pop_back()29 void TokenSequence::pop_back() {
30 CHECK(!start_.empty());
31 CHECK(nextStart_ > start_.back());
32 std::size_t bytes{nextStart_ - start_.back()};
33 nextStart_ = start_.back();
34 start_.pop_back();
35 char_.resize(nextStart_);
36 provenances_.RemoveLastBytes(bytes);
37 }
38
shrink_to_fit()39 void TokenSequence::shrink_to_fit() {
40 start_.shrink_to_fit();
41 char_.shrink_to_fit();
42 provenances_.shrink_to_fit();
43 }
44
swap(TokenSequence & that)45 void TokenSequence::swap(TokenSequence &that) {
46 start_.swap(that.start_);
47 std::swap(nextStart_, that.nextStart_);
48 char_.swap(that.char_);
49 provenances_.swap(that.provenances_);
50 }
51
SkipBlanks(std::size_t at) const52 std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
53 std::size_t tokens{start_.size()};
54 for (; at < tokens; ++at) {
55 if (!TokenAt(at).IsBlank()) {
56 return at;
57 }
58 }
59 return tokens; // even if at > tokens
60 }
61
// C-style /*comments*/ are removed from preprocessing directive
// token sequences by the prescanner, but not C++ or Fortran
// free-form line-ending comments (//... and !...) because
// ignoring them is directive-specific.
// Returns true iff any significant (non-blank, non-comment) text remains
// at or after token index "at".
bool TokenSequence::IsAnythingLeft(std::size_t at) const {
  std::size_t tokens{start_.size()};
  for (; at < tokens; ++at) {
    auto tok{TokenAt(at)};
    const char *end{tok.end()};
    for (const char *p{tok.begin()}; p < end; ++p) {
      switch (*p) {
      case '/':
        // A lone '/' is a significant token; "//" begins a C++-style
        // line-ending comment, so nothing significant remains.
        // NOTE(review): the lookahead only checks within this token --
        // confirm a "//" can never be split across two tokens here.
        return p + 1 >= end || p[1] != '/';
      case '!':
        // Fortran line-ending comment: nothing significant follows.
        return false;
      case ' ':
        break; // skip blanks within the token
      default:
        return true; // any other character is significant
      }
    }
  }
  return false;
}
86
Put(const TokenSequence & that)87 void TokenSequence::Put(const TokenSequence &that) {
88 if (nextStart_ < char_.size()) {
89 start_.push_back(nextStart_);
90 }
91 int offset = char_.size();
92 for (int st : that.start_) {
93 start_.push_back(st + offset);
94 }
95 char_.insert(char_.end(), that.char_.begin(), that.char_.end());
96 nextStart_ = char_.size();
97 provenances_.Put(that.provenances_);
98 }
99
Put(const TokenSequence & that,ProvenanceRange range)100 void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
101 std::size_t offset{0};
102 std::size_t tokens{that.SizeInTokens()};
103 for (std::size_t j{0}; j < tokens; ++j) {
104 CharBlock tok{that.TokenAt(j)};
105 Put(tok, range.OffsetMember(offset));
106 offset += tok.size();
107 }
108 CHECK(offset == range.size());
109 }
110
// Copies "tokens" tokens from "that", starting at token index "at",
// into this sequence, reproducing their original provenance mappings.
void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      // The default-constructed range has size 0, so the first iteration
      // (and each time a contiguous provenance chunk is exhausted) maps
      // the next source character to a fresh chunk.
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}
128
Put(const char * s,std::size_t bytes,Provenance provenance)129 void TokenSequence::Put(
130 const char *s, std::size_t bytes, Provenance provenance) {
131 for (std::size_t j{0}; j < bytes; ++j) {
132 PutNextTokenChar(s[j], provenance + j);
133 }
134 CloseToken();
135 }
136
Put(const CharBlock & t,Provenance provenance)137 void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
138 Put(&t[0], t.size(), provenance);
139 }
140
Put(const std::string & s,Provenance provenance)141 void TokenSequence::Put(const std::string &s, Provenance provenance) {
142 Put(s.data(), s.size(), provenance);
143 }
144
// Appends the text accumulated in "ss" as one token.
// (llvm::raw_string_ostream::str() flushes the stream into its backing
// std::string and returns it.)
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}
148
// Lowers the case of every letter in the sequence in place, except text
// whose case is significant: the quoted part of character literals and
// the payload of Hollerith constants.  Returns *this for chaining.
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    // [j, nextStart) spans the characters of the current token.
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    j = nextStart;
    if (IsDecimalDigit(*p)) {
      // Token begins with digits: decide by what follows them.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
        // all digits: nothing to lower
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        // Lower only the 'H' marker; the payload's case is significant.
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (limit[-1] == '\'' || limit[-1] == '"') {
      // Token ends with a quote character.
      if (*p == limit[-1]) {
        // Character literal without prefix
      } else if (p[1] == limit[-1]) {
        // BOZX-prefixed constant
        // Lower the whole token; the case of hex digits is insignificant.
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        // Lower only the prefix, stopping at the opening quote.
        for (; *p != limit[-1]; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      // Ordinary token: lower every letter.
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}
196
HasBlanks(std::size_t firstChar) const197 bool TokenSequence::HasBlanks(std::size_t firstChar) const {
198 std::size_t tokens{SizeInTokens()};
199 for (std::size_t j{0}; j < tokens; ++j) {
200 if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
201 return true;
202 }
203 }
204 return false;
205 }
206
HasRedundantBlanks(std::size_t firstChar) const207 bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
208 std::size_t tokens{SizeInTokens()};
209 bool lastWasBlank{false};
210 for (std::size_t j{0}; j < tokens; ++j) {
211 bool isBlank{TokenAt(j).IsBlank()};
212 if (isBlank && lastWasBlank && start_[j] >= firstChar) {
213 return true;
214 }
215 lastWasBlank = isBlank;
216 }
217 return false;
218 }
219
RemoveBlanks(std::size_t firstChar)220 TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
221 std::size_t tokens{SizeInTokens()};
222 TokenSequence result;
223 for (std::size_t j{0}; j < tokens; ++j) {
224 if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
225 result.Put(*this, j);
226 }
227 }
228 swap(result);
229 return *this;
230 }
231
RemoveRedundantBlanks(std::size_t firstChar)232 TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
233 std::size_t tokens{SizeInTokens()};
234 TokenSequence result;
235 bool lastWasBlank{false};
236 for (std::size_t j{0}; j < tokens; ++j) {
237 bool isBlank{TokenAt(j).IsBlank()};
238 if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
239 result.Put(*this, j);
240 }
241 lastWasBlank = isBlank;
242 }
243 swap(result);
244 return *this;
245 }
246
// Truncates the sequence at a token whose first non-blank character is
// '!'.  When skipFirst is set, the first such token is retained (e.g., a
// compiler-directive sentinel) and clipping applies to the next one.
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          // NOTE(review): Put's third argument is a token COUNT, so this
          // retains tokens [0, j-2] and also drops token j-1 (the token
          // immediately before the '!').  Confirm that token is always a
          // blank separator here; otherwise this looks like an off-by-one.
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
265
Emit(CookedSource & cooked) const266 void TokenSequence::Emit(CookedSource &cooked) const {
267 cooked.Put(&char_[0], char_.size());
268 cooked.PutProvenanceMappings(provenances_);
269 }
270
Dump(llvm::raw_ostream & o) const271 llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const {
272 o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
273 << nextStart_ << '\n';
274 for (std::size_t j{0}; j < start_.size(); ++j) {
275 o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
276 << "'\n";
277 }
278 return o;
279 }
280
GetCharProvenance(std::size_t offset) const281 Provenance TokenSequence::GetCharProvenance(std::size_t offset) const {
282 ProvenanceRange range{provenances_.Map(offset)};
283 return range.start();
284 }
285
GetTokenProvenance(std::size_t token,std::size_t offset) const286 Provenance TokenSequence::GetTokenProvenance(
287 std::size_t token, std::size_t offset) const {
288 return GetCharProvenance(start_[token] + offset);
289 }
290
GetTokenProvenanceRange(std::size_t token,std::size_t offset) const291 ProvenanceRange TokenSequence::GetTokenProvenanceRange(
292 std::size_t token, std::size_t offset) const {
293 ProvenanceRange range{provenances_.Map(start_[token] + offset)};
294 return range.Prefix(TokenBytes(token) - offset);
295 }
296
// Provenance range covering "tokens" consecutive tokens starting at token
// index "token".  The range grows only while each successive token's
// provenance immediately follows the accumulated range; it stops growing
// at the first discontinuity.
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  // AnnexIfPredecessor both tests contiguity and, on success, widens
  // "range" to absorb the next token's provenance; the loop body is empty.
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}
308
GetProvenanceRange() const309 ProvenanceRange TokenSequence::GetProvenanceRange() const {
310 return GetIntervalProvenanceRange(0, start_.size());
311 }
312
// Emits an error message for each token whose first non-blank character
// is not a valid Fortran token character.  A '!' in the very first token
// is tolerated so compiler directives (e.g. !dir$) pass through.
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
      if (ch == '!' && j == 0) {
        // allow in !dir$
      } else if (ch < ' ' || ch >= '\x7f') {
        // Control or non-ASCII character: report it in hex; "& 0xff"
        // keeps the value non-negative if char is signed.
        messages.Say(GetTokenProvenanceRange(j),
            "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
      } else {
        // Printable but invalid character: show it literally.
        messages.Say(GetTokenProvenanceRange(j),
            "bad character ('%c') in Fortran token"_err_en_US, ch);
      }
    }
  }
  return *this;
}
333
// Checks that '(' and ')' tokens balance, reporting the first unmatched
// parenthesis via "messages" when they do not.
// Note: the quick pass only checks the NET count, so a sequence like
// ")(" nets to zero and is accepted by this function.
const TokenSequence &TokenSequence::CheckBadParentheses(
    Messages &messages) const {
  // First, a quick pass with no allocation for the common case
  int nesting{0};
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch == '(') {
      ++nesting;
    } else if (ch == ')') {
      --nesting;
    }
  }
  if (nesting != 0) {
    // There's an error; diagnose it
    std::vector<std::size_t> stack;
    for (std::size_t j{0}; j < tokens; ++j) {
      CharBlock token{TokenAt(j)};
      char ch{token.FirstNonBlank()};
      if (ch == '(') {
        stack.push_back(j);
      } else if (ch == ')') {
        if (stack.empty()) {
          // A ')' with no matching '(' still open.
          messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
          return *this;
        }
        stack.pop_back();
      }
    }
    // nesting != 0 and no unmatched ')' was found, so a '(' must remain
    // open on the stack.
    CHECK(!stack.empty());
    messages.Say(
        GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
  }
  return *this;
}
370 } // namespace Fortran::parser
371