1 //===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Implements out-of-line member functions of template class FormatControl
10 
11 #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
12 #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
13 
14 #include "format.h"
15 #include "io-stmt.h"
16 #include "main.h"
17 #include "flang/Common/format.h"
18 #include "flang/Decimal/decimal.h"
19 #include <algorithm>
20 #include <limits>
21 
22 namespace Fortran::runtime::io {
23 
24 template <typename CONTEXT>
FormatControl(const Terminator & terminator,const CharType * format,std::size_t formatLength,int maxHeight)25 FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
26     const CharType *format, std::size_t formatLength, int maxHeight)
27     : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format},
28       formatLength_{static_cast<int>(formatLength)} {
29   RUNTIME_CHECK(terminator, maxHeight == maxHeight_);
30   RUNTIME_CHECK(
31       terminator, formatLength == static_cast<std::size_t>(formatLength_));
32   stack_[0].start = offset_;
33   stack_[0].remaining = Iteration::unlimited; // 13.4(8)
34 }
35 
36 template <typename CONTEXT>
GetMaxParenthesisNesting(IoErrorHandler & handler,const CharType * format,std::size_t formatLength)37 int FormatControl<CONTEXT>::GetMaxParenthesisNesting(
38     IoErrorHandler &handler, const CharType *format, std::size_t formatLength) {
39   int maxNesting{0};
40   int nesting{0};
41   const CharType *end{format + formatLength};
42   std::optional<CharType> quote;
43   int repeat{0};
44   for (const CharType *p{format}; p < end; ++p) {
45     if (quote) {
46       if (*p == *quote) {
47         quote.reset();
48       }
49     } else if (*p >= '0' && *p <= '9') {
50       repeat = 10 * repeat + *p - '0';
51     } else if (*p != ' ') {
52       switch (*p) {
53       case '\'':
54       case '"':
55         quote = *p;
56         break;
57       case 'h':
58       case 'H': // 9HHOLLERITH
59         p += repeat;
60         if (p >= end) {
61           handler.SignalError(IostatErrorInFormat,
62               "Hollerith (%dH) too long in FORMAT", repeat);
63           return maxNesting;
64         }
65         break;
66       case ' ':
67         break;
68       case '(':
69         ++nesting;
70         maxNesting = std::max(nesting, maxNesting);
71         break;
72       case ')':
73         nesting = std::max(nesting - 1, 0);
74         break;
75       }
76       repeat = 0;
77     }
78   }
79   if (quote) {
80     handler.SignalError(
81         IostatErrorInFormat, "Unbalanced quotation marks in FORMAT string");
82   } else if (nesting) {
83     handler.SignalError(
84         IostatErrorInFormat, "Unbalanced parentheses in FORMAT string");
85   }
86   return maxNesting;
87 }
88 
89 template <typename CONTEXT>
GetIntField(IoErrorHandler & handler,CharType firstCh)90 int FormatControl<CONTEXT>::GetIntField(
91     IoErrorHandler &handler, CharType firstCh) {
92   CharType ch{firstCh ? firstCh : PeekNext()};
93   if (ch != '-' && ch != '+' && (ch < '0' || ch > '9')) {
94     handler.SignalError(IostatErrorInFormat,
95         "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch));
96     return 0;
97   }
98   int result{0};
99   bool negate{ch == '-'};
100   if (negate || ch == '+') {
101     firstCh = '\0';
102     ch = PeekNext();
103   }
104   while (ch >= '0' && ch <= '9') {
105     if (result >
106         std::numeric_limits<int>::max() / 10 - (static_cast<int>(ch) - '0')) {
107       handler.SignalError(
108           IostatErrorInFormat, "FORMAT integer field out of range");
109       return result;
110     }
111     result = 10 * result + ch - '0';
112     if (firstCh) {
113       firstCh = '\0';
114     } else {
115       ++offset_;
116     }
117     ch = PeekNext();
118   }
119   if (negate && (result *= -1) > 0) {
120     handler.SignalError(
121         IostatErrorInFormat, "FORMAT integer field out of range");
122   }
123   return result;
124 }
125 
126 template <typename CONTEXT>
HandleControl(CONTEXT & context,char ch,char next,int n)127 static void HandleControl(CONTEXT &context, char ch, char next, int n) {
128   MutableModes &modes{context.mutableModes()};
129   switch (ch) {
130   case 'B':
131     if (next == 'Z') {
132       modes.editingFlags |= blankZero;
133       return;
134     }
135     if (next == 'N') {
136       modes.editingFlags &= ~blankZero;
137       return;
138     }
139     break;
140   case 'D':
141     if (next == 'C') {
142       modes.editingFlags |= decimalComma;
143       return;
144     }
145     if (next == 'P') {
146       modes.editingFlags &= ~decimalComma;
147       return;
148     }
149     break;
150   case 'P':
151     if (!next) {
152       modes.scale = n; // kP - decimal scaling by 10**k
153       return;
154     }
155     break;
156   case 'R':
157     switch (next) {
158     case 'N':
159       modes.round = decimal::RoundNearest;
160       return;
161     case 'Z':
162       modes.round = decimal::RoundToZero;
163       return;
164     case 'U':
165       modes.round = decimal::RoundUp;
166       return;
167     case 'D':
168       modes.round = decimal::RoundDown;
169       return;
170     case 'C':
171       modes.round = decimal::RoundCompatible;
172       return;
173     case 'P':
174       modes.round = executionEnvironment.defaultOutputRoundingMode;
175       return;
176     default:
177       break;
178     }
179     break;
180   case 'X':
181     if (!next) {
182       context.HandleRelativePosition(n);
183       return;
184     }
185     break;
186   case 'S':
187     if (next == 'P') {
188       modes.editingFlags |= signPlus;
189       return;
190     }
191     if (!next || next == 'S') {
192       modes.editingFlags &= ~signPlus;
193       return;
194     }
195     break;
196   case 'T': {
197     if (!next) { // Tn
198       context.HandleAbsolutePosition(n - 1); // convert 1-based to 0-based
199       return;
200     }
201     if (next == 'L' || next == 'R') { // TLn & TRn
202       context.HandleRelativePosition(next == 'L' ? -n : n);
203       return;
204     }
205   } break;
206   default:
207     break;
208   }
209   if (next) {
210     context.SignalError(IostatErrorInFormat,
211         "Unknown '%c%c' edit descriptor in FORMAT", ch, next);
212   } else {
213     context.SignalError(
214         IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch);
215   }
216 }
217 
218 // Locates the next data edit descriptor in the format.
219 // Handles all repetition counts and control edit descriptors.
220 // Generally assumes that the format string has survived the common
221 // format validator gauntlet.
222 template <typename CONTEXT>
CueUpNextDataEdit(Context & context,bool stop)223 int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
224   int unlimitedLoopCheck{-1};
225   while (true) {
226     std::optional<int> repeat;
227     bool unlimited{false};
228     auto maybeReversionPoint{offset_};
229     CharType ch{GetNextChar(context)};
230     while (ch == ',' || ch == ':') {
231       // Skip commas, and don't complain if they're missing; the format
232       // validator does that.
233       if (stop && ch == ':') {
234         return 0;
235       }
236       ch = GetNextChar(context);
237     }
238     if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
239       repeat = GetIntField(context, ch);
240       ch = GetNextChar(context);
241     } else if (ch == '*') {
242       unlimited = true;
243       ch = GetNextChar(context);
244       if (ch != '(') {
245         context.SignalError(IostatErrorInFormat,
246             "Invalid FORMAT: '*' may appear only before '('");
247         return 0;
248       }
249     }
250     ch = Capitalize(ch);
251     if (ch == '(') {
252       if (height_ >= maxHeight_) {
253         context.SignalError(IostatErrorInFormat,
254             "FORMAT stack overflow: too many nested parentheses");
255         return 0;
256       }
257       stack_[height_].start = offset_ - 1; // the '('
258       RUNTIME_CHECK(context, format_[stack_[height_].start] == '(');
259       if (unlimited || height_ == 0) {
260         stack_[height_].remaining = Iteration::unlimited;
261         unlimitedLoopCheck = offset_ - 1;
262       } else if (repeat) {
263         if (*repeat <= 0) {
264           *repeat = 1; // error recovery
265         }
266         stack_[height_].remaining = *repeat - 1;
267       } else {
268         stack_[height_].remaining = 0;
269       }
270       if (height_ == 1) {
271         // Subtle point (F'2018 13.4 para 9): tha last parenthesized group
272         // at height 1 becomes the restart point after control reaches the
273         // end of the format, including its repeat count.
274         stack_[0].start = maybeReversionPoint - 1;
275       }
276       ++height_;
277     } else if (height_ == 0) {
278       context.SignalError(IostatErrorInFormat, "FORMAT lacks initial '('");
279       return 0;
280     } else if (ch == ')') {
281       if (height_ == 1) {
282         if (stop) {
283           return 0; // end of FORMAT and no data items remain
284         }
285         context.AdvanceRecord(); // implied / before rightmost )
286       }
287       auto restart{stack_[height_ - 1].start + 1};
288       if (stack_[height_ - 1].remaining == Iteration::unlimited) {
289         offset_ = restart;
290         if (offset_ == unlimitedLoopCheck) {
291           context.SignalError(IostatErrorInFormat,
292               "Unlimited repetition in FORMAT lacks data edit descriptors");
293         }
294       } else if (stack_[height_ - 1].remaining-- > 0) {
295         offset_ = restart;
296       } else {
297         --height_;
298       }
299     } else if (ch == '\'' || ch == '"') {
300       // Quoted 'character literal'
301       CharType quote{ch};
302       auto start{offset_};
303       while (offset_ < formatLength_ && format_[offset_] != quote) {
304         ++offset_;
305       }
306       if (offset_ >= formatLength_) {
307         context.SignalError(IostatErrorInFormat,
308             "FORMAT missing closing quote on character literal");
309         return 0;
310       }
311       ++offset_;
312       std::size_t chars{
313           static_cast<std::size_t>(&format_[offset_] - &format_[start])};
314       if (PeekNext() == quote) {
315         // subtle: handle doubled quote character in a literal by including
316         // the first in the output, then treating the second as the start
317         // of another character literal.
318       } else {
319         --chars;
320       }
321       context.Emit(format_ + start, chars);
322     } else if (ch == 'H') {
323       // 9HHOLLERITH
324       if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
325         context.SignalError(
326             IostatErrorInFormat, "Invalid width on Hollerith in FORMAT");
327         return 0;
328       }
329       context.Emit(format_ + offset_, static_cast<std::size_t>(*repeat));
330       offset_ += *repeat;
331     } else if (ch >= 'A' && ch <= 'Z') {
332       int start{offset_ - 1};
333       CharType next{'\0'};
334       if (ch != 'P') { // 1PE5.2 - comma not required (C1302)
335         CharType peek{Capitalize(PeekNext())};
336         if (peek >= 'A' && peek <= 'Z') {
337           next = peek;
338           ++offset_;
339         }
340       }
341       if ((!next &&
342               (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' ||
343                   ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' ||
344                   ch == 'L')) ||
345           (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) ||
346           (ch == 'D' && next == 'T')) {
347         // Data edit descriptor found
348         offset_ = start;
349         return repeat && *repeat > 0 ? *repeat : 1;
350       } else {
351         // Control edit descriptor
352         if (ch == 'T') { // Tn, TLn, TRn
353           repeat = GetIntField(context);
354         }
355         HandleControl(context, static_cast<char>(ch), static_cast<char>(next),
356             repeat ? *repeat : 1);
357       }
358     } else if (ch == '/') {
359       context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1);
360     } else if (ch == '$' || ch == '\\') {
361       context.mutableModes().nonAdvancing = true;
362     } else {
363       context.SignalError(IostatErrorInFormat,
364           "Invalid character '%c' in FORMAT", static_cast<char>(ch));
365       return 0;
366     }
367   }
368 }
369 
370 // Returns the next data edit descriptor
371 template <typename CONTEXT>
GetNextDataEdit(Context & context,int maxRepeat)372 DataEdit FormatControl<CONTEXT>::GetNextDataEdit(
373     Context &context, int maxRepeat) {
374   int repeat{CueUpNextDataEdit(context)};
375   auto start{offset_};
376   DataEdit edit;
377   edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context)));
378   if (edit.descriptor == 'E') {
379     if (auto next{static_cast<char>(Capitalize(PeekNext()))};
380         next == 'N' || next == 'S' || next == 'X') {
381       edit.variation = next;
382       ++offset_;
383     }
384   } else if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') {
385     // DT'iotype'(v_list) user-defined derived type I/O
386     edit.descriptor = DataEdit::DefinedDerivedType;
387     ++offset_;
388     if (auto quote{static_cast<char>(PeekNext())};
389         quote == '\'' || quote == '"') {
390       // Capture the quoted 'iotype'
391       bool ok{false}, tooLong{false};
392       for (++offset_; offset_ < formatLength_;) {
393         auto ch{static_cast<char>(format_[offset_++])};
394         if (ch == quote &&
395             (offset_ == formatLength_ ||
396                 static_cast<char>(format_[offset_]) != quote)) {
397           ok = true;
398           break; // that was terminating quote
399         } else if (edit.ioTypeChars >= edit.maxIoTypeChars) {
400           tooLong = true;
401         } else {
402           edit.ioType[edit.ioTypeChars++] = ch;
403           if (ch == quote) {
404             ++offset_;
405           }
406         }
407       }
408       if (!ok) {
409         context.SignalError(
410             IostatErrorInFormat, "Unclosed DT'iotype' in FORMAT");
411       } else if (tooLong) {
412         context.SignalError(
413             IostatErrorInFormat, "Excessive DT'iotype' in FORMAT");
414       }
415     }
416     if (PeekNext() == '(') {
417       // Capture the v_list arguments
418       bool ok{false}, tooLong{false};
419       for (++offset_; offset_ < formatLength_;) {
420         int n{GetIntField(context)};
421         if (edit.vListEntries >= edit.maxVListEntries) {
422           tooLong = true;
423         } else {
424           edit.vList[edit.vListEntries++] = n;
425         }
426         auto ch{static_cast<char>(GetNextChar(context))};
427         if (ch != ',') {
428           ok = ch == ')';
429           break;
430         }
431       }
432       if (!ok) {
433         context.SignalError(
434             IostatErrorInFormat, "Unclosed DT(v_list) in FORMAT");
435       } else if (tooLong) {
436         context.SignalError(
437             IostatErrorInFormat, "Excessive DT(v_list) in FORMAT");
438       }
439     }
440   }
441   if (edit.descriptor == 'A') { // width is optional for A[w]
442     auto ch{PeekNext()};
443     if (ch >= '0' && ch <= '9') {
444       edit.width = GetIntField(context);
445     }
446   } else if (edit.descriptor != DataEdit::DefinedDerivedType) {
447     edit.width = GetIntField(context);
448   }
449   if (edit.descriptor != DataEdit::DefinedDerivedType && PeekNext() == '.') {
450     ++offset_;
451     edit.digits = GetIntField(context);
452     CharType ch{PeekNext()};
453     if (ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
454       ++offset_;
455       edit.expoDigits = GetIntField(context);
456     }
457   }
458   edit.modes = context.mutableModes();
459 
460   // Handle repeated nonparenthesized edit descriptors
461   if (repeat > maxRepeat) {
462     stack_[height_].start = start; // after repeat count
463     stack_[height_].remaining = repeat; // full count
464     ++height_;
465   }
466   edit.repeat = std::min(1, maxRepeat); // 0 if maxRepeat==0
467   if (height_ > 1) { // Subtle: stack_[0].start doesn't necessarily point to '('
468     int start{stack_[height_ - 1].start};
469     if (format_[start] != '(') {
470       if (stack_[height_ - 1].remaining > maxRepeat) {
471         edit.repeat = maxRepeat;
472         stack_[height_ - 1].remaining -= maxRepeat;
473         offset_ = start; // repeat same edit descriptor next time
474       } else {
475         edit.repeat = stack_[height_ - 1].remaining;
476         --height_;
477       }
478     }
479   }
480   return edit;
481 }
482 
483 template <typename CONTEXT>
Finish(Context & context)484 void FormatControl<CONTEXT>::Finish(Context &context) {
485   CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */);
486 }
487 } // namespace Fortran::runtime::io
488 #endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
489