1 //===-- runtime/edit-input.cpp ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "flang/Common/real.h"
11 #include "flang/Common/uint128.h"
12 #include <algorithm>
13 
14 namespace Fortran::runtime::io {
15 
16 static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
17     int base, int totalBitSize) {
18   std::optional<int> remaining;
19   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
20   common::UnsignedInt128 value{0};
21   for (; next; next = io.NextInField(remaining)) {
22     char32_t ch{*next};
23     if (ch == ' ' || ch == '\t') {
24       continue;
25     }
26     int digit{0};
27     if (ch >= '0' && ch <= '1') {
28       digit = ch - '0';
29     } else if (base >= 8 && ch >= '2' && ch <= '7') {
30       digit = ch - '0';
31     } else if (base >= 10 && ch >= '8' && ch <= '9') {
32       digit = ch - '0';
33     } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
34       digit = ch + 10 - 'A';
35     } else if (base == 16 && ch >= 'a' && ch <= 'z') {
36       digit = ch + 10 - 'a';
37     } else {
38       io.GetIoErrorHandler().SignalError(
39           "Bad character '%lc' in B/O/Z input field", ch);
40       return false;
41     }
42     value *= base;
43     value += digit;
44   }
ToUpperCase(const std::string & str)45   // TODO: check for overflow
46   std::memcpy(n, &value, totalBitSize >> 3);
47   return true;
48 }
49 
DynamicTypeWithLengthFortran::evaluate::DynamicTypeWithLength50 // Prepares input from a field, and consumes the sign, if any.
51 // Returns true if there's a '-' sign.
52 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
53     std::optional<char32_t> &next, std::optional<int> &remaining) {
54   next = io.PrepareInput(edit, remaining);
55   bool negative{false};
56   if (next) {
57     negative = *next == '-';
58     if (negative || *next == '+') {
59       io.SkipSpaces(remaining);
60       next = io.NextInField(remaining);
61     }
62   }
63   return negative;
64 }
65 
66 bool EditIntegerInput(
67     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
68   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
69   switch (edit.descriptor) {
70   case DataEdit::ListDirected:
71   case 'G':
72   case 'I':
73     break;
74   case 'B':
75     return EditBOZInput(io, edit, n, 2, kind << 3);
76   case 'O':
77     return EditBOZInput(io, edit, n, 8, kind << 3);
78   case 'Z':
79     return EditBOZInput(io, edit, n, 16, kind << 3);
80   default:
81     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
82         "Data edit descriptor '%c' may not be used with an INTEGER data item",
83         edit.descriptor);
84     return false;
85   }
86   std::optional<int> remaining;
87   std::optional<char32_t> next;
88   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
89   common::UnsignedInt128 value;
90   for (; next; next = io.NextInField(remaining)) {
91     char32_t ch{*next};
92     if (ch == ' ' || ch == '\t') {
93       if (edit.modes.editingFlags & blankZero) {
94         ch = '0'; // BZ mode - treat blank as if it were zero
95       } else {
96         continue;
ArgumentAnalyzer(ExpressionAnalyzer & context)97       }
98     }
99     int digit{0};
ArgumentAnalyzer(ExpressionAnalyzer & context,parser::CharBlock source,bool isProcedureCall=false)100     if (ch >= '0' && ch <= '9') {
101       digit = ch - '0';
102     } else {
fatalErrors() const103       io.GetIoErrorHandler().SignalError(
104           "Bad character '%lc' in INTEGER input field", ch);
105       return false;
106     }
107     value *= 10;
GetExpr(std::size_t i) const108     value += digit;
109   }
110   if (negate) {
MoveExpr(std::size_t i)111     value = -value;
112   }
113   std::memcpy(n, &value, kind);
Analyze(const common::Indirection<parser::Expr> & x)114   return true;
115 }
116 
Analyze(const parser::Expr & x)117 // Parses a REAL input number from the input source as a normalized
118 // fraction into a supplied buffer -- there's an optional '-', a
119 // decimal point, and at least one digit.  The adjusted exponent value
120 // is returned in a reference argument.  The returned value is the number
121 // of characters that (should) have been written to the buffer -- this can
122 // be larger than the buffer size and can indicate overflow.  Replaces
123 // blanks with zeroes if appropriate.
124 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
125     const DataEdit &edit, int &exponent) {
126   std::optional<int> remaining;
127   std::optional<char32_t> next;
128   int got{0};
129   std::optional<int> decimalPoint;
130   auto Put{[&](char ch) -> void {
131     if (got < bufferSize) {
132       buffer[got] = ch;
133     }
134     ++got;
135   }};
136   if (ScanNumericPrefix(io, edit, next, remaining)) {
137     Put('-');
138   }
139   if (!next) { // empty field means zero
140     Put('0');
141     return got;
142   }
143   char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
144   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
145   if (first == 'N' || first == 'I') {
146     // NaN or infinity - convert to upper case
147     // Subtle: a blank field of digits could be followed by 'E' or 'D',
148     for (; next &&
149          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
150          next = io.NextInField(remaining)) {
151       if (*next >= 'a' && *next <= 'z') {
152         Put(*next - 'a' + 'A');
153       } else {
154         Put(*next);
155       }
156     }
157     if (next && *next == '(') { // NaN(...)
158       while (next && *next != ')') {
159         next = io.NextInField(remaining);
160       }
161     }
162     exponent = 0;
163   } else if (first == decimal || (first >= '0' && first <= '9') ||
164       first == 'E' || first == 'D' || first == 'Q') {
165     Put('.'); // input field is normalized to a fraction
166     auto start{got};
167     bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
168     for (; next; next = io.NextInField(remaining)) {
169       char32_t ch{*next};
170       if (ch == ' ' || ch == '\t') {
171         if (bzMode) {
172           ch = '0'; // BZ mode - treat blank as if it were zero
173         } else {
174           continue;
175         }
176       }
177       if (ch == '0' && got == start && !decimalPoint) {
178         // omit leading zeroes before the decimal
179       } else if (ch >= '0' && ch <= '9') {
180         Put(ch);
181       } else if (ch == decimal && !decimalPoint) {
182         // the decimal point is *not* copied to the buffer
183         decimalPoint = got - start; // # of digits before the decimal point
184       } else {
185         break;
186       }
187     }
188     if (got == start) {
189       Put('0'); // emit at least one digit
190     }
191     if (next &&
192         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
193             *next == 'q' || *next == 'Q')) {
194       // Optional exponent letter.  Blanks are allowed between the
195       // optional exponent letter and the exponent value.
196       io.SkipSpaces(remaining);
197       next = io.NextInField(remaining);
198     }
199     // The default exponent is -kP, but the scale factor doesn't affect
200     // an explicit exponent.
201     exponent = -edit.modes.scale;
202     if (next &&
203         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
204             (bzMode && (*next == ' ' || *next == '\t')))) {
205       bool negExpo{*next == '-'};
206       if (negExpo || *next == '+') {
207         next = io.NextInField(remaining);
208       }
209       for (exponent = 0; next; next = io.NextInField(remaining)) {
210         if (*next >= '0' && *next <= '9') {
211           exponent = 10 * exponent + *next - '0';
212         } else if (bzMode && (*next == ' ' || *next == '\t')) {
213           exponent = 10 * exponent;
214         } else {
215           break;
216         }
217       }
218       if (negExpo) {
219         exponent = -exponent;
220       }
221     }
222     if (decimalPoint) {
223       exponent += *decimalPoint;
224     } else {
225       // When no decimal point (or comma) appears in the value, the 'd'
226       // part of the edit descriptor must be interpreted as the number of
227       // digits in the value to be interpreted as being to the *right* of
228       // the assumed decimal point (13.7.2.3.2)
229       exponent += got - start - edit.digits.value_or(0);
230     }
231   } else {
232     // TODO: hex FP input
233     exponent = 0;
234     return 0;
235   }
236   // Consume the trailing ')' of a list-directed or NAMELIST complex
237   // input value.
238   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
239     if (next && (*next == ' ' || *next == '\t')) {
240       next = io.NextInField(remaining);
241     }
242     if (!next) { // NextInField fails on separators like ')'
243       next = io.GetCurrentChar();
244       if (next && *next == ')') {
245         io.HandleRelativePosition(1);
246       }
247     }
248   } else if (remaining) {
249     while (next && (*next == ' ' || *next == '\t')) {
250       next = io.NextInField(remaining);
251     }
252     if (next) {
253       return 0; // error: unused nonblank character in fixed-width field
254     }
255   }
256   return got;
257 }
258 
259 template <int KIND>
260 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
261   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
262   static constexpr int maxDigits{
263       common::MaxDecimalConversionDigits(binaryPrecision)};
ApplySubscripts(DataRef && dataRef,std::vector<Subscript> && subscripts)264   static constexpr int bufferSize{maxDigits + 18};
265   char buffer[bufferSize];
266   int exponent{0};
267   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
268   if (got >= maxDigits + 2) {
269     io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
270     return false;
__anon9cc18d1d0102(SymbolRef &&symbol) 271   }
272   if (got == 0) {
273     io.GetIoErrorHandler().SignalError("Bad REAL input value");
274     return false;
275   }
276   bool hadExtra{got > maxDigits};
277   if (exponent != 0) {
__anon9cc18d1d0302(auto &&) 278     got += std::snprintf(&buffer[got], bufferSize - got, "e%d", exponent);
279   }
280   buffer[got] = '\0';
281   const char *p{buffer};
282   decimal::ConversionToBinaryResult<binaryPrecision> converted{
283       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
284   if (hadExtra) {
285     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
286         converted.flags | decimal::Inexact);
TopLevelChecks(DataRef && dataRef)287   }
288   // TODO: raise converted.flags as exceptions?
289   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
290       converted.binary;
291   return true;
292 }
293 
294 template <int KIND>
295 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
296   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
297   switch (edit.descriptor) {
298   case DataEdit::ListDirected:
299   case DataEdit::ListDirectedRealPart:
300   case DataEdit::ListDirectedImaginaryPart:
301   case 'F':
302   case 'E': // incl. EN, ES, & EX
303   case 'D':
304   case 'G':
305     return EditCommonRealInput<KIND>(io, edit, n);
FixMisparsedSubstring(const parser::Designator & d)306   case 'B':
307     return EditBOZInput(
308         io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
309   case 'O':
310     return EditBOZInput(
311         io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
312   case 'Z':
313     return EditBOZInput(
314         io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
315   default:
316     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
317         "Data edit descriptor '%c' may not be used for REAL input",
318         edit.descriptor);
319     return false;
__anon9cc18d1d0402(parser::Name &n) 320   }
321 }
322 
323 // 13.7.3 in Fortran 2018
324 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
325   switch (edit.descriptor) {
326   case DataEdit::ListDirected:
327   case 'L':
328   case 'G':
329     break;
330   default:
331     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
332         "Data edit descriptor '%c' may not be used for LOGICAL input",
333         edit.descriptor);
334     return false;
335   }
336   std::optional<int> remaining;
337   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
338   if (next && *next == '.') { // skip optional period
339     next = io.NextInField(remaining);
340   }
341   if (!next) {
342     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
343     return false;
344   }
345   switch (*next) {
346   case 'T':
347   case 't':
348     x = true;
349     break;
350   case 'F':
351   case 'f':
352     x = false;
353     break;
354   default:
355     io.GetIoErrorHandler().SignalError(
356         "Bad character '%lc' in LOGICAL input field", *next);
357     return false;
358   }
359   if (remaining) { // ignore the rest of the field
360     io.HandleRelativePosition(*remaining);
361   } else if (edit.descriptor == DataEdit::ListDirected) {
362     while (io.NextInField(remaining)) { // discard rest of field
363     }
364   }
365   return true;
366 }
367 
368 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
369 static bool EditDelimitedCharacterInput(
370     IoStatementState &io, char *x, std::size_t length, char32_t delimiter) {
371   bool result{true};
372   while (true) {
AnalyzeKindParam(const std::optional<parser::KindParam> & kindParam,int defaultKind)373     auto ch{io.GetCurrentChar()};
374     if (!ch) {
375       if (io.AdvanceRecord()) {
376         continue;
377       } else {
378         result = false; // EOF in character value
379         break;
__anon9cc18d1d0702(std::uint64_t k) 380       }
381     }
382     io.HandleRelativePosition(1);
383     if (*ch == delimiter) {
384       auto next{io.GetCurrentChar()};
385       if (next && *next == delimiter) {
386         // Repeated delimiter: use as character value
387         io.HandleRelativePosition(1);
388       } else {
389         break; // closing delimiter
390       }
391     }
392     if (length > 0) {
393       *x++ = *ch;
394       --length;
395     }
396   }
397   std::fill_n(x, length, ' ');
398   return result;
399 }
400 
TestFortran::evaluate::IntTypeVisitor401 static bool EditListDirectedDefaultCharacterInput(
402     IoStatementState &io, char *x, std::size_t length) {
403   auto ch{io.GetCurrentChar()};
404   if (ch && (*ch == '\'' || *ch == '"')) {
405     io.HandleRelativePosition(1);
406     return EditDelimitedCharacterInput(io, x, length, *ch);
407   }
408   // Undelimited list-directed character input: stop at a value separator
409   // or the end of the current record.
410   std::optional<int> remaining{length};
411   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
412        next = io.NextInField(remaining)) {
413     switch (*next) {
414     case ' ':
415     case '\t':
416     case ',':
417     case ';':
418     case '/':
419       remaining = 0; // value separator: stop
420       break;
421     default:
422       *x++ = *next;
423       --length;
424     }
425   }
426   std::fill_n(x, length, ' ');
427   return true;
428 }
429 
430 bool EditDefaultCharacterInput(
IntLiteralConstant(const PARSED & x)431     IoStatementState &io, const DataEdit &edit, char *x, std::size_t length) {
432   switch (edit.descriptor) {
433   case DataEdit::ListDirected:
434     return EditListDirectedDefaultCharacterInput(io, x, length);
435   case 'A':
436   case 'G':
437     break;
438   default:
439     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
440         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
441         edit.descriptor);
442     return false;
443   }
444   std::optional<int> remaining{length};
445   if (edit.width && *edit.width > 0) {
446     remaining = *edit.width;
447   }
448   // When the field is wider than the variable, we drop the leading
449   // characters.  When the variable is wider than the field, there's
450   // trailing padding.
451   std::int64_t skip{*remaining - static_cast<std::int64_t>(length)};
452   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
453        next = io.NextInField(remaining)) {
454     if (skip > 0) {
455       --skip;
456     } else {
457       *x++ = *next;
458       --length;
459     }
460   }
461   std::fill_n(x, length, ' ');
462   return true;
463 }
464 
ReadRealLiteral(parser::CharBlock source,FoldingContext & context)465 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
466 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
467 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
468 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
469 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
470 // TODO: double/double
471 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
472 } // namespace Fortran::runtime::io
473