1 //===-- runtime/edit-input.cpp ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "edit-input.h"
10 #include "flang/Common/real.h"
11 #include "flang/Common/uint128.h"
12 #include <algorithm>
13 
14 namespace Fortran::runtime::io {
15 
PrepareInput(IoStatementState & io,const DataEdit & edit,std::optional<int> & remaining)16 static std::optional<char32_t> PrepareInput(
17     IoStatementState &io, const DataEdit &edit, std::optional<int> &remaining) {
18   remaining.reset();
19   if (edit.descriptor == DataEdit::ListDirected) {
20     io.GetNextNonBlank();
21   } else {
22     if (edit.width.value_or(0) > 0) {
23       remaining = *edit.width;
24     }
25     io.SkipSpaces(remaining);
26   }
27   return io.NextInField(remaining);
28 }
29 
EditBOZInput(IoStatementState & io,const DataEdit & edit,void * n,int base,int totalBitSize)30 static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
31     int base, int totalBitSize) {
32   std::optional<int> remaining;
33   std::optional<char32_t> next{PrepareInput(io, edit, remaining)};
34   common::UnsignedInt128 value{0};
35   for (; next; next = io.NextInField(remaining)) {
36     char32_t ch{*next};
37     if (ch == ' ') {
38       continue;
39     }
40     int digit{0};
41     if (ch >= '0' && ch <= '1') {
42       digit = ch - '0';
43     } else if (base >= 8 && ch >= '2' && ch <= '7') {
44       digit = ch - '0';
45     } else if (base >= 10 && ch >= '8' && ch <= '9') {
46       digit = ch - '0';
47     } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
48       digit = ch + 10 - 'A';
49     } else if (base == 16 && ch >= 'a' && ch <= 'z') {
50       digit = ch + 10 - 'a';
51     } else {
52       io.GetIoErrorHandler().SignalError(
53           "Bad character '%lc' in B/O/Z input field", ch);
54       return false;
55     }
56     value *= base;
57     value += digit;
58   }
59   // TODO: check for overflow
60   std::memcpy(n, &value, totalBitSize >> 3);
61   return true;
62 }
63 
64 // Returns false if there's a '-' sign
ScanNumericPrefix(IoStatementState & io,const DataEdit & edit,std::optional<char32_t> & next,std::optional<int> & remaining)65 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
66     std::optional<char32_t> &next, std::optional<int> &remaining) {
67   next = PrepareInput(io, edit, remaining);
68   bool negative{false};
69   if (next) {
70     negative = *next == '-';
71     if (negative || *next == '+') {
72       next = io.NextInField(remaining);
73     }
74   }
75   return negative;
76 }
77 
EditIntegerInput(IoStatementState & io,const DataEdit & edit,void * n,int kind)78 bool EditIntegerInput(
79     IoStatementState &io, const DataEdit &edit, void *n, int kind) {
80   RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
81   switch (edit.descriptor) {
82   case DataEdit::ListDirected:
83   case 'G':
84   case 'I':
85     break;
86   case 'B':
87     return EditBOZInput(io, edit, n, 2, kind << 3);
88   case 'O':
89     return EditBOZInput(io, edit, n, 8, kind << 3);
90   case 'Z':
91     return EditBOZInput(io, edit, n, 16, kind << 3);
92   default:
93     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
94         "Data edit descriptor '%c' may not be used with an INTEGER data item",
95         edit.descriptor);
96     return false;
97   }
98   std::optional<int> remaining;
99   std::optional<char32_t> next;
100   bool negate{ScanNumericPrefix(io, edit, next, remaining)};
101   common::UnsignedInt128 value;
102   for (; next; next = io.NextInField(remaining)) {
103     char32_t ch{*next};
104     if (ch == ' ') {
105       if (edit.modes.editingFlags & blankZero) {
106         ch = '0'; // BZ mode - treat blank as if it were zero
107       } else {
108         continue;
109       }
110     }
111     int digit{0};
112     if (ch >= '0' && ch <= '9') {
113       digit = ch - '0';
114     } else {
115       io.GetIoErrorHandler().SignalError(
116           "Bad character '%lc' in INTEGER input field", ch);
117       return false;
118     }
119     value *= 10;
120     value += digit;
121   }
122   if (negate) {
123     value = -value;
124   }
125   std::memcpy(n, &value, kind);
126   return true;
127 }
128 
ScanRealInput(char * buffer,int bufferSize,IoStatementState & io,const DataEdit & edit,int & exponent)129 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
130     const DataEdit &edit, int &exponent) {
131   std::optional<int> remaining;
132   std::optional<char32_t> next;
133   int got{0};
134   std::optional<int> decimalPoint;
135   if (ScanNumericPrefix(io, edit, next, remaining) && next) {
136     if (got < bufferSize) {
137       buffer[got++] = '-';
138     }
139   }
140   if (!next) { // empty field means zero
141     if (got < bufferSize) {
142       buffer[got++] = '0';
143     }
144     return got;
145   }
146   if (got < bufferSize) {
147     buffer[got++] = '.'; // input field is normalized to a fraction
148   }
149   char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
150   auto start{got};
151   if ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')) {
152     // NaN or infinity - convert to upper case
153     for (; next &&
154          ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
155          next = io.NextInField(remaining)) {
156       if (got < bufferSize) {
157         if (*next >= 'a' && *next <= 'z') {
158           buffer[got++] = *next - 'a' + 'A';
159         } else {
160           buffer[got++] = *next;
161         }
162       }
163     }
164     if (next && *next == '(') { // NaN(...)
165       while (next && *next != ')') {
166         next = io.NextInField(remaining);
167       }
168     }
169     exponent = 0;
170   } else if (*next == decimal || (*next >= '0' && *next <= '9')) {
171     for (; next; next = io.NextInField(remaining)) {
172       char32_t ch{*next};
173       if (ch == ' ') {
174         if (edit.modes.editingFlags & blankZero) {
175           ch = '0'; // BZ mode - treat blank as if it were zero
176         } else {
177           continue;
178         }
179       }
180       if (ch == '0' && got == start) {
181         // omit leading zeroes
182       } else if (ch >= '0' && ch <= '9') {
183         if (got < bufferSize) {
184           buffer[got++] = ch;
185         }
186       } else if (ch == decimal && !decimalPoint) {
187         // the decimal point is *not* copied to the buffer
188         decimalPoint = got - start; // # of digits before the decimal point
189       } else {
190         break;
191       }
192     }
193     if (got == start && got < bufferSize) {
194       buffer[got++] = '0'; // all digits were zeroes
195     }
196     if (next &&
197         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
198             *next == 'q' || *next == 'Q')) {
199       io.SkipSpaces(remaining);
200       next = io.NextInField(remaining);
201     }
202     exponent = -edit.modes.scale; // default exponent is -kP
203     if (next &&
204         (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9'))) {
205       bool negExpo{*next == '-'};
206       if (negExpo || *next == '+') {
207         next = io.NextInField(remaining);
208       }
209       for (exponent = 0; next && (*next >= '0' && *next <= '9');
210            next = io.NextInField(remaining)) {
211         exponent = 10 * exponent + *next - '0';
212       }
213       if (negExpo) {
214         exponent = -exponent;
215       }
216     }
217     if (decimalPoint) {
218       exponent += *decimalPoint;
219     } else {
220       // When no decimal point (or comma) appears in the value, the 'd'
221       // part of the edit descriptor must be interpreted as the number of
222       // digits in the value to be interpreted as being to the *right* of
223       // the assumed decimal point (13.7.2.3.2)
224       exponent += got - start - edit.digits.value_or(0);
225     }
226   } else {
227     // TODO: hex FP input
228     exponent = 0;
229     return 0;
230   }
231   if (remaining) {
232     while (next && *next == ' ') {
233       next = io.NextInField(remaining);
234     }
235     if (next) {
236       return 0; // error: unused nonblank character in fixed-width field
237     }
238   }
239   return got;
240 }
241 
242 template <int binaryPrecision>
EditCommonRealInput(IoStatementState & io,const DataEdit & edit,void * n)243 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
244   static constexpr int maxDigits{
245       common::MaxDecimalConversionDigits(binaryPrecision)};
246   static constexpr int bufferSize{maxDigits + 18};
247   char buffer[bufferSize];
248   int exponent{0};
249   int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
250   if (got >= maxDigits + 2) {
251     io.GetIoErrorHandler().Crash("EditRealInput: buffer was too small");
252     return false;
253   }
254   if (got == 0) {
255     io.GetIoErrorHandler().SignalError("Bad REAL input value");
256     return false;
257   }
258   bool hadExtra{got > maxDigits};
259   if (exponent != 0) {
260     got += std::snprintf(&buffer[got], bufferSize - got, "e%d", exponent);
261   }
262   buffer[got] = '\0';
263   const char *p{buffer};
264   decimal::ConversionToBinaryResult<binaryPrecision> converted{
265       decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
266   if (hadExtra) {
267     converted.flags = static_cast<enum decimal::ConversionResultFlags>(
268         converted.flags | decimal::Inexact);
269   }
270   // TODO: raise converted.flags as exceptions?
271   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
272       converted.binary;
273   return true;
274 }
275 
276 template <int binaryPrecision>
EditRealInput(IoStatementState & io,const DataEdit & edit,void * n)277 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
278   switch (edit.descriptor) {
279   case DataEdit::ListDirected:
280   case 'F':
281   case 'E': // incl. EN, ES, & EX
282   case 'D':
283   case 'G':
284     return EditCommonRealInput<binaryPrecision>(io, edit, n);
285   case 'B':
286     return EditBOZInput(
287         io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
288   case 'O':
289     return EditBOZInput(
290         io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
291   case 'Z':
292     return EditBOZInput(
293         io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
294   default:
295     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
296         "Data edit descriptor '%c' may not be used for REAL input",
297         edit.descriptor);
298     return false;
299   }
300 }
301 
302 // 13.7.3 in Fortran 2018
EditLogicalInput(IoStatementState & io,const DataEdit & edit,bool & x)303 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
304   switch (edit.descriptor) {
305   case DataEdit::ListDirected:
306   case 'L':
307   case 'G':
308     break;
309   default:
310     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
311         "Data edit descriptor '%c' may not be used for LOGICAL input",
312         edit.descriptor);
313     return false;
314   }
315   std::optional<int> remaining;
316   std::optional<char32_t> next{PrepareInput(io, edit, remaining)};
317   if (next && *next == '.') { // skip optional period
318     next = io.NextInField(remaining);
319   }
320   if (!next) {
321     io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
322     return false;
323   }
324   switch (*next) {
325   case 'T':
326   case 't':
327     x = true;
328     break;
329   case 'F':
330   case 'f':
331     x = false;
332     break;
333   default:
334     io.GetIoErrorHandler().SignalError(
335         "Bad character '%lc' in LOGICAL input field", *next);
336     return false;
337   }
338   if (remaining) { // ignore the rest of the field
339     io.HandleRelativePosition(*remaining);
340   } else if (edit.descriptor == DataEdit::ListDirected) {
341     while (io.NextInField(remaining)) { // discard rest of field
342     }
343   }
344   return true;
345 }
346 
347 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
EditDelimitedCharacterInput(IoStatementState & io,char * x,std::size_t length,char32_t delimiter)348 static bool EditDelimitedCharacterInput(
349     IoStatementState &io, char *x, std::size_t length, char32_t delimiter) {
350   while (true) {
351     if (auto ch{io.GetCurrentChar()}) {
352       io.HandleRelativePosition(1);
353       if (*ch == delimiter) {
354         ch = io.GetCurrentChar();
355         if (ch && *ch == delimiter) {
356           // Repeated delimiter: use as character value.  Can't straddle a
357           // record boundary.
358           io.HandleRelativePosition(1);
359         } else {
360           std::fill_n(x, length, ' ');
361           return true;
362         }
363       }
364       if (length > 0) {
365         *x++ = *ch;
366         --length;
367       }
368     } else if (!io.AdvanceRecord()) { // EOF
369       std::fill_n(x, length, ' ');
370       return false;
371     }
372   }
373 }
374 
EditListDirectedDefaultCharacterInput(IoStatementState & io,char * x,std::size_t length)375 static bool EditListDirectedDefaultCharacterInput(
376     IoStatementState &io, char *x, std::size_t length) {
377   auto ch{io.GetCurrentChar()};
378   if (ch && (*ch == '\'' || *ch == '"')) {
379     io.HandleRelativePosition(1);
380     return EditDelimitedCharacterInput(io, x, length, *ch);
381   }
382   // Undelimited list-directed character input: stop at a value separator
383   // or the end of the current record.
384   std::optional<int> remaining{length};
385   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
386        next = io.NextInField(remaining)) {
387     switch (*next) {
388     case ' ':
389     case ',':
390     case ';':
391     case '/':
392       remaining = 0; // value separator: stop
393       break;
394     default:
395       *x++ = *next;
396       --length;
397     }
398   }
399   std::fill_n(x, length, ' ');
400   return true;
401 }
402 
EditDefaultCharacterInput(IoStatementState & io,const DataEdit & edit,char * x,std::size_t length)403 bool EditDefaultCharacterInput(
404     IoStatementState &io, const DataEdit &edit, char *x, std::size_t length) {
405   switch (edit.descriptor) {
406   case DataEdit::ListDirected:
407     return EditListDirectedDefaultCharacterInput(io, x, length);
408   case 'A':
409   case 'G':
410     break;
411   default:
412     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
413         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
414         edit.descriptor);
415     return false;
416   }
417   std::optional<int> remaining{length};
418   if (edit.width && *edit.width > 0) {
419     remaining = *edit.width;
420   }
421   // When the field is wider than the variable, we drop the leading
422   // characters.  When the variable is wider than the field, there's
423   // trailing padding.
424   std::int64_t skip{*remaining - static_cast<std::int64_t>(length)};
425   for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
426        next = io.NextInField(remaining)) {
427     if (skip > 0) {
428       --skip;
429     } else {
430       *x++ = *next;
431       --length;
432     }
433   }
434   std::fill_n(x, length, ' ');
435   return true;
436 }
437 
438 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
439 template bool EditRealInput<11>(IoStatementState &, const DataEdit &, void *);
440 template bool EditRealInput<24>(IoStatementState &, const DataEdit &, void *);
441 template bool EditRealInput<53>(IoStatementState &, const DataEdit &, void *);
442 template bool EditRealInput<64>(IoStatementState &, const DataEdit &, void *);
443 template bool EditRealInput<113>(IoStatementState &, const DataEdit &, void *);
444 } // namespace Fortran::runtime::io
445