1 //===-- runtime/edit-input.cpp ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "edit-input.h"
10 #include "flang/Common/real.h"
11 #include "flang/Common/uint128.h"
12 #include <algorithm>
13
14 namespace Fortran::runtime::io {
15
16 static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
17 int base, int totalBitSize) {
18 std::optional<int> remaining;
19 std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
20 common::UnsignedInt128 value{0};
21 for (; next; next = io.NextInField(remaining)) {
22 char32_t ch{*next};
23 if (ch == ' ' || ch == '\t') {
24 continue;
25 }
26 int digit{0};
27 if (ch >= '0' && ch <= '1') {
28 digit = ch - '0';
29 } else if (base >= 8 && ch >= '2' && ch <= '7') {
30 digit = ch - '0';
31 } else if (base >= 10 && ch >= '8' && ch <= '9') {
32 digit = ch - '0';
33 } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
34 digit = ch + 10 - 'A';
35 } else if (base == 16 && ch >= 'a' && ch <= 'z') {
36 digit = ch + 10 - 'a';
37 } else {
38 io.GetIoErrorHandler().SignalError(
39 "Bad character '%lc' in B/O/Z input field", ch);
40 return false;
41 }
42 value *= base;
43 value += digit;
44 }
ToUpperCase(const std::string & str)45 // TODO: check for overflow
46 std::memcpy(n, &value, totalBitSize >> 3);
47 return true;
48 }
49
DynamicTypeWithLengthFortran::evaluate::DynamicTypeWithLength50 // Prepares input from a field, and consumes the sign, if any.
51 // Returns true if there's a '-' sign.
52 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
53 std::optional<char32_t> &next, std::optional<int> &remaining) {
54 next = io.PrepareInput(edit, remaining);
55 bool negative{false};
56 if (next) {
57 negative = *next == '-';
58 if (negative || *next == '+') {
59 io.SkipSpaces(remaining);
60 next = io.NextInField(remaining);
61 }
62 }
63 return negative;
64 }
65
66 bool EditIntegerInput(
67 IoStatementState &io, const DataEdit &edit, void *n, int kind) {
68 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
69 switch (edit.descriptor) {
70 case DataEdit::ListDirected:
71 case 'G':
72 case 'I':
73 break;
74 case 'B':
75 return EditBOZInput(io, edit, n, 2, kind << 3);
76 case 'O':
77 return EditBOZInput(io, edit, n, 8, kind << 3);
78 case 'Z':
79 return EditBOZInput(io, edit, n, 16, kind << 3);
80 default:
81 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
82 "Data edit descriptor '%c' may not be used with an INTEGER data item",
83 edit.descriptor);
84 return false;
85 }
86 std::optional<int> remaining;
87 std::optional<char32_t> next;
88 bool negate{ScanNumericPrefix(io, edit, next, remaining)};
89 common::UnsignedInt128 value;
90 for (; next; next = io.NextInField(remaining)) {
91 char32_t ch{*next};
92 if (ch == ' ' || ch == '\t') {
93 if (edit.modes.editingFlags & blankZero) {
94 ch = '0'; // BZ mode - treat blank as if it were zero
95 } else {
96 continue;
ArgumentAnalyzer(ExpressionAnalyzer & context)97 }
98 }
99 int digit{0};
ArgumentAnalyzer(ExpressionAnalyzer & context,parser::CharBlock source,bool isProcedureCall=false)100 if (ch >= '0' && ch <= '9') {
101 digit = ch - '0';
102 } else {
fatalErrors() const103 io.GetIoErrorHandler().SignalError(
104 "Bad character '%lc' in INTEGER input field", ch);
105 return false;
106 }
107 value *= 10;
GetExpr(std::size_t i) const108 value += digit;
109 }
110 if (negate) {
MoveExpr(std::size_t i)111 value = -value;
112 }
113 std::memcpy(n, &value, kind);
Analyze(const common::Indirection<parser::Expr> & x)114 return true;
115 }
116
Analyze(const parser::Expr & x)117 // Parses a REAL input number from the input source as a normalized
118 // fraction into a supplied buffer -- there's an optional '-', a
119 // decimal point, and at least one digit. The adjusted exponent value
120 // is returned in a reference argument. The returned value is the number
121 // of characters that (should) have been written to the buffer -- this can
122 // be larger than the buffer size and can indicate overflow. Replaces
123 // blanks with zeroes if appropriate.
124 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
125 const DataEdit &edit, int &exponent) {
126 std::optional<int> remaining;
127 std::optional<char32_t> next;
128 int got{0};
129 std::optional<int> decimalPoint;
130 auto Put{[&](char ch) -> void {
131 if (got < bufferSize) {
132 buffer[got] = ch;
133 }
134 ++got;
135 }};
136 if (ScanNumericPrefix(io, edit, next, remaining)) {
137 Put('-');
138 }
139 if (!next) { // empty field means zero
140 Put('0');
141 return got;
142 }
143 char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
144 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
145 if (first == 'N' || first == 'I') {
146 // NaN or infinity - convert to upper case
147 // Subtle: a blank field of digits could be followed by 'E' or 'D',
148 for (; next &&
149 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
150 next = io.NextInField(remaining)) {
151 if (*next >= 'a' && *next <= 'z') {
152 Put(*next - 'a' + 'A');
153 } else {
154 Put(*next);
155 }
156 }
157 if (next && *next == '(') { // NaN(...)
158 while (next && *next != ')') {
159 next = io.NextInField(remaining);
160 }
161 }
162 exponent = 0;
163 } else if (first == decimal || (first >= '0' && first <= '9') ||
164 first == 'E' || first == 'D' || first == 'Q') {
165 Put('.'); // input field is normalized to a fraction
166 auto start{got};
167 bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
168 for (; next; next = io.NextInField(remaining)) {
169 char32_t ch{*next};
170 if (ch == ' ' || ch == '\t') {
171 if (bzMode) {
172 ch = '0'; // BZ mode - treat blank as if it were zero
173 } else {
174 continue;
175 }
176 }
177 if (ch == '0' && got == start && !decimalPoint) {
178 // omit leading zeroes before the decimal
179 } else if (ch >= '0' && ch <= '9') {
180 Put(ch);
181 } else if (ch == decimal && !decimalPoint) {
182 // the decimal point is *not* copied to the buffer
183 decimalPoint = got - start; // # of digits before the decimal point
184 } else {
185 break;
186 }
187 }
188 if (got == start) {
189 Put('0'); // emit at least one digit
190 }
191 if (next &&
192 (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
193 *next == 'q' || *next == 'Q')) {
194 // Optional exponent letter. Blanks are allowed between the
195 // optional exponent letter and the exponent value.
196 io.SkipSpaces(remaining);
197 next = io.NextInField(remaining);
198 }
199 // The default exponent is -kP, but the scale factor doesn't affect
200 // an explicit exponent.
201 exponent = -edit.modes.scale;
202 if (next &&
203 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
204 (bzMode && (*next == ' ' || *next == '\t')))) {
205 bool negExpo{*next == '-'};
206 if (negExpo || *next == '+') {
207 next = io.NextInField(remaining);
208 }
209 for (exponent = 0; next; next = io.NextInField(remaining)) {
210 if (*next >= '0' && *next <= '9') {
211 exponent = 10 * exponent + *next - '0';
212 } else if (bzMode && (*next == ' ' || *next == '\t')) {
213 exponent = 10 * exponent;
214 } else {
215 break;
216 }
217 }
218 if (negExpo) {
219 exponent = -exponent;
220 }
221 }
222 if (decimalPoint) {
223 exponent += *decimalPoint;
224 } else {
225 // When no decimal point (or comma) appears in the value, the 'd'
226 // part of the edit descriptor must be interpreted as the number of
227 // digits in the value to be interpreted as being to the *right* of
228 // the assumed decimal point (13.7.2.3.2)
229 exponent += got - start - edit.digits.value_or(0);
230 }
231 } else {
232 // TODO: hex FP input
233 exponent = 0;
234 return 0;
235 }
236 // Consume the trailing ')' of a list-directed or NAMELIST complex
237 // input value.
238 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
239 if (next && (*next == ' ' || *next == '\t')) {
240 next = io.NextInField(remaining);
241 }
242 if (!next) { // NextInField fails on separators like ')'
243 next = io.GetCurrentChar();
244 if (next && *next == ')') {
245 io.HandleRelativePosition(1);
246 }
247 }
248 } else if (remaining) {
249 while (next && (*next == ' ' || *next == '\t')) {
250 next = io.NextInField(remaining);
251 }
252 if (next) {
253 return 0; // error: unused nonblank character in fixed-width field
254 }
255 }
256 return got;
257 }
258
259 template <int KIND>
260 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
261 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
262 static constexpr int maxDigits{
263 common::MaxDecimalConversionDigits(binaryPrecision)};
ApplySubscripts(DataRef && dataRef,std::vector<Subscript> && subscripts)264 static constexpr int bufferSize{maxDigits + 18};
265 char buffer[bufferSize];
266 int exponent{0};
267 int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)};
268 if (got >= maxDigits + 2) {
269 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
270 return false;
__anon9cc18d1d0102(SymbolRef &&symbol) 271 }
272 if (got == 0) {
273 io.GetIoErrorHandler().SignalError("Bad REAL input value");
274 return false;
275 }
276 bool hadExtra{got > maxDigits};
277 if (exponent != 0) {
__anon9cc18d1d0302(auto &&) 278 got += std::snprintf(&buffer[got], bufferSize - got, "e%d", exponent);
279 }
280 buffer[got] = '\0';
281 const char *p{buffer};
282 decimal::ConversionToBinaryResult<binaryPrecision> converted{
283 decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)};
284 if (hadExtra) {
285 converted.flags = static_cast<enum decimal::ConversionResultFlags>(
286 converted.flags | decimal::Inexact);
TopLevelChecks(DataRef && dataRef)287 }
288 // TODO: raise converted.flags as exceptions?
289 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
290 converted.binary;
291 return true;
292 }
293
294 template <int KIND>
295 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
296 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
297 switch (edit.descriptor) {
298 case DataEdit::ListDirected:
299 case DataEdit::ListDirectedRealPart:
300 case DataEdit::ListDirectedImaginaryPart:
301 case 'F':
302 case 'E': // incl. EN, ES, & EX
303 case 'D':
304 case 'G':
305 return EditCommonRealInput<KIND>(io, edit, n);
FixMisparsedSubstring(const parser::Designator & d)306 case 'B':
307 return EditBOZInput(
308 io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
309 case 'O':
310 return EditBOZInput(
311 io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
312 case 'Z':
313 return EditBOZInput(
314 io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
315 default:
316 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
317 "Data edit descriptor '%c' may not be used for REAL input",
318 edit.descriptor);
319 return false;
__anon9cc18d1d0402(parser::Name &n) 320 }
321 }
322
323 // 13.7.3 in Fortran 2018
324 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
325 switch (edit.descriptor) {
326 case DataEdit::ListDirected:
327 case 'L':
328 case 'G':
329 break;
330 default:
331 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
332 "Data edit descriptor '%c' may not be used for LOGICAL input",
333 edit.descriptor);
334 return false;
335 }
336 std::optional<int> remaining;
337 std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
338 if (next && *next == '.') { // skip optional period
339 next = io.NextInField(remaining);
340 }
341 if (!next) {
342 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
343 return false;
344 }
345 switch (*next) {
346 case 'T':
347 case 't':
348 x = true;
349 break;
350 case 'F':
351 case 'f':
352 x = false;
353 break;
354 default:
355 io.GetIoErrorHandler().SignalError(
356 "Bad character '%lc' in LOGICAL input field", *next);
357 return false;
358 }
359 if (remaining) { // ignore the rest of the field
360 io.HandleRelativePosition(*remaining);
361 } else if (edit.descriptor == DataEdit::ListDirected) {
362 while (io.NextInField(remaining)) { // discard rest of field
363 }
364 }
365 return true;
366 }
367
368 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
369 static bool EditDelimitedCharacterInput(
370 IoStatementState &io, char *x, std::size_t length, char32_t delimiter) {
371 bool result{true};
372 while (true) {
AnalyzeKindParam(const std::optional<parser::KindParam> & kindParam,int defaultKind)373 auto ch{io.GetCurrentChar()};
374 if (!ch) {
375 if (io.AdvanceRecord()) {
376 continue;
377 } else {
378 result = false; // EOF in character value
379 break;
__anon9cc18d1d0702(std::uint64_t k) 380 }
381 }
382 io.HandleRelativePosition(1);
383 if (*ch == delimiter) {
384 auto next{io.GetCurrentChar()};
385 if (next && *next == delimiter) {
386 // Repeated delimiter: use as character value
387 io.HandleRelativePosition(1);
388 } else {
389 break; // closing delimiter
390 }
391 }
392 if (length > 0) {
393 *x++ = *ch;
394 --length;
395 }
396 }
397 std::fill_n(x, length, ' ');
398 return result;
399 }
400
TestFortran::evaluate::IntTypeVisitor401 static bool EditListDirectedDefaultCharacterInput(
402 IoStatementState &io, char *x, std::size_t length) {
403 auto ch{io.GetCurrentChar()};
404 if (ch && (*ch == '\'' || *ch == '"')) {
405 io.HandleRelativePosition(1);
406 return EditDelimitedCharacterInput(io, x, length, *ch);
407 }
408 // Undelimited list-directed character input: stop at a value separator
409 // or the end of the current record.
410 std::optional<int> remaining{length};
411 for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
412 next = io.NextInField(remaining)) {
413 switch (*next) {
414 case ' ':
415 case '\t':
416 case ',':
417 case ';':
418 case '/':
419 remaining = 0; // value separator: stop
420 break;
421 default:
422 *x++ = *next;
423 --length;
424 }
425 }
426 std::fill_n(x, length, ' ');
427 return true;
428 }
429
430 bool EditDefaultCharacterInput(
IntLiteralConstant(const PARSED & x)431 IoStatementState &io, const DataEdit &edit, char *x, std::size_t length) {
432 switch (edit.descriptor) {
433 case DataEdit::ListDirected:
434 return EditListDirectedDefaultCharacterInput(io, x, length);
435 case 'A':
436 case 'G':
437 break;
438 default:
439 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
440 "Data edit descriptor '%c' may not be used with a CHARACTER data item",
441 edit.descriptor);
442 return false;
443 }
444 std::optional<int> remaining{length};
445 if (edit.width && *edit.width > 0) {
446 remaining = *edit.width;
447 }
448 // When the field is wider than the variable, we drop the leading
449 // characters. When the variable is wider than the field, there's
450 // trailing padding.
451 std::int64_t skip{*remaining - static_cast<std::int64_t>(length)};
452 for (std::optional<char32_t> next{io.NextInField(remaining)}; next;
453 next = io.NextInField(remaining)) {
454 if (skip > 0) {
455 --skip;
456 } else {
457 *x++ = *next;
458 --length;
459 }
460 }
461 std::fill_n(x, length, ' ');
462 return true;
463 }
464
ReadRealLiteral(parser::CharBlock source,FoldingContext & context)465 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
466 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
467 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
468 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
469 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
470 // TODO: double/double
471 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
472 } // namespace Fortran::runtime::io
473