llvm-project

edit-input.cpp
1134 строки · 37.0 Кб
Перенос по словам
1
//===-- runtime/edit-input.cpp --------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8

9
#include "edit-input.h"
10
#include "namelist.h"
11
#include "utf.h"
12
#include "flang/Common/optional.h"
13
#include "flang/Common/real.h"
14
#include "flang/Common/uint128.h"
15
#include "flang/Runtime/freestanding-tools.h"
16
#include <algorithm>
17
#include <cfenv>
18

19
namespace Fortran::runtime::io {
20
RT_OFFLOAD_API_GROUP_BEGIN
21

22
// Checks that a list-directed input value has been entirely consumed and
23
// doesn't contain unparsed characters before the next value separator.
24
static inline RT_API_ATTRS bool IsCharValueSeparator(
25
    const DataEdit &edit, char32_t ch) {
26
  char32_t comma{
27
      edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
28
  return ch == ' ' || ch == '\t' || ch == comma || ch == '/' ||
29
      (edit.IsNamelist() && (ch == '&' || ch == '$'));
30
}
31

32
static RT_API_ATTRS bool CheckCompleteListDirectedField(
33
    IoStatementState &io, const DataEdit &edit) {
34
  if (edit.IsListDirected()) {
35
    std::size_t byteCount;
36
    if (auto ch{io.GetCurrentChar(byteCount)}) {
37
      if (IsCharValueSeparator(edit, *ch)) {
38
        return true;
39
      } else {
40
        const auto &connection{io.GetConnectionState()};
41
        io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
42
            "invalid character (0x%x) after list-directed input value, "
43
            "at column %d in record %d",
44
            static_cast<unsigned>(*ch),
45
            static_cast<int>(connection.positionInRecord + 1),
46
            static_cast<int>(connection.currentRecordNumber));
47
        return false;
48
      }
49
    } else {
50
      return true; // end of record: ok
51
    }
52
  } else {
53
    return true;
54
  }
55
}
56

57
template <int LOG2_BASE>
58
static RT_API_ATTRS bool EditBOZInput(
59
    IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
60
  // Skip leading white space & zeroes
61
  Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
62
  auto start{io.GetConnectionState().positionInRecord};
63
  Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
64
  if (next.value_or('?') == '0') {
65
    do {
66
      start = io.GetConnectionState().positionInRecord;
67
      next = io.NextInField(remaining, edit);
68
    } while (next && *next == '0');
69
  }
70
  // Count significant digits after any leading white space & zeroes
71
  int digits{0};
72
  int significantBits{0};
73
  for (; next; next = io.NextInField(remaining, edit)) {
74
    char32_t ch{*next};
75
    if (ch == ' ' || ch == '\t') {
76
      if (edit.modes.editingFlags & blankZero) {
77
        ch = '0'; // BZ mode - treat blank as if it were zero
78
      } else {
79
        continue;
80
      }
81
    }
82
    if (ch >= '0' && ch <= '1') {
83
    } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
84
    } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
85
    } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
86
    } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
87
    } else if (ch == ',') {
88
      break; // end non-list-directed field early
89
    } else {
90
      io.GetIoErrorHandler().SignalError(
91
          "Bad character '%lc' in B/O/Z input field", ch);
92
      return false;
93
    }
94
    if (digits++ == 0) {
95
      significantBits = 4;
96
      if (ch >= '0' && ch <= '1') {
97
        significantBits = 1;
98
      } else if (ch >= '2' && ch <= '3') {
99
        significantBits = 2;
100
      } else if (ch >= '4' && ch <= '7') {
101
        significantBits = 3;
102
      } else {
103
        significantBits = 4;
104
      }
105
    } else {
106
      significantBits += LOG2_BASE;
107
    }
108
  }
109
  auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8};
110
  if (significantBytes > bytes) {
111
    io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
112
        "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
113
    return false;
114
  }
115
  // Reset to start of significant digits
116
  io.HandleAbsolutePosition(start);
117
  remaining.reset();
118
  // Make a second pass now that the digit count is known
119
  std::memset(n, 0, bytes);
120
  int increment{isHostLittleEndian ? -1 : 1};
121
  auto *data{reinterpret_cast<unsigned char *>(n) +
122
      (isHostLittleEndian ? significantBytes - 1 : 0)};
123
  int shift{((digits - 1) * LOG2_BASE) & 7};
124
  while (digits > 0) {
125
    char32_t ch{*io.NextInField(remaining, edit)};
126
    int digit{0};
127
    if (ch == ' ' || ch == '\t') {
128
      if (edit.modes.editingFlags & blankZero) {
129
        ch = '0'; // BZ mode - treat blank as if it were zero
130
      } else {
131
        continue;
132
      }
133
    }
134
    --digits;
135
    if (ch >= '0' && ch <= '9') {
136
      digit = ch - '0';
137
    } else if (ch >= 'A' && ch <= 'F') {
138
      digit = ch + 10 - 'A';
139
    } else if (ch >= 'a' && ch <= 'f') {
140
      digit = ch + 10 - 'a';
141
    } else {
142
      continue;
143
    }
144
    if (shift < 0) {
145
      if (shift + LOG2_BASE > 0) { // misaligned octal
146
        *data |= digit >> -shift;
147
      }
148
      shift += 8;
149
      data += increment;
150
    }
151
    *data |= digit << shift;
152
    shift -= LOG2_BASE;
153
  }
154
  return CheckCompleteListDirectedField(io, edit);
155
}
156

157
static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
158
  return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
159
}
160

161
// Prepares input from a field, and returns the sign, if any, else '\0'.
162
static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
163
    const DataEdit &edit, Fortran::common::optional<char32_t> &next,
164
    Fortran::common::optional<int> &remaining) {
165
  remaining = io.CueUpInput(edit);
166
  next = io.NextInField(remaining, edit);
167
  char sign{'\0'};
168
  if (next) {
169
    if (*next == '-' || *next == '+') {
170
      sign = *next;
171
      if (!edit.IsListDirected()) {
172
        io.SkipSpaces(remaining);
173
      }
174
      next = io.NextInField(remaining, edit);
175
    }
176
  }
177
  return sign;
178
}
179

180
RT_API_ATTRS bool EditIntegerInput(
181
    IoStatementState &io, const DataEdit &edit, void *n, int kind) {
182
  RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
183
  switch (edit.descriptor) {
184
  case DataEdit::ListDirected:
185
    if (IsNamelistNameOrSlash(io)) {
186
      return false;
187
    }
188
    break;
189
  case 'G':
190
  case 'I':
191
    break;
192
  case 'B':
193
    return EditBOZInput<1>(io, edit, n, kind);
194
  case 'O':
195
    return EditBOZInput<3>(io, edit, n, kind);
196
  case 'Z':
197
    return EditBOZInput<4>(io, edit, n, kind);
198
  case 'A': // legacy extension
199
    return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
200
  default:
201
    io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
202
        "Data edit descriptor '%c' may not be used with an INTEGER data item",
203
        edit.descriptor);
204
    return false;
205
  }
206
  Fortran::common::optional<int> remaining;
207
  Fortran::common::optional<char32_t> next;
208
  char sign{ScanNumericPrefix(io, edit, next, remaining)};
209
  common::UnsignedInt128 value{0};
210
  bool any{!!sign};
211
  bool overflow{false};
212
  for (; next; next = io.NextInField(remaining, edit)) {
213
    char32_t ch{*next};
214
    if (ch == ' ' || ch == '\t') {
215
      if (edit.modes.editingFlags & blankZero) {
216
        ch = '0'; // BZ mode - treat blank as if it were zero
217
      } else {
218
        continue;
219
      }
220
    }
221
    int digit{0};
222
    if (ch >= '0' && ch <= '9') {
223
      digit = ch - '0';
224
    } else if (ch == ',') {
225
      break; // end non-list-directed field early
226
    } else {
227
      io.GetIoErrorHandler().SignalError(
228
          "Bad character '%lc' in INTEGER input field", ch);
229
      return false;
230
    }
231
    static constexpr auto maxu128{~common::UnsignedInt128{0}};
232
    static constexpr auto maxu128OverTen{maxu128 / 10};
233
    static constexpr int maxLastDigit{
234
        static_cast<int>(maxu128 - (maxu128OverTen * 10))};
235
    overflow |= value >= maxu128OverTen &&
236
        (value > maxu128OverTen || digit > maxLastDigit);
237
    value *= 10;
238
    value += digit;
239
    any = true;
240
  }
241
  if (!any && !remaining) {
242
    io.GetIoErrorHandler().SignalError(
243
        "Integer value absent from NAMELIST or list-directed input");
244
    return false;
245
  }
246
  auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
247
  overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
248
  if (overflow) {
249
    io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
250
        "Decimal input overflows INTEGER(%d) variable", kind);
251
    return false;
252
  }
253
  if (sign == '-') {
254
    value = -value;
255
  }
256
  if (any || !io.GetIoErrorHandler().InError()) {
257
    // The value is stored in the lower order bits on big endian platform.
258
    // When memcpy, shift the value to the higher order bit.
259
    auto shft{static_cast<int>(sizeof(value.low())) - kind};
260
    // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
261
    if (!isHostLittleEndian && shft >= 0) {
262
      auto l{value.low() << (8 * shft)};
263
      std::memcpy(n, &l, kind);
264
    } else {
265
      std::memcpy(n, &value, kind); // a blank field means zero
266
    }
267
    return true;
268
  } else {
269
    return false;
270
  }
271
}
272

273
// Parses a REAL input number from the input source as a normalized
274
// fraction into a supplied buffer -- there's an optional '-', a
275
// decimal point when the input is not hexadecimal, and at least one
276
// digit.  Replaces blanks with zeroes where appropriate.
277
struct ScannedRealInput {
278
  // Number of characters that (should) have been written to the
279
  // buffer -- this can be larger than the buffer size, which
280
  // indicates buffer overflow.  Zero indicates an error.
281
  int got{0};
282
  int exponent{0}; // adjusted as necessary; binary if isHexadecimal
283
  bool isHexadecimal{false}; // 0X...
284
};
285
static RT_API_ATTRS ScannedRealInput ScanRealInput(
286
    char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
287
  Fortran::common::optional<int> remaining;
288
  Fortran::common::optional<char32_t> next;
289
  int got{0};
290
  Fortran::common::optional<int> radixPointOffset;
291
  // The following lambda definition violates the conding style,
292
  // but cuda-11.8 nvcc hits an internal error with the brace initialization.
293
  auto Put = [&](char ch) -> void {
294
    if (got < bufferSize) {
295
      buffer[got] = ch;
296
    }
297
    ++got;
298
  };
299
  char sign{ScanNumericPrefix(io, edit, next, remaining)};
300
  if (sign == '-') {
301
    Put('-');
302
  }
303
  bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
304
  int exponent{0};
305
  if (!next || (!bzMode && *next == ' ') ||
306
      (!(edit.modes.editingFlags & decimalComma) && *next == ',')) {
307
    if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
308
      // An empty/blank field means zero when not list-directed.
309
      // A fixed-width field containing only a sign is also zero;
310
      // this behavior isn't standard-conforming in F'2023 but it is
311
      // required to pass FCVS.
312
      Put('0');
313
    }
314
    return {got, exponent, false};
315
  }
316
  char32_t radixPointChar{GetRadixPointChar(edit)};
317
  char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
318
  bool isHexadecimal{false};
319
  if (first == 'N' || first == 'I') {
320
    // NaN or infinity - convert to upper case
321
    // Subtle: a blank field of digits could be followed by 'E' or 'D',
322
    for (; next &&
323
         ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
324
         next = io.NextInField(remaining, edit)) {
325
      if (*next >= 'a' && *next <= 'z') {
326
        Put(*next - 'a' + 'A');
327
      } else {
328
        Put(*next);
329
      }
330
    }
331
    if (next && *next == '(') { // NaN(...)
332
      Put('(');
333
      int depth{1};
334
      while (true) {
335
        next = io.NextInField(remaining, edit);
336
        if (depth == 0) {
337
          break;
338
        } else if (!next) {
339
          return {}; // error
340
        } else if (*next == '(') {
341
          ++depth;
342
        } else if (*next == ')') {
343
          --depth;
344
        }
345
        Put(*next);
346
      }
347
    }
348
  } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
349
      (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
350
      first == 'D' || first == 'Q') {
351
    if (first == '0') {
352
      next = io.NextInField(remaining, edit);
353
      if (next && (*next == 'x' || *next == 'X')) { // 0X...
354
        isHexadecimal = true;
355
        next = io.NextInField(remaining, edit);
356
      } else {
357
        Put('0');
358
      }
359
    }
360
    // input field is normalized to a fraction
361
    if (!isHexadecimal) {
362
      Put('.');
363
    }
364
    auto start{got};
365
    for (; next; next = io.NextInField(remaining, edit)) {
366
      char32_t ch{*next};
367
      if (ch == ' ' || ch == '\t') {
368
        if (isHexadecimal) {
369
          return {}; // error
370
        } else if (bzMode) {
371
          ch = '0'; // BZ mode - treat blank as if it were zero
372
        } else {
373
          continue; // ignore blank in fixed field
374
        }
375
      }
376
      if (ch == '0' && got == start && !radixPointOffset) {
377
        // omit leading zeroes before the radix point
378
      } else if (ch >= '0' && ch <= '9') {
379
        Put(ch);
380
      } else if (ch == radixPointChar && !radixPointOffset) {
381
        // The radix point character is *not* copied to the buffer.
382
        radixPointOffset = got - start; // # of digits before the radix point
383
      } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
384
        Put(ch);
385
      } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
386
        Put(ch - 'a' + 'A'); // normalize to capitals
387
      } else {
388
        break;
389
      }
390
    }
391
    if (got == start) {
392
      // Nothing but zeroes and maybe a radix point.  F'2018 requires
393
      // at least one digit, but F'77 did not, and a bare "." shows up in
394
      // the FCVS suite.
395
      Put('0'); // emit at least one digit
396
    }
397
    // In list-directed input, a bad exponent is not consumed.
398
    auto nextBeforeExponent{next};
399
    auto startExponent{io.GetConnectionState().positionInRecord};
400
    bool hasGoodExponent{false};
401
    if (next) {
402
      if (isHexadecimal) {
403
        if (*next == 'p' || *next == 'P') {
404
          next = io.NextInField(remaining, edit);
405
        } else {
406
          // The binary exponent is not optional in the standard.
407
          return {}; // error
408
        }
409
      } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
410
          *next == 'q' || *next == 'Q') {
411
        // Optional exponent letter.  Blanks are allowed between the
412
        // optional exponent letter and the exponent value.
413
        io.SkipSpaces(remaining);
414
        next = io.NextInField(remaining, edit);
415
      }
416
    }
417
    if (next &&
418
        (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
419
            *next == ' ' || *next == '\t')) {
420
      bool negExpo{*next == '-'};
421
      if (negExpo || *next == '+') {
422
        next = io.NextInField(remaining, edit);
423
      }
424
      for (; next; next = io.NextInField(remaining, edit)) {
425
        if (*next >= '0' && *next <= '9') {
426
          hasGoodExponent = true;
427
          if (exponent < 10000) {
428
            exponent = 10 * exponent + *next - '0';
429
          }
430
        } else if (*next == ' ' || *next == '\t') {
431
          if (isHexadecimal) {
432
            break;
433
          } else if (bzMode) {
434
            hasGoodExponent = true;
435
            exponent = 10 * exponent;
436
          }
437
        } else {
438
          break;
439
        }
440
      }
441
      if (negExpo) {
442
        exponent = -exponent;
443
      }
444
    }
445
    if (!hasGoodExponent) {
446
      if (isHexadecimal) {
447
        return {}; // error
448
      }
449
      // There isn't a good exponent; do not consume it.
450
      next = nextBeforeExponent;
451
      io.HandleAbsolutePosition(startExponent);
452
      // The default exponent is -kP, but the scale factor doesn't affect
453
      // an explicit exponent.
454
      exponent = -edit.modes.scale;
455
    }
456
    // Adjust exponent by number of digits before the radix point.
457
    if (isHexadecimal) {
458
      // Exponents for hexadecimal input are binary.
459
      exponent += radixPointOffset.value_or(got - start) * 4;
460
    } else if (radixPointOffset) {
461
      exponent += *radixPointOffset;
462
    } else {
463
      // When no redix point (or comma) appears in the value, the 'd'
464
      // part of the edit descriptor must be interpreted as the number of
465
      // digits in the value to be interpreted as being to the *right* of
466
      // the assumed radix point (13.7.2.3.2)
467
      exponent += got - start - edit.digits.value_or(0);
468
    }
469
  }
470
  // Consume the trailing ')' of a list-directed or NAMELIST complex
471
  // input value.
472
  if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
473
    if (next && (*next == ' ' || *next == '\t')) {
474
      io.SkipSpaces(remaining);
475
      next = io.NextInField(remaining, edit);
476
    }
477
    if (!next) { // NextInField fails on separators like ')'
478
      std::size_t byteCount{0};
479
      next = io.GetCurrentChar(byteCount);
480
      if (next && *next == ')') {
481
        io.HandleRelativePosition(byteCount);
482
      }
483
    }
484
  } else if (remaining) {
485
    while (next && (*next == ' ' || *next == '\t')) {
486
      next = io.NextInField(remaining, edit);
487
    }
488
    if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) {
489
      return {}; // error: unused nonblank character in fixed-width field
490
    }
491
  }
492
  return {got, exponent, isHexadecimal};
493
}
494

495
static RT_API_ATTRS void RaiseFPExceptions(
496
    decimal::ConversionResultFlags flags) {
497
#undef RAISE
498
#if defined(RT_DEVICE_COMPILATION)
499
  Terminator terminator(__FILE__, __LINE__);
500
#define RAISE(e) \
501
  terminator.Crash( \
502
      "not implemented yet: raising FP exception in device code: %s", #e);
503
#else // !defined(RT_DEVICE_COMPILATION)
504
#ifdef feraisexcept // a macro in some environments; omit std::
505
#define RAISE feraiseexcept
506
#else
507
#define RAISE std::feraiseexcept
508
#endif
509
#endif // !defined(RT_DEVICE_COMPILATION)
510
  if (flags & decimal::ConversionResultFlags::Overflow) {
511
    RAISE(FE_OVERFLOW);
512
  }
513
  if (flags & decimal::ConversionResultFlags::Underflow) {
514
    RAISE(FE_UNDERFLOW);
515
  }
516
  if (flags & decimal::ConversionResultFlags::Inexact) {
517
    RAISE(FE_INEXACT);
518
  }
519
  if (flags & decimal::ConversionResultFlags::Invalid) {
520
    RAISE(FE_INVALID);
521
  }
522
#undef RAISE
523
}
524

525
// If no special modes are in effect and the form of the input value
526
// that's present in the input stream is acceptable to the decimal->binary
527
// converter without modification, this fast path for real input
528
// saves time by avoiding memory copies and reformatting of the exponent.
529
template <int PRECISION>
530
static RT_API_ATTRS bool TryFastPathRealDecimalInput(
531
    IoStatementState &io, const DataEdit &edit, void *n) {
532
  if (edit.modes.editingFlags & (blankZero | decimalComma)) {
533
    return false;
534
  }
535
  if (edit.modes.scale != 0) {
536
    return false;
537
  }
538
  const ConnectionState &connection{io.GetConnectionState()};
539
  if (connection.internalIoCharKind > 1) {
540
    return false; // reading non-default character
541
  }
542
  const char *str{nullptr};
543
  std::size_t got{io.GetNextInputBytes(str)};
544
  if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
545
    return false; // could not access reliably-terminated input stream
546
  }
547
  const char *p{str};
548
  std::int64_t maxConsume{
549
      std::min<std::int64_t>(got, edit.width.value_or(got))};
550
  const char *limit{str + maxConsume};
551
  decimal::ConversionToBinaryResult<PRECISION> converted{
552
      decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
553
  if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
554
    return false;
555
  }
556
  if (edit.digits.value_or(0) != 0) {
557
    // Edit descriptor is Fw.d (or other) with d != 0, which
558
    // implies scaling
559
    const char *q{str};
560
    for (; q < limit; ++q) {
561
      if (*q == '.' || *q == 'n' || *q == 'N') {
562
        break;
563
      }
564
    }
565
    if (q == limit) {
566
      // No explicit decimal point, and not NaN/Inf.
567
      return false;
568
    }
569
  }
570
  if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
571
    // Need to consume a trailing ')', possibly with leading spaces
572
    for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
573
    }
574
    if (p < limit && *p == ')') {
575
      ++p;
576
    } else {
577
      return false;
578
    }
579
  } else if (edit.IsListDirected()) {
580
    if (p < limit && !IsCharValueSeparator(edit, *p)) {
581
      return false;
582
    }
583
  } else {
584
    for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
585
    }
586
    if (edit.width && p < str + *edit.width) {
587
      return false; // unconverted characters remain in fixed width field
588
    }
589
  }
590
  // Success on the fast path!
591
  *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
592
      converted.binary;
593
  io.HandleRelativePosition(p - str);
594
  // Set FP exception flags
595
  if (converted.flags != decimal::ConversionResultFlags::Exact) {
596
    RaiseFPExceptions(converted.flags);
597
  }
598
  return true;
599
}
600

601
template <int binaryPrecision>
602
RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision>
603
ConvertHexadecimal(
604
    const char *&p, enum decimal::FortranRounding rounding, int expo) {
605
  using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
606
  using RawType = typename RealType::RawType;
607
  bool isNegative{*p == '-'};
608
  constexpr RawType one{1};
609
  RawType signBit{0};
610
  if (isNegative) {
611
    ++p;
612
    signBit = one << (RealType::bits - 1);
613
  }
614
  RawType fraction{0};
615
  // Adjust the incoming binary P+/- exponent to shift the radix point
616
  // to below the LSB and add in the bias.
617
  expo += binaryPrecision - 1 + RealType::exponentBias;
618
  // Input the fraction.
619
  int roundingBit{0};
620
  int guardBit{0};
621
  for (; *p; ++p) {
622
    fraction <<= 4;
623
    expo -= 4;
624
    if (*p >= '0' && *p <= '9') {
625
      fraction |= *p - '0';
626
    } else if (*p >= 'A' && *p <= 'F') {
627
      fraction |= *p - 'A' + 10; // data were normalized to capitals
628
    } else {
629
      break;
630
    }
631
    if (fraction >> binaryPrecision) {
632
      while (fraction >> binaryPrecision) {
633
        guardBit |= roundingBit;
634
        roundingBit = (int)fraction & 1;
635
        fraction >>= 1;
636
        ++expo;
637
      }
638
      // Consume excess digits
639
      while (*++p) {
640
        if (*p == '0') {
641
        } else if ((*p >= '1' && *p <= '9') || (*p >= 'A' && *p <= 'F')) {
642
          guardBit = 1;
643
        } else {
644
          break;
645
        }
646
      }
647
      break;
648
    }
649
  }
650
  if (fraction) {
651
    // Boost biased expo if too small
652
    while (expo < 1) {
653
      guardBit |= roundingBit;
654
      roundingBit = (int)fraction & 1;
655
      fraction >>= 1;
656
      ++expo;
657
    }
658
    // Normalize
659
    while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
660
      fraction <<= 1;
661
      --expo;
662
      guardBit = roundingBit = 0;
663
    }
664
  }
665
  // Rounding
666
  bool increase{false};
667
  switch (rounding) {
668
  case decimal::RoundNearest: // RN & RP
669
    increase = roundingBit && (guardBit | ((int)fraction & 1));
670
    break;
671
  case decimal::RoundUp: // RU
672
    increase = !isNegative && (roundingBit | guardBit);
673
    break;
674
  case decimal::RoundDown: // RD
675
    increase = isNegative && (roundingBit | guardBit);
676
    break;
677
  case decimal::RoundToZero: // RZ
678
    break;
679
  case decimal::RoundCompatible: // RC
680
    increase = roundingBit != 0;
681
    break;
682
  }
683
  if (increase) {
684
    ++fraction;
685
    if (fraction >> binaryPrecision) {
686
      fraction >>= 1;
687
      ++expo;
688
    }
689
  }
690
  // Package & return result
691
  constexpr RawType significandMask{(one << RealType::significandBits) - 1};
692
  int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
693
  if (!fraction) {
694
    expo = 0;
695
  } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
696
    expo = 0; // subnormal
697
    flags |= decimal::Underflow;
698
  } else if (expo >= RealType::maxExponent) {
699
    if (rounding == decimal::RoundToZero ||
700
        (rounding == decimal::RoundDown && !isNegative) ||
701
        (rounding == decimal::RoundUp && isNegative)) {
702
      expo = RealType::maxExponent - 1; // +/-HUGE()
703
      fraction = significandMask;
704
    } else {
705
      expo = RealType::maxExponent; // +/-Inf
706
      fraction = 0;
707
      flags |= decimal::Overflow;
708
    }
709
  } else {
710
    fraction &= significandMask; // remove explicit normalization unless x87
711
  }
712
  return decimal::ConversionToBinaryResult<binaryPrecision>{
713
      RealType{static_cast<RawType>(signBit |
714
          static_cast<RawType>(expo) << RealType::significandBits | fraction)},
715
      static_cast<decimal::ConversionResultFlags>(flags)};
716
}
717

718
template <int KIND>
719
RT_API_ATTRS bool EditCommonRealInput(
720
    IoStatementState &io, const DataEdit &edit, void *n) {
721
  constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
722
  if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
723
    return CheckCompleteListDirectedField(io, edit);
724
  }
725
  // Fast path wasn't available or didn't work; go the more general route
726
  static constexpr int maxDigits{
727
      common::MaxDecimalConversionDigits(binaryPrecision)};
728
  static constexpr int bufferSize{maxDigits + 18};
729
  char buffer[bufferSize];
730
  auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)};
731
  int got{scanned.got};
732
  if (got >= maxDigits + 2) {
733
    io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
734
    return false;
735
  }
736
  if (got == 0) {
737
    const auto &connection{io.GetConnectionState()};
738
    io.GetIoErrorHandler().SignalError(IostatBadRealInput,
739
        "Bad real input data at column %d of record %d",
740
        static_cast<int>(connection.positionInRecord + 1),
741
        static_cast<int>(connection.currentRecordNumber));
742
    return false;
743
  }
744
  decimal::ConversionToBinaryResult<binaryPrecision> converted;
745
  const char *p{buffer};
746
  if (scanned.isHexadecimal) {
747
    buffer[got] = '\0';
748
    converted = ConvertHexadecimal<binaryPrecision>(
749
        p, edit.modes.round, scanned.exponent);
750
  } else {
751
    bool hadExtra{got > maxDigits};
752
    int exponent{scanned.exponent};
753
    if (exponent != 0) {
754
      buffer[got++] = 'e';
755
      if (exponent < 0) {
756
        buffer[got++] = '-';
757
        exponent = -exponent;
758
      }
759
      if (exponent > 9999) {
760
        exponent = 9999; // will convert to +/-Inf
761
      }
762
      if (exponent > 999) {
763
        int dig{exponent / 1000};
764
        buffer[got++] = '0' + dig;
765
        int rest{exponent - 1000 * dig};
766
        dig = rest / 100;
767
        buffer[got++] = '0' + dig;
768
        rest -= 100 * dig;
769
        dig = rest / 10;
770
        buffer[got++] = '0' + dig;
771
        buffer[got++] = '0' + (rest - 10 * dig);
772
      } else if (exponent > 99) {
773
        int dig{exponent / 100};
774
        buffer[got++] = '0' + dig;
775
        int rest{exponent - 100 * dig};
776
        dig = rest / 10;
777
        buffer[got++] = '0' + dig;
778
        buffer[got++] = '0' + (rest - 10 * dig);
779
      } else if (exponent > 9) {
780
        int dig{exponent / 10};
781
        buffer[got++] = '0' + dig;
782
        buffer[got++] = '0' + (exponent - 10 * dig);
783
      } else {
784
        buffer[got++] = '0' + exponent;
785
      }
786
    }
787
    buffer[got] = '\0';
788
    converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
789
    if (hadExtra) {
790
      converted.flags = static_cast<enum decimal::ConversionResultFlags>(
791
          converted.flags | decimal::Inexact);
792
    }
793
  }
794
  if (*p) { // unprocessed junk after value
795
    const auto &connection{io.GetConnectionState()};
796
    io.GetIoErrorHandler().SignalError(IostatBadRealInput,
797
        "Trailing characters after real input data at column %d of record %d",
798
        static_cast<int>(connection.positionInRecord + 1),
799
        static_cast<int>(connection.currentRecordNumber));
800
    return false;
801
  }
802
  *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
803
      converted.binary;
804
  // Set FP exception flags
805
  if (converted.flags != decimal::ConversionResultFlags::Exact) {
806
    if (converted.flags & decimal::ConversionResultFlags::Overflow) {
807
      io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
808
      return false;
809
    }
810
    RaiseFPExceptions(converted.flags);
811
  }
812
  return CheckCompleteListDirectedField(io, edit);
813
}
814

815
template <int KIND>
816
RT_API_ATTRS bool EditRealInput(
817
    IoStatementState &io, const DataEdit &edit, void *n) {
818
  switch (edit.descriptor) {
819
  case DataEdit::ListDirected:
820
    if (IsNamelistNameOrSlash(io)) {
821
      return false;
822
    }
823
    return EditCommonRealInput<KIND>(io, edit, n);
824
  case DataEdit::ListDirectedRealPart:
825
  case DataEdit::ListDirectedImaginaryPart:
826
  case 'F':
827
  case 'E': // incl. EN, ES, & EX
828
  case 'D':
829
  case 'G':
830
    return EditCommonRealInput<KIND>(io, edit, n);
831
  case 'B':
832
    return EditBOZInput<1>(io, edit, n,
833
        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
834
  case 'O':
835
    return EditBOZInput<3>(io, edit, n,
836
        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
837
  case 'Z':
838
    return EditBOZInput<4>(io, edit, n,
839
        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
840
  case 'A': // legacy extension
841
    return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
842
  default:
843
    io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
844
        "Data edit descriptor '%c' may not be used for REAL input",
845
        edit.descriptor);
846
    return false;
847
  }
848
}
849

850
// 13.7.3 in Fortran 2018
851
RT_API_ATTRS bool EditLogicalInput(
852
    IoStatementState &io, const DataEdit &edit, bool &x) {
853
  switch (edit.descriptor) {
854
  case DataEdit::ListDirected:
855
    if (IsNamelistNameOrSlash(io)) {
856
      return false;
857
    }
858
    break;
859
  case 'L':
860
  case 'G':
861
    break;
862
  default:
863
    io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
864
        "Data edit descriptor '%c' may not be used for LOGICAL input",
865
        edit.descriptor);
866
    return false;
867
  }
868
  Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
869
  Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
870
  if (next && *next == '.') { // skip optional period
871
    next = io.NextInField(remaining, edit);
872
  }
873
  if (!next) {
874
    io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
875
    return false;
876
  }
877
  switch (*next) {
878
  case 'T':
879
  case 't':
880
    x = true;
881
    break;
882
  case 'F':
883
  case 'f':
884
    x = false;
885
    break;
886
  default:
887
    io.GetIoErrorHandler().SignalError(
888
        "Bad character '%lc' in LOGICAL input field", *next);
889
    return false;
890
  }
891
  if (remaining) { // ignore the rest of a fixed-width field
892
    io.HandleRelativePosition(*remaining);
893
  } else if (edit.descriptor == DataEdit::ListDirected) {
894
    while (io.NextInField(remaining, edit)) { // discard rest of field
895
    }
896
  }
897
  return CheckCompleteListDirectedField(io, edit);
898
}
899

900
// See 13.10.3.1 paragraphs 7-9 in Fortran 2018
901
template <typename CHAR>
902
static RT_API_ATTRS bool EditDelimitedCharacterInput(
903
    IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
904
  bool result{true};
905
  while (true) {
906
    std::size_t byteCount{0};
907
    auto ch{io.GetCurrentChar(byteCount)};
908
    if (!ch) {
909
      if (io.AdvanceRecord()) {
910
        continue;
911
      } else {
912
        result = false; // EOF in character value
913
        break;
914
      }
915
    }
916
    io.HandleRelativePosition(byteCount);
917
    if (*ch == delimiter) {
918
      auto next{io.GetCurrentChar(byteCount)};
919
      if (next && *next == delimiter) {
920
        // Repeated delimiter: use as character value
921
        io.HandleRelativePosition(byteCount);
922
      } else {
923
        break; // closing delimiter
924
      }
925
    }
926
    if (length > 0) {
927
      *x++ = *ch;
928
      --length;
929
    }
930
  }
931
  Fortran::runtime::fill_n(x, length, ' ');
932
  return result;
933
}
934

935
template <typename CHAR>
936
static RT_API_ATTRS bool EditListDirectedCharacterInput(
937
    IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
938
  std::size_t byteCount{0};
939
  auto ch{io.GetCurrentChar(byteCount)};
940
  if (ch && (*ch == '\'' || *ch == '"')) {
941
    io.HandleRelativePosition(byteCount);
942
    return EditDelimitedCharacterInput(io, x, length, *ch);
943
  }
944
  if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
945
    return false;
946
  }
947
  // Undelimited list-directed character input: stop at a value separator
948
  // or the end of the current record.  Subtlety: the "remaining" count
949
  // here is a dummy that's used to avoid the interpretation of separators
950
  // in NextInField.
951
  Fortran::common::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
952
  while (Fortran::common::optional<char32_t> next{
953
      io.NextInField(remaining, edit)}) {
954
    bool isSep{false};
955
    switch (*next) {
956
    case ' ':
957
    case '\t':
958
    case '/':
959
      isSep = true;
960
      break;
961
    case '&':
962
    case '$':
963
      isSep = edit.IsNamelist();
964
      break;
965
    case ',':
966
      isSep = !(edit.modes.editingFlags & decimalComma);
967
      break;
968
    case ';':
969
      isSep = !!(edit.modes.editingFlags & decimalComma);
970
      break;
971
    default:
972
      break;
973
    }
974
    if (isSep) {
975
      remaining = 0;
976
    } else {
977
      *x++ = *next;
978
      remaining = --length > 0 ? maxUTF8Bytes : 0;
979
    }
980
  }
981
  Fortran::runtime::fill_n(x, length, ' ');
982
  return true;
983
}
984

985
template <typename CHAR>
986
RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
987
    CHAR *x, std::size_t lengthChars) {
988
  switch (edit.descriptor) {
989
  case DataEdit::ListDirected:
990
    return EditListDirectedCharacterInput(io, x, lengthChars, edit);
991
  case 'A':
992
  case 'G':
993
    break;
994
  case 'B':
995
    return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
996
  case 'O':
997
    return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
998
  case 'Z':
999
    return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
1000
  default:
1001
    io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
1002
        "Data edit descriptor '%c' may not be used with a CHARACTER data item",
1003
        edit.descriptor);
1004
    return false;
1005
  }
1006
  const ConnectionState &connection{io.GetConnectionState()};
1007
  std::size_t remainingChars{lengthChars};
1008
  // Skip leading characters.
1009
  // Their bytes don't count towards INQUIRE(IOLENGTH=).
1010
  std::size_t skipChars{0};
1011
  if (edit.width && *edit.width > 0) {
1012
    remainingChars = *edit.width;
1013
    if (remainingChars > lengthChars) {
1014
      skipChars = remainingChars - lengthChars;
1015
    }
1016
  }
1017
  // When the field is wider than the variable, we drop the leading
1018
  // characters.  When the variable is wider than the field, there can be
1019
  // trailing padding or an EOR condition.
1020
  const char *input{nullptr};
1021
  std::size_t readyBytes{0};
1022
  // Transfer payload bytes; these do count.
1023
  while (remainingChars > 0) {
1024
    if (readyBytes == 0) {
1025
      readyBytes = io.GetNextInputBytes(input);
1026
      if (readyBytes == 0 ||
1027
          (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
1028
        if (io.CheckForEndOfRecord(readyBytes)) {
1029
          if (readyBytes == 0) {
1030
            // PAD='YES' and no more data
1031
            Fortran::runtime::fill_n(x, lengthChars, ' ');
1032
            return !io.GetIoErrorHandler().InError();
1033
          } else {
1034
            // Do partial read(s) then pad on last iteration
1035
          }
1036
        } else {
1037
          return !io.GetIoErrorHandler().InError();
1038
        }
1039
      }
1040
    }
1041
    std::size_t chunkBytes;
1042
    std::size_t chunkChars{1};
1043
    bool skipping{skipChars > 0};
1044
    if (connection.isUTF8) {
1045
      chunkBytes = MeasureUTF8Bytes(*input);
1046
      if (skipping) {
1047
        --skipChars;
1048
      } else if (auto ucs{DecodeUTF8(input)}) {
1049
        if ((sizeof *x == 1 && *ucs > 0xff) ||
1050
            (sizeof *x == 2 && *ucs > 0xffff)) {
1051
          *x++ = '?';
1052
        } else {
1053
          *x++ = *ucs;
1054
        }
1055
        --lengthChars;
1056
      } else if (chunkBytes == 0) {
1057
        // error recovery: skip bad encoding
1058
        chunkBytes = 1;
1059
      }
1060
    } else if (connection.internalIoCharKind > 1) {
1061
      // Reading from non-default character internal unit
1062
      chunkBytes = connection.internalIoCharKind;
1063
      if (skipping) {
1064
        --skipChars;
1065
      } else {
1066
        char32_t buffer{0};
1067
        std::memcpy(&buffer, input, chunkBytes);
1068
        if ((sizeof *x == 1 && buffer > 0xff) ||
1069
            (sizeof *x == 2 && buffer > 0xffff)) {
1070
          *x++ = '?';
1071
        } else {
1072
          *x++ = buffer;
1073
        }
1074
        --lengthChars;
1075
      }
1076
    } else if constexpr (sizeof *x > 1) {
1077
      // Read single byte with expansion into multi-byte CHARACTER
1078
      chunkBytes = 1;
1079
      if (skipping) {
1080
        --skipChars;
1081
      } else {
1082
        *x++ = static_cast<unsigned char>(*input);
1083
        --lengthChars;
1084
      }
1085
    } else { // single bytes -> default CHARACTER
1086
      if (skipping) {
1087
        chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
1088
        chunkChars = chunkBytes;
1089
        skipChars -= chunkChars;
1090
      } else {
1091
        chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
1092
        chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
1093
        chunkChars = chunkBytes;
1094
        std::memcpy(x, input, chunkBytes);
1095
        x += chunkBytes;
1096
        lengthChars -= chunkChars;
1097
      }
1098
    }
1099
    input += chunkBytes;
1100
    remainingChars -= chunkChars;
1101
    if (!skipping) {
1102
      io.GotChar(chunkBytes);
1103
    }
1104
    io.HandleRelativePosition(chunkBytes);
1105
    readyBytes -= chunkBytes;
1106
  }
1107
  // Pad the remainder of the input variable, if any.
1108
  Fortran::runtime::fill_n(x, lengthChars, ' ');
1109
  return CheckCompleteListDirectedField(io, edit);
1110
}
1111

1112
template RT_API_ATTRS bool EditRealInput<2>(
1113
    IoStatementState &, const DataEdit &, void *);
1114
template RT_API_ATTRS bool EditRealInput<3>(
1115
    IoStatementState &, const DataEdit &, void *);
1116
template RT_API_ATTRS bool EditRealInput<4>(
1117
    IoStatementState &, const DataEdit &, void *);
1118
template RT_API_ATTRS bool EditRealInput<8>(
1119
    IoStatementState &, const DataEdit &, void *);
1120
template RT_API_ATTRS bool EditRealInput<10>(
1121
    IoStatementState &, const DataEdit &, void *);
1122
// TODO: double/double
1123
template RT_API_ATTRS bool EditRealInput<16>(
1124
    IoStatementState &, const DataEdit &, void *);
1125

1126
template RT_API_ATTRS bool EditCharacterInput(
1127
    IoStatementState &, const DataEdit &, char *, std::size_t);
1128
template RT_API_ATTRS bool EditCharacterInput(
1129
    IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1130
template RT_API_ATTRS bool EditCharacterInput(
1131
    IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1132

1133
RT_OFFLOAD_API_GROUP_END
1134
} // namespace Fortran::runtime::io
1135
llvm-project

Использование cookies