qemu

Форк
0
/
softfloat-parts.c.inc 
1626 строк · 45.7 Кб
1
/*
2
 * QEMU float support
3
 *
4
 * The code in this source file is derived from release 2a of the SoftFloat
5
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6
 * some later contributions) are provided under that license, as detailed below.
7
 * It has subsequently been modified by contributors to the QEMU Project,
8
 * so some portions are provided under:
9
 *  the SoftFloat-2a license
10
 *  the BSD license
11
 *  GPL-v2-or-later
12
 *
13
 * Any future contributions to this file after December 1st 2014 will be
14
 * taken to be licensed under the Softfloat-2a license unless specifically
15
 * indicated otherwise.
16
 */
17

18
static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19
{
20
    switch (a->cls) {
21
    case float_class_snan:
22
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
23
        if (s->default_nan_mode) {
24
            parts_default_nan(a, s);
25
        } else {
26
            parts_silence_nan(a, s);
27
        }
28
        break;
29
    case float_class_qnan:
30
        if (s->default_nan_mode) {
31
            parts_default_nan(a, s);
32
        }
33
        break;
34
    default:
35
        g_assert_not_reached();
36
    }
37
}
38

39
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
40
                                     float_status *s)
41
{
42
    if (is_snan(a->cls) || is_snan(b->cls)) {
43
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
44
    }
45

46
    if (s->default_nan_mode) {
47
        parts_default_nan(a, s);
48
    } else {
49
        int cmp = frac_cmp(a, b);
50
        if (cmp == 0) {
51
            cmp = a->sign < b->sign;
52
        }
53

54
        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
55
            a = b;
56
        }
57
        if (is_snan(a->cls)) {
58
            parts_silence_nan(a, s);
59
        }
60
    }
61
    return a;
62
}
63

64
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65
                                            FloatPartsN *c, float_status *s,
66
                                            int ab_mask, int abc_mask)
67
{
68
    int which;
69

70
    if (unlikely(abc_mask & float_cmask_snan)) {
71
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
72
    }
73

74
    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75
                          ab_mask == float_cmask_infzero, s);
76

77
    if (s->default_nan_mode || which == 3) {
78
        /*
79
         * Note that this check is after pickNaNMulAdd so that function
80
         * has an opportunity to set the Invalid flag for infzero.
81
         */
82
        parts_default_nan(a, s);
83
        return a;
84
    }
85

86
    switch (which) {
87
    case 0:
88
        break;
89
    case 1:
90
        a = b;
91
        break;
92
    case 2:
93
        a = c;
94
        break;
95
    default:
96
        g_assert_not_reached();
97
    }
98
    if (is_snan(a->cls)) {
99
        parts_silence_nan(a, s);
100
    }
101
    return a;
102
}
103

104
/*
105
 * Canonicalize the FloatParts structure.  Determine the class,
106
 * unbias the exponent, and normalize the fraction.
107
 */
108
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
109
                                 const FloatFmt *fmt)
110
{
111
    if (unlikely(p->exp == 0)) {
112
        if (likely(frac_eqz(p))) {
113
            p->cls = float_class_zero;
114
        } else if (status->flush_inputs_to_zero) {
115
            float_raise(float_flag_input_denormal, status);
116
            p->cls = float_class_zero;
117
            frac_clear(p);
118
        } else {
119
            int shift = frac_normalize(p);
120
            p->cls = float_class_normal;
121
            p->exp = fmt->frac_shift - fmt->exp_bias
122
                   - shift + !fmt->m68k_denormal;
123
        }
124
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
125
        p->cls = float_class_normal;
126
        p->exp -= fmt->exp_bias;
127
        frac_shl(p, fmt->frac_shift);
128
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
129
    } else if (likely(frac_eqz(p))) {
130
        p->cls = float_class_inf;
131
    } else {
132
        frac_shl(p, fmt->frac_shift);
133
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
134
                  ? float_class_snan : float_class_qnan);
135
    }
136
}
137

138
/*
139
 * Round and uncanonicalize a floating-point number by parts. There
140
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
141
 * fraction; these bits will be removed. The exponent will be biased
142
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
143
 */
144
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
145
                                   const FloatFmt *fmt)
146
{
147
    const int exp_max = fmt->exp_max;
148
    const int frac_shift = fmt->frac_shift;
149
    const uint64_t round_mask = fmt->round_mask;
150
    const uint64_t frac_lsb = round_mask + 1;
151
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
152
    const uint64_t roundeven_mask = round_mask | frac_lsb;
153
    uint64_t inc;
154
    bool overflow_norm = false;
155
    int exp, flags = 0;
156

157
    switch (s->float_rounding_mode) {
158
    case float_round_nearest_even:
159
        if (N > 64 && frac_lsb == 0) {
160
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
161
                   ? frac_lsbm1 : 0);
162
        } else {
163
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
164
                   ? frac_lsbm1 : 0);
165
        }
166
        break;
167
    case float_round_ties_away:
168
        inc = frac_lsbm1;
169
        break;
170
    case float_round_to_zero:
171
        overflow_norm = true;
172
        inc = 0;
173
        break;
174
    case float_round_up:
175
        inc = p->sign ? 0 : round_mask;
176
        overflow_norm = p->sign;
177
        break;
178
    case float_round_down:
179
        inc = p->sign ? round_mask : 0;
180
        overflow_norm = !p->sign;
181
        break;
182
    case float_round_to_odd:
183
        overflow_norm = true;
184
        /* fall through */
185
    case float_round_to_odd_inf:
186
        if (N > 64 && frac_lsb == 0) {
187
            inc = p->frac_hi & 1 ? 0 : round_mask;
188
        } else {
189
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
190
        }
191
        break;
192
    default:
193
        g_assert_not_reached();
194
    }
195

196
    exp = p->exp + fmt->exp_bias;
197
    if (likely(exp > 0)) {
198
        if (p->frac_lo & round_mask) {
199
            flags |= float_flag_inexact;
200
            if (frac_addi(p, p, inc)) {
201
                frac_shr(p, 1);
202
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
203
                exp++;
204
            }
205
            p->frac_lo &= ~round_mask;
206
        }
207

208
        if (fmt->arm_althp) {
209
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
210
            if (unlikely(exp > exp_max)) {
211
                /* Overflow.  Return the maximum normal.  */
212
                flags = float_flag_invalid;
213
                exp = exp_max;
214
                frac_allones(p);
215
                p->frac_lo &= ~round_mask;
216
            }
217
        } else if (unlikely(exp >= exp_max)) {
218
            flags |= float_flag_overflow;
219
            if (s->rebias_overflow) {
220
                exp -= fmt->exp_re_bias;
221
            } else if (overflow_norm) {
222
                flags |= float_flag_inexact;
223
                exp = exp_max - 1;
224
                frac_allones(p);
225
                p->frac_lo &= ~round_mask;
226
            } else {
227
                flags |= float_flag_inexact;
228
                p->cls = float_class_inf;
229
                exp = exp_max;
230
                frac_clear(p);
231
            }
232
        }
233
        frac_shr(p, frac_shift);
234
    } else if (unlikely(s->rebias_underflow)) {
235
        flags |= float_flag_underflow;
236
        exp += fmt->exp_re_bias;
237
        if (p->frac_lo & round_mask) {
238
            flags |= float_flag_inexact;
239
            if (frac_addi(p, p, inc)) {
240
                frac_shr(p, 1);
241
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
242
                exp++;
243
            }
244
            p->frac_lo &= ~round_mask;
245
        }
246
        frac_shr(p, frac_shift);
247
    } else if (s->flush_to_zero) {
248
        flags |= float_flag_output_denormal;
249
        p->cls = float_class_zero;
250
        exp = 0;
251
        frac_clear(p);
252
    } else {
253
        bool is_tiny = s->tininess_before_rounding || exp < 0;
254

255
        if (!is_tiny) {
256
            FloatPartsN discard;
257
            is_tiny = !frac_addi(&discard, p, inc);
258
        }
259

260
        frac_shrjam(p, !fmt->m68k_denormal - exp);
261

262
        if (p->frac_lo & round_mask) {
263
            /* Need to recompute round-to-even/round-to-odd. */
264
            switch (s->float_rounding_mode) {
265
            case float_round_nearest_even:
266
                if (N > 64 && frac_lsb == 0) {
267
                    inc = ((p->frac_hi & 1) ||
268
                           (p->frac_lo & round_mask) != frac_lsbm1
269
                           ? frac_lsbm1 : 0);
270
                } else {
271
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
272
                           ? frac_lsbm1 : 0);
273
                }
274
                break;
275
            case float_round_to_odd:
276
            case float_round_to_odd_inf:
277
                if (N > 64 && frac_lsb == 0) {
278
                    inc = p->frac_hi & 1 ? 0 : round_mask;
279
                } else {
280
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
281
                }
282
                break;
283
            default:
284
                break;
285
            }
286
            flags |= float_flag_inexact;
287
            frac_addi(p, p, inc);
288
            p->frac_lo &= ~round_mask;
289
        }
290

291
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
292
        frac_shr(p, frac_shift);
293

294
        if (is_tiny && (flags & float_flag_inexact)) {
295
            flags |= float_flag_underflow;
296
        }
297
        if (exp == 0 && frac_eqz(p)) {
298
            p->cls = float_class_zero;
299
        }
300
    }
301
    p->exp = exp;
302
    float_raise(flags, s);
303
}
304

305
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
306
                            const FloatFmt *fmt)
307
{
308
    if (likely(p->cls == float_class_normal)) {
309
        parts_uncanon_normal(p, s, fmt);
310
    } else {
311
        switch (p->cls) {
312
        case float_class_zero:
313
            p->exp = 0;
314
            frac_clear(p);
315
            return;
316
        case float_class_inf:
317
            g_assert(!fmt->arm_althp);
318
            p->exp = fmt->exp_max;
319
            frac_clear(p);
320
            return;
321
        case float_class_qnan:
322
        case float_class_snan:
323
            g_assert(!fmt->arm_althp);
324
            p->exp = fmt->exp_max;
325
            frac_shr(p, fmt->frac_shift);
326
            return;
327
        default:
328
            break;
329
        }
330
        g_assert_not_reached();
331
    }
332
}
333

334
/*
335
 * Returns the result of adding or subtracting the values of the
336
 * floating-point values `a' and `b'. The operation is performed
337
 * according to the IEC/IEEE Standard for Binary Floating-Point
338
 * Arithmetic.
339
 */
340
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
341
                                   float_status *s, bool subtract)
342
{
343
    bool b_sign = b->sign ^ subtract;
344
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
345

346
    if (a->sign != b_sign) {
347
        /* Subtraction */
348
        if (likely(ab_mask == float_cmask_normal)) {
349
            if (parts_sub_normal(a, b)) {
350
                return a;
351
            }
352
            /* Subtract was exact, fall through to set sign. */
353
            ab_mask = float_cmask_zero;
354
        }
355

356
        if (ab_mask == float_cmask_zero) {
357
            a->sign = s->float_rounding_mode == float_round_down;
358
            return a;
359
        }
360

361
        if (unlikely(ab_mask & float_cmask_anynan)) {
362
            goto p_nan;
363
        }
364

365
        if (ab_mask & float_cmask_inf) {
366
            if (a->cls != float_class_inf) {
367
                /* N - Inf */
368
                goto return_b;
369
            }
370
            if (b->cls != float_class_inf) {
371
                /* Inf - N */
372
                return a;
373
            }
374
            /* Inf - Inf */
375
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
376
            parts_default_nan(a, s);
377
            return a;
378
        }
379
    } else {
380
        /* Addition */
381
        if (likely(ab_mask == float_cmask_normal)) {
382
            parts_add_normal(a, b);
383
            return a;
384
        }
385

386
        if (ab_mask == float_cmask_zero) {
387
            return a;
388
        }
389

390
        if (unlikely(ab_mask & float_cmask_anynan)) {
391
            goto p_nan;
392
        }
393

394
        if (ab_mask & float_cmask_inf) {
395
            a->cls = float_class_inf;
396
            return a;
397
        }
398
    }
399

400
    if (b->cls == float_class_zero) {
401
        g_assert(a->cls == float_class_normal);
402
        return a;
403
    }
404

405
    g_assert(a->cls == float_class_zero);
406
    g_assert(b->cls == float_class_normal);
407
 return_b:
408
    b->sign = b_sign;
409
    return b;
410

411
 p_nan:
412
    return parts_pick_nan(a, b, s);
413
}
414

415
/*
416
 * Returns the result of multiplying the floating-point values `a' and
417
 * `b'. The operation is performed according to the IEC/IEEE Standard
418
 * for Binary Floating-Point Arithmetic.
419
 */
420
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
421
                                float_status *s)
422
{
423
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
424
    bool sign = a->sign ^ b->sign;
425

426
    if (likely(ab_mask == float_cmask_normal)) {
427
        FloatPartsW tmp;
428

429
        frac_mulw(&tmp, a, b);
430
        frac_truncjam(a, &tmp);
431

432
        a->exp += b->exp + 1;
433
        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
434
            frac_add(a, a, a);
435
            a->exp -= 1;
436
        }
437

438
        a->sign = sign;
439
        return a;
440
    }
441

442
    /* Inf * Zero == NaN */
443
    if (unlikely(ab_mask == float_cmask_infzero)) {
444
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
445
        parts_default_nan(a, s);
446
        return a;
447
    }
448

449
    if (unlikely(ab_mask & float_cmask_anynan)) {
450
        return parts_pick_nan(a, b, s);
451
    }
452

453
    /* Multiply by 0 or Inf */
454
    if (ab_mask & float_cmask_inf) {
455
        a->cls = float_class_inf;
456
        a->sign = sign;
457
        return a;
458
    }
459

460
    g_assert(ab_mask & float_cmask_zero);
461
    a->cls = float_class_zero;
462
    a->sign = sign;
463
    return a;
464
}
465

466
/*
467
 * Returns the result of multiplying the floating-point values `a' and
468
 * `b' then adding 'c', with no intermediate rounding step after the
469
 * multiplication. The operation is performed according to the
470
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
471
 * The flags argument allows the caller to select negation of the
472
 * addend, the intermediate product, or the final result. (The
473
 * difference between this and having the caller do a separate
474
 * negation is that negating externally will flip the sign bit on NaNs.)
475
 *
476
 * Requires A and C extracted into a double-sized structure to provide the
477
 * extra space for the widening multiply.
478
 */
479
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
480
                                   FloatPartsN *c, int flags, float_status *s)
481
{
482
    int ab_mask, abc_mask;
483
    FloatPartsW p_widen, c_widen;
484

485
    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
486
    abc_mask = float_cmask(c->cls) | ab_mask;
487

488
    /*
489
     * It is implementation-defined whether the cases of (0,inf,qnan)
490
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
491
     * they return if they do), so we have to hand this information
492
     * off to the target-specific pick-a-NaN routine.
493
     */
494
    if (unlikely(abc_mask & float_cmask_anynan)) {
495
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
496
    }
497

498
    if (flags & float_muladd_negate_c) {
499
        c->sign ^= 1;
500
    }
501

502
    /* Compute the sign of the product into A. */
503
    a->sign ^= b->sign;
504
    if (flags & float_muladd_negate_product) {
505
        a->sign ^= 1;
506
    }
507

508
    if (unlikely(ab_mask != float_cmask_normal)) {
509
        if (unlikely(ab_mask == float_cmask_infzero)) {
510
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
511
            goto d_nan;
512
        }
513

514
        if (ab_mask & float_cmask_inf) {
515
            if (c->cls == float_class_inf && a->sign != c->sign) {
516
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
517
                goto d_nan;
518
            }
519
            goto return_inf;
520
        }
521

522
        g_assert(ab_mask & float_cmask_zero);
523
        if (c->cls == float_class_normal) {
524
            *a = *c;
525
            goto return_normal;
526
        }
527
        if (c->cls == float_class_zero) {
528
            if (a->sign != c->sign) {
529
                goto return_sub_zero;
530
            }
531
            goto return_zero;
532
        }
533
        g_assert(c->cls == float_class_inf);
534
    }
535

536
    if (unlikely(c->cls == float_class_inf)) {
537
        a->sign = c->sign;
538
        goto return_inf;
539
    }
540

541
    /* Perform the multiplication step. */
542
    p_widen.sign = a->sign;
543
    p_widen.exp = a->exp + b->exp + 1;
544
    frac_mulw(&p_widen, a, b);
545
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
546
        frac_add(&p_widen, &p_widen, &p_widen);
547
        p_widen.exp -= 1;
548
    }
549

550
    /* Perform the addition step. */
551
    if (c->cls != float_class_zero) {
552
        /* Zero-extend C to less significant bits. */
553
        frac_widen(&c_widen, c);
554
        c_widen.exp = c->exp;
555

556
        if (a->sign == c->sign) {
557
            parts_add_normal(&p_widen, &c_widen);
558
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
559
            goto return_sub_zero;
560
        }
561
    }
562

563
    /* Narrow with sticky bit, for proper rounding later. */
564
    frac_truncjam(a, &p_widen);
565
    a->sign = p_widen.sign;
566
    a->exp = p_widen.exp;
567

568
 return_normal:
569
    if (flags & float_muladd_halve_result) {
570
        a->exp -= 1;
571
    }
572
 finish_sign:
573
    if (flags & float_muladd_negate_result) {
574
        a->sign ^= 1;
575
    }
576
    return a;
577

578
 return_sub_zero:
579
    a->sign = s->float_rounding_mode == float_round_down;
580
 return_zero:
581
    a->cls = float_class_zero;
582
    goto finish_sign;
583

584
 return_inf:
585
    a->cls = float_class_inf;
586
    goto finish_sign;
587

588
 d_nan:
589
    parts_default_nan(a, s);
590
    return a;
591
}
592

593
/*
594
 * Returns the result of dividing the floating-point value `a' by the
595
 * corresponding value `b'. The operation is performed according to
596
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
597
 */
598
static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
599
                                float_status *s)
600
{
601
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
602
    bool sign = a->sign ^ b->sign;
603

604
    if (likely(ab_mask == float_cmask_normal)) {
605
        a->sign = sign;
606
        a->exp -= b->exp + frac_div(a, b);
607
        return a;
608
    }
609

610
    /* 0/0 or Inf/Inf => NaN */
611
    if (unlikely(ab_mask == float_cmask_zero)) {
612
        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
613
        goto d_nan;
614
    }
615
    if (unlikely(ab_mask == float_cmask_inf)) {
616
        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
617
        goto d_nan;
618
    }
619

620
    /* All the NaN cases */
621
    if (unlikely(ab_mask & float_cmask_anynan)) {
622
        return parts_pick_nan(a, b, s);
623
    }
624

625
    a->sign = sign;
626

627
    /* Inf / X */
628
    if (a->cls == float_class_inf) {
629
        return a;
630
    }
631

632
    /* 0 / X */
633
    if (a->cls == float_class_zero) {
634
        return a;
635
    }
636

637
    /* X / Inf */
638
    if (b->cls == float_class_inf) {
639
        a->cls = float_class_zero;
640
        return a;
641
    }
642

643
    /* X / 0 => Inf */
644
    g_assert(b->cls == float_class_zero);
645
    float_raise(float_flag_divbyzero, s);
646
    a->cls = float_class_inf;
647
    return a;
648

649
 d_nan:
650
    parts_default_nan(a, s);
651
    return a;
652
}
653

654
/*
655
 * Floating point remainder, per IEC/IEEE, or modulus.
656
 */
657
static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
658
                                   uint64_t *mod_quot, float_status *s)
659
{
660
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
661

662
    if (likely(ab_mask == float_cmask_normal)) {
663
        frac_modrem(a, b, mod_quot);
664
        return a;
665
    }
666

667
    if (mod_quot) {
668
        *mod_quot = 0;
669
    }
670

671
    /* All the NaN cases */
672
    if (unlikely(ab_mask & float_cmask_anynan)) {
673
        return parts_pick_nan(a, b, s);
674
    }
675

676
    /* Inf % N; N % 0 */
677
    if (a->cls == float_class_inf || b->cls == float_class_zero) {
678
        float_raise(float_flag_invalid, s);
679
        parts_default_nan(a, s);
680
        return a;
681
    }
682

683
    /* N % Inf; 0 % N */
684
    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
685
    return a;
686
}
687

688
/*
689
 * Square Root
690
 *
691
 * The base algorithm is lifted from
692
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
693
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
694
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
695
 * and is thus MIT licenced.
696
 */
697
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
698
                         const FloatFmt *fmt)
699
{
700
    const uint32_t three32 = 3u << 30;
701
    const uint64_t three64 = 3ull << 62;
702
    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
703
    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
704
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
705
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
706
    uint64_t discard;
707
    bool exp_odd;
708
    size_t index;
709

710
    if (unlikely(a->cls != float_class_normal)) {
711
        switch (a->cls) {
712
        case float_class_snan:
713
        case float_class_qnan:
714
            parts_return_nan(a, status);
715
            return;
716
        case float_class_zero:
717
            return;
718
        case float_class_inf:
719
            if (unlikely(a->sign)) {
720
                goto d_nan;
721
            }
722
            return;
723
        default:
724
            g_assert_not_reached();
725
        }
726
    }
727

728
    if (unlikely(a->sign)) {
729
        goto d_nan;
730
    }
731

732
    /*
733
     * Argument reduction.
734
     * x = 4^e frac; with integer e, and frac in [1, 4)
735
     * m = frac fixed point at bit 62, since we're in base 4.
736
     * If base-2 exponent is odd, exchange that for multiply by 2,
737
     * which results in no shift.
738
     */
739
    exp_odd = a->exp & 1;
740
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
741
    if (!exp_odd) {
742
        frac_shr(a, 1);
743
    }
744

745
    /*
746
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
747
     *
748
     * Initial estimate:
749
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
750
     *
751
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
752
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
753
     * a table lookup is faster and needs one less iteration.
754
     * The 7-bit table gives |e| < 0x1.fdp-9.
755
     *
756
     * A Newton-Raphson iteration for r is
757
     *   s = m*r
758
     *   d = s*r
759
     *   u = 3 - d
760
     *   r = r*u/2
761
     *
762
     * Fixed point representations:
763
     *   m, s, d, u, three are all 2.30; r is 0.32
764
     */
765
    m64 = a->frac_hi;
766
    m32 = m64 >> 32;
767

768
    r32 = rsqrt_tab[index] << 16;
769
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
770

771
    s32 = ((uint64_t)m32 * r32) >> 32;
772
    d32 = ((uint64_t)s32 * r32) >> 32;
773
    u32 = three32 - d32;
774

775
    if (N == 64) {
776
        /* float64 or smaller */
777

778
        r32 = ((uint64_t)r32 * u32) >> 31;
779
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
780

781
        s32 = ((uint64_t)m32 * r32) >> 32;
782
        d32 = ((uint64_t)s32 * r32) >> 32;
783
        u32 = three32 - d32;
784

785
        if (fmt->frac_size <= 23) {
786
            /* float32 or smaller */
787

788
            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
789
            s32 = (s32 - 1) >> 6;               /* 9.23 */
790
            /* s < sqrt(m) < s + 0x1.08p-23 */
791

792
            /* compute nearest rounded result to 2.23 bits */
793
            uint32_t d0 = (m32 << 16) - s32 * s32;
794
            uint32_t d1 = s32 - d0;
795
            uint32_t d2 = d1 + s32 + 1;
796
            s32 += d1 >> 31;
797
            a->frac_hi = (uint64_t)s32 << (64 - 25);
798

799
            /* increment or decrement for inexact */
800
            if (d2 != 0) {
801
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
802
            }
803
            goto done;
804
        }
805

806
        /* float64 */
807

808
        r64 = (uint64_t)r32 * u32 * 2;
809
        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
810
        mul64To128(m64, r64, &s64, &discard);
811
        mul64To128(s64, r64, &d64, &discard);
812
        u64 = three64 - d64;
813

814
        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
815
        s64 = (s64 - 2) >> 9;                  /* 12.52 */
816

817
        /* Compute nearest rounded result */
818
        uint64_t d0 = (m64 << 42) - s64 * s64;
819
        uint64_t d1 = s64 - d0;
820
        uint64_t d2 = d1 + s64 + 1;
821
        s64 += d1 >> 63;
822
        a->frac_hi = s64 << (64 - 54);
823

824
        /* increment or decrement for inexact */
825
        if (d2 != 0) {
826
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
827
        }
828
        goto done;
829
    }
830

831
    r64 = (uint64_t)r32 * u32 * 2;
832
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
833

834
    mul64To128(m64, r64, &s64, &discard);
835
    mul64To128(s64, r64, &d64, &discard);
836
    u64 = three64 - d64;
837
    mul64To128(u64, r64, &r64, &discard);
838
    r64 <<= 1;
839
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
840

841
    mul64To128(m64, r64, &s64, &discard);
842
    mul64To128(s64, r64, &d64, &discard);
843
    u64 = three64 - d64;
844
    mul64To128(u64, r64, &rh, &rl);
845
    add128(rh, rl, rh, rl, &rh, &rl);
846
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
847

848
    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
849
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
850
    sub128(three64, 0, dh, dl, &uh, &ul);
851
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
852
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
853

854
    sub128(sh, sl, 0, 4, &sh, &sl);
855
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
856
    /* s < sqrt(m) < s + 1ulp */
857

858
    /* Compute nearest rounded result */
859
    mul64To128(sl, sl, &d0h, &d0l);
860
    d0h += 2 * sh * sl;
861
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
862
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
863
    add128(sh, sl, 0, 1, &d2h, &d2l);
864
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
865
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
866
    shift128Left(sh, sl, 128 - 114, &sh, &sl);
867

868
    /* increment or decrement for inexact */
869
    if (d2h | d2l) {
870
        if ((int64_t)(d1h ^ d2h) < 0) {
871
            sub128(sh, sl, 0, 1, &sh, &sl);
872
        } else {
873
            add128(sh, sl, 0, 1, &sh, &sl);
874
        }
875
    }
876
    a->frac_lo = sl;
877
    a->frac_hi = sh;
878

879
 done:
880
    /* Convert back from base 4 to base 2. */
881
    a->exp >>= 1;
882
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
883
        frac_add(a, a, a);
884
    } else {
885
        a->exp += 1;
886
    }
887
    return;
888

889
 d_nan:
890
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
891
    parts_default_nan(a, status);
892
}
893

894
/*
895
 * Rounds the floating-point value `a' to an integer, and returns the
896
 * result as a floating-point value. The operation is performed
897
 * according to the IEC/IEEE Standard for Binary Floating-Point
898
 * Arithmetic.
899
 *
900
 * parts_round_to_int_normal is an internal helper function for
901
 * normal numbers only, returning true for inexact but not directly
902
 * raising float_flag_inexact.
903
 */
904
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
905
                                        int scale, int frac_size)
906
{
907
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
908
    int shift_adj;
909

910
    scale = MIN(MAX(scale, -0x10000), 0x10000);
911
    a->exp += scale;
912

913
    if (a->exp < 0) {
914
        bool one;
915

916
        /* All fractional */
917
        switch (rmode) {
918
        case float_round_nearest_even:
919
            one = false;
920
            if (a->exp == -1) {
921
                FloatPartsN tmp;
922
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
923
                frac_add(&tmp, a, a);
924
                /* Anything remaining means frac > 0.5. */
925
                one = !frac_eqz(&tmp);
926
            }
927
            break;
928
        case float_round_ties_away:
929
            one = a->exp == -1;
930
            break;
931
        case float_round_to_zero:
932
            one = false;
933
            break;
934
        case float_round_up:
935
            one = !a->sign;
936
            break;
937
        case float_round_down:
938
            one = a->sign;
939
            break;
940
        case float_round_to_odd:
941
            one = true;
942
            break;
943
        default:
944
            g_assert_not_reached();
945
        }
946

947
        frac_clear(a);
948
        a->exp = 0;
949
        if (one) {
950
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
951
        } else {
952
            a->cls = float_class_zero;
953
        }
954
        return true;
955
    }
956

957
    if (a->exp >= frac_size) {
958
        /* All integral */
959
        return false;
960
    }
961

962
    if (N > 64 && a->exp < N - 64) {
963
        /*
964
         * Rounding is not in the low word -- shift lsb to bit 2,
965
         * which leaves room for sticky and rounding bit.
966
         */
967
        shift_adj = (N - 1) - (a->exp + 2);
968
        frac_shrjam(a, shift_adj);
969
        frac_lsb = 1 << 2;
970
    } else {
971
        shift_adj = 0;
972
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
973
    }
974

975
    frac_lsbm1 = frac_lsb >> 1;
976
    rnd_mask = frac_lsb - 1;
977
    rnd_even_mask = rnd_mask | frac_lsb;
978

979
    if (!(a->frac_lo & rnd_mask)) {
980
        /* Fractional bits already clear, undo the shift above. */
981
        frac_shl(a, shift_adj);
982
        return false;
983
    }
984

985
    switch (rmode) {
986
    case float_round_nearest_even:
987
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
988
        break;
989
    case float_round_ties_away:
990
        inc = frac_lsbm1;
991
        break;
992
    case float_round_to_zero:
993
        inc = 0;
994
        break;
995
    case float_round_up:
996
        inc = a->sign ? 0 : rnd_mask;
997
        break;
998
    case float_round_down:
999
        inc = a->sign ? rnd_mask : 0;
1000
        break;
1001
    case float_round_to_odd:
1002
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
1003
        break;
1004
    default:
1005
        g_assert_not_reached();
1006
    }
1007

1008
    if (shift_adj == 0) {
1009
        if (frac_addi(a, a, inc)) {
1010
            frac_shr(a, 1);
1011
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
1012
            a->exp++;
1013
        }
1014
        a->frac_lo &= ~rnd_mask;
1015
    } else {
1016
        frac_addi(a, a, inc);
1017
        a->frac_lo &= ~rnd_mask;
1018
        /* Be careful shifting back, not to overflow */
1019
        frac_shl(a, shift_adj - 1);
1020
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
1021
            a->exp++;
1022
        } else {
1023
            frac_add(a, a, a);
1024
        }
1025
    }
1026
    return true;
1027
}
1028

1029
static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1030
                                 int scale, float_status *s,
1031
                                 const FloatFmt *fmt)
1032
{
1033
    switch (a->cls) {
1034
    case float_class_qnan:
1035
    case float_class_snan:
1036
        parts_return_nan(a, s);
1037
        break;
1038
    case float_class_zero:
1039
    case float_class_inf:
1040
        break;
1041
    case float_class_normal:
1042
        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1043
            float_raise(float_flag_inexact, s);
1044
        }
1045
        break;
1046
    default:
1047
        g_assert_not_reached();
1048
    }
1049
}
1050

1051
/*
1052
 * Returns the result of converting the floating-point value `a' to
1053
 * the two's complement integer format. The conversion is performed
1054
 * according to the IEC/IEEE Standard for Binary Floating-Point
1055
 * Arithmetic---which means in particular that the conversion is
1056
 * rounded according to the current rounding mode. If `a' is a NaN,
1057
 * the largest positive integer is returned. Otherwise, if the
1058
 * conversion overflows, the largest integer with the same sign as `a'
1059
 * is returned.
1060
 */
1061
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1062
                                     int scale, int64_t min, int64_t max,
1063
                                     float_status *s)
1064
{
1065
    int flags = 0;
1066
    uint64_t r;
1067

1068
    switch (p->cls) {
1069
    case float_class_snan:
1070
        flags |= float_flag_invalid_snan;
1071
        /* fall through */
1072
    case float_class_qnan:
1073
        flags |= float_flag_invalid;
1074
        r = max;
1075
        break;
1076

1077
    case float_class_inf:
1078
        flags = float_flag_invalid | float_flag_invalid_cvti;
1079
        r = p->sign ? min : max;
1080
        break;
1081

1082
    case float_class_zero:
1083
        return 0;
1084

1085
    case float_class_normal:
1086
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1087
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1088
            flags = float_flag_inexact;
1089
        }
1090

1091
        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1092
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1093
        } else {
1094
            r = UINT64_MAX;
1095
        }
1096
        if (p->sign) {
1097
            if (r <= -(uint64_t)min) {
1098
                r = -r;
1099
            } else {
1100
                flags = float_flag_invalid | float_flag_invalid_cvti;
1101
                r = min;
1102
            }
1103
        } else if (r > max) {
1104
            flags = float_flag_invalid | float_flag_invalid_cvti;
1105
            r = max;
1106
        }
1107
        break;
1108

1109
    default:
1110
        g_assert_not_reached();
1111
    }
1112

1113
    float_raise(flags, s);
1114
    return r;
1115
}
1116

1117
/*
1118
 *  Returns the result of converting the floating-point value `a' to
1119
 *  the unsigned integer format. The conversion is performed according
1120
 *  to the IEC/IEEE Standard for Binary Floating-Point
1121
 *  Arithmetic---which means in particular that the conversion is
1122
 *  rounded according to the current rounding mode. If `a' is a NaN,
1123
 *  the largest unsigned integer is returned. Otherwise, if the
1124
 *  conversion overflows, the largest unsigned integer is returned. If
1125
 *  the 'a' is negative, the result is rounded and zero is returned;
1126
 *  values that do not round to zero will raise the inexact exception
1127
 *  flag.
1128
 */
1129
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1130
                                      int scale, uint64_t max, float_status *s)
1131
{
1132
    int flags = 0;
1133
    uint64_t r;
1134

1135
    switch (p->cls) {
1136
    case float_class_snan:
1137
        flags |= float_flag_invalid_snan;
1138
        /* fall through */
1139
    case float_class_qnan:
1140
        flags |= float_flag_invalid;
1141
        r = max;
1142
        break;
1143

1144
    case float_class_inf:
1145
        flags = float_flag_invalid | float_flag_invalid_cvti;
1146
        r = p->sign ? 0 : max;
1147
        break;
1148

1149
    case float_class_zero:
1150
        return 0;
1151

1152
    case float_class_normal:
1153
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1154
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1155
            flags = float_flag_inexact;
1156
            if (p->cls == float_class_zero) {
1157
                r = 0;
1158
                break;
1159
            }
1160
        }
1161

1162
        if (p->sign) {
1163
            flags = float_flag_invalid | float_flag_invalid_cvti;
1164
            r = 0;
1165
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1166
            flags = float_flag_invalid | float_flag_invalid_cvti;
1167
            r = max;
1168
        } else {
1169
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1170
            if (r > max) {
1171
                flags = float_flag_invalid | float_flag_invalid_cvti;
1172
                r = max;
1173
            }
1174
        }
1175
        break;
1176

1177
    default:
1178
        g_assert_not_reached();
1179
    }
1180

1181
    float_raise(flags, s);
1182
    return r;
1183
}
1184

1185
/*
1186
 * Like partsN(float_to_sint), except do not saturate the result.
1187
 * Instead, return the rounded unbounded precision two's compliment result,
1188
 * modulo 2**(bitsm1 + 1).
1189
 */
1190
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
1191
                                            FloatRoundMode rmode,
1192
                                            int bitsm1, float_status *s)
1193
{
1194
    int flags = 0;
1195
    uint64_t r;
1196
    bool overflow = false;
1197

1198
    switch (p->cls) {
1199
    case float_class_snan:
1200
        flags |= float_flag_invalid_snan;
1201
        /* fall through */
1202
    case float_class_qnan:
1203
        flags |= float_flag_invalid;
1204
        r = 0;
1205
        break;
1206

1207
    case float_class_inf:
1208
        overflow = true;
1209
        r = 0;
1210
        break;
1211

1212
    case float_class_zero:
1213
        return 0;
1214

1215
    case float_class_normal:
1216
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1217
        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
1218
            flags = float_flag_inexact;
1219
        }
1220

1221
        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1222
            /*
1223
             * Because we rounded to integral, and exp < 64,
1224
             * we know frac_low is zero.
1225
             */
1226
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1227
            if (p->exp < bitsm1) {
1228
                /* Result in range. */
1229
            } else if (p->exp == bitsm1) {
1230
                /* The only in-range value is INT_MIN. */
1231
                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
1232
            } else {
1233
                overflow = true;
1234
            }
1235
        } else {
1236
            /* Overflow, but there might still be bits to return. */
1237
            int shl = p->exp - DECOMPOSED_BINARY_POINT;
1238
            if (shl < N) {
1239
                frac_shl(p, shl);
1240
                r = p->frac_hi;
1241
            } else {
1242
                r = 0;
1243
            }
1244
            overflow = true;
1245
        }
1246

1247
        if (p->sign) {
1248
            r = -r;
1249
        }
1250
        break;
1251

1252
    default:
1253
        g_assert_not_reached();
1254
    }
1255

1256
    if (overflow) {
1257
        flags = float_flag_invalid | float_flag_invalid_cvti;
1258
    }
1259
    float_raise(flags, s);
1260
    return r;
1261
}
1262

1263
/*
1264
 * Integer to float conversions
1265
 *
1266
 * Returns the result of converting the two's complement integer `a'
1267
 * to the floating-point format. The conversion is performed according
1268
 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1269
 */
1270
static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1271
                                  int scale, float_status *s)
1272
{
1273
    uint64_t f = a;
1274
    int shift;
1275

1276
    memset(p, 0, sizeof(*p));
1277

1278
    if (a == 0) {
1279
        p->cls = float_class_zero;
1280
        return;
1281
    }
1282

1283
    p->cls = float_class_normal;
1284
    if (a < 0) {
1285
        f = -f;
1286
        p->sign = true;
1287
    }
1288
    shift = clz64(f);
1289
    scale = MIN(MAX(scale, -0x10000), 0x10000);
1290

1291
    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1292
    p->frac_hi = f << shift;
1293
}
1294

1295
/*
1296
 * Unsigned Integer to float conversions
1297
 *
1298
 * Returns the result of converting the unsigned integer `a' to the
1299
 * floating-point format. The conversion is performed according to the
1300
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1301
 */
1302
static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1303
                                  int scale, float_status *status)
1304
{
1305
    memset(p, 0, sizeof(*p));
1306

1307
    if (a == 0) {
1308
        p->cls = float_class_zero;
1309
    } else {
1310
        int shift = clz64(a);
1311
        scale = MIN(MAX(scale, -0x10000), 0x10000);
1312
        p->cls = float_class_normal;
1313
        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1314
        p->frac_hi = a << shift;
1315
    }
1316
}
1317

1318
/*
1319
 * Float min/max.
1320
 */
1321
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1322
                                   float_status *s, int flags)
1323
{
1324
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1325
    int a_exp, b_exp, cmp;
1326

1327
    if (unlikely(ab_mask & float_cmask_anynan)) {
1328
        /*
1329
         * For minNum/maxNum (IEEE 754-2008)
1330
         * or minimumNumber/maximumNumber (IEEE 754-2019),
1331
         * if one operand is a QNaN, and the other
1332
         * operand is numerical, then return numerical argument.
1333
         */
1334
        if ((flags & (minmax_isnum | minmax_isnumber))
1335
            && !(ab_mask & float_cmask_snan)
1336
            && (ab_mask & ~float_cmask_qnan)) {
1337
            return is_nan(a->cls) ? b : a;
1338
        }
1339

1340
        /*
1341
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
1342
         * are removed and replaced with minimum, minimumNumber, maximum
1343
         * and maximumNumber.
1344
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
1345
         *   If both operands are NaNs, a QNaN is returned.
1346
         *   If either operand is a SNaN,
1347
         *   an invalid operation exception is signaled,
1348
         *   but unless both operands are NaNs,
1349
         *   the SNaN is otherwise ignored and not converted to a QNaN.
1350
         */
1351
        if ((flags & minmax_isnumber)
1352
            && (ab_mask & float_cmask_snan)
1353
            && (ab_mask & ~float_cmask_anynan)) {
1354
            float_raise(float_flag_invalid, s);
1355
            return is_nan(a->cls) ? b : a;
1356
        }
1357

1358
        return parts_pick_nan(a, b, s);
1359
    }
1360

1361
    a_exp = a->exp;
1362
    b_exp = b->exp;
1363

1364
    if (unlikely(ab_mask != float_cmask_normal)) {
1365
        switch (a->cls) {
1366
        case float_class_normal:
1367
            break;
1368
        case float_class_inf:
1369
            a_exp = INT16_MAX;
1370
            break;
1371
        case float_class_zero:
1372
            a_exp = INT16_MIN;
1373
            break;
1374
        default:
1375
            g_assert_not_reached();
1376
            break;
1377
        }
1378
        switch (b->cls) {
1379
        case float_class_normal:
1380
            break;
1381
        case float_class_inf:
1382
            b_exp = INT16_MAX;
1383
            break;
1384
        case float_class_zero:
1385
            b_exp = INT16_MIN;
1386
            break;
1387
        default:
1388
            g_assert_not_reached();
1389
            break;
1390
        }
1391
    }
1392

1393
    /* Compare magnitudes. */
1394
    cmp = a_exp - b_exp;
1395
    if (cmp == 0) {
1396
        cmp = frac_cmp(a, b);
1397
    }
1398

1399
    /*
1400
     * Take the sign into account.
1401
     * For ismag, only do this if the magnitudes are equal.
1402
     */
1403
    if (!(flags & minmax_ismag) || cmp == 0) {
1404
        if (a->sign != b->sign) {
1405
            /* For differing signs, the negative operand is less. */
1406
            cmp = a->sign ? -1 : 1;
1407
        } else if (a->sign) {
1408
            /* For two negative operands, invert the magnitude comparison. */
1409
            cmp = -cmp;
1410
        }
1411
    }
1412

1413
    if (flags & minmax_ismin) {
1414
        cmp = -cmp;
1415
    }
1416
    return cmp < 0 ? b : a;
1417
}
1418

1419
/*
1420
 * Floating point compare
1421
 */
1422
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1423
                                     float_status *s, bool is_quiet)
1424
{
1425
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1426

1427
    if (likely(ab_mask == float_cmask_normal)) {
1428
        FloatRelation cmp;
1429

1430
        if (a->sign != b->sign) {
1431
            goto a_sign;
1432
        }
1433
        if (a->exp == b->exp) {
1434
            cmp = frac_cmp(a, b);
1435
        } else if (a->exp < b->exp) {
1436
            cmp = float_relation_less;
1437
        } else {
1438
            cmp = float_relation_greater;
1439
        }
1440
        if (a->sign) {
1441
            cmp = -cmp;
1442
        }
1443
        return cmp;
1444
    }
1445

1446
    if (unlikely(ab_mask & float_cmask_anynan)) {
1447
        if (ab_mask & float_cmask_snan) {
1448
            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1449
        } else if (!is_quiet) {
1450
            float_raise(float_flag_invalid, s);
1451
        }
1452
        return float_relation_unordered;
1453
    }
1454

1455
    if (ab_mask & float_cmask_zero) {
1456
        if (ab_mask == float_cmask_zero) {
1457
            return float_relation_equal;
1458
        } else if (a->cls == float_class_zero) {
1459
            goto b_sign;
1460
        } else {
1461
            goto a_sign;
1462
        }
1463
    }
1464

1465
    if (ab_mask == float_cmask_inf) {
1466
        if (a->sign == b->sign) {
1467
            return float_relation_equal;
1468
        }
1469
    } else if (b->cls == float_class_inf) {
1470
        goto b_sign;
1471
    } else {
1472
        g_assert(a->cls == float_class_inf);
1473
    }
1474

1475
 a_sign:
1476
    return a->sign ? float_relation_less : float_relation_greater;
1477
 b_sign:
1478
    return b->sign ? float_relation_greater : float_relation_less;
1479
}
1480

1481
/*
1482
 * Multiply A by 2 raised to the power N.
1483
 */
1484
static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1485
{
1486
    switch (a->cls) {
1487
    case float_class_snan:
1488
    case float_class_qnan:
1489
        parts_return_nan(a, s);
1490
        break;
1491
    case float_class_zero:
1492
    case float_class_inf:
1493
        break;
1494
    case float_class_normal:
1495
        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1496
        break;
1497
    default:
1498
        g_assert_not_reached();
1499
    }
1500
}
1501

1502
/*
1503
 * Return log2(A)
1504
 */
1505
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
1506
{
1507
    uint64_t a0, a1, r, t, ign;
1508
    FloatPartsN f;
1509
    int i, n, a_exp, f_exp;
1510

1511
    if (unlikely(a->cls != float_class_normal)) {
1512
        switch (a->cls) {
1513
        case float_class_snan:
1514
        case float_class_qnan:
1515
            parts_return_nan(a, s);
1516
            return;
1517
        case float_class_zero:
1518
            float_raise(float_flag_divbyzero, s);
1519
            /* log2(0) = -inf */
1520
            a->cls = float_class_inf;
1521
            a->sign = 1;
1522
            return;
1523
        case float_class_inf:
1524
            if (unlikely(a->sign)) {
1525
                goto d_nan;
1526
            }
1527
            return;
1528
        default:
1529
            break;
1530
        }
1531
        g_assert_not_reached();
1532
    }
1533
    if (unlikely(a->sign)) {
1534
        goto d_nan;
1535
    }
1536

1537
    /* TODO: This algorithm looses bits too quickly for float128. */
1538
    g_assert(N == 64);
1539

1540
    a_exp = a->exp;
1541
    f_exp = -1;
1542

1543
    r = 0;
1544
    t = DECOMPOSED_IMPLICIT_BIT;
1545
    a0 = a->frac_hi;
1546
    a1 = 0;
1547

1548
    n = fmt->frac_size + 2;
1549
    if (unlikely(a_exp == -1)) {
1550
        /*
1551
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
1552
         * When the value is very close to 1.0, there are lots of 1's in
1553
         * the msb parts of the fraction.  At the end, when we subtract
1554
         * this value from -1.0, we can see a catastrophic loss of precision,
1555
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
1556
         * bits of y in the final result.  To minimize this, compute as many
1557
         * digits as we can.
1558
         * ??? This case needs another algorithm to avoid this.
1559
         */
1560
        n = fmt->frac_size * 2 + 2;
1561
        /* Don't compute a value overlapping the sticky bit */
1562
        n = MIN(n, 62);
1563
    }
1564

1565
    for (i = 0; i < n; i++) {
1566
        if (a1) {
1567
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
1568
        } else if (a0 & 0xffffffffull) {
1569
            mul64To128(a0, a0, &a0, &a1);
1570
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
1571
            a0 >>= 32;
1572
            a0 *= a0;
1573
        } else {
1574
            goto exact;
1575
        }
1576

1577
        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
1578
            if (unlikely(a_exp == 0 && r == 0)) {
1579
                /*
1580
                 * When a_exp == 0, we're computing the log2 of a value
1581
                 * [1.0,2.0).  When the value is very close to 1.0, there
1582
                 * are lots of 0's in the msb parts of the fraction.
1583
                 * We need to compute more digits to produce a correct
1584
                 * result -- restart at the top of the fraction.
1585
                 * ??? This is likely to lose precision quickly, as for
1586
                 * float128; we may need another method.
1587
                 */
1588
                f_exp -= i;
1589
                t = r = DECOMPOSED_IMPLICIT_BIT;
1590
                i = 0;
1591
            } else {
1592
                r |= t;
1593
            }
1594
        } else {
1595
            add128(a0, a1, a0, a1, &a0, &a1);
1596
        }
1597
        t >>= 1;
1598
    }
1599

1600
    /* Set sticky for inexact. */
1601
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
1602

1603
 exact:
1604
    parts_sint_to_float(a, a_exp, 0, s);
1605
    if (r == 0) {
1606
        return;
1607
    }
1608

1609
    memset(&f, 0, sizeof(f));
1610
    f.cls = float_class_normal;
1611
    f.frac_hi = r;
1612
    f.exp = f_exp - frac_normalize(&f);
1613

1614
    if (a_exp < 0) {
1615
        parts_sub_normal(a, &f);
1616
    } else if (a_exp > 0) {
1617
        parts_add_normal(a, &f);
1618
    } else {
1619
        *a = f;
1620
    }
1621
    return;
1622

1623
 d_nan:
1624
    float_raise(float_flag_invalid, s);
1625
    parts_default_nan(a, s);
1626
}
1627

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.