/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

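/*
 * Usage sketch (illustrative, with invented helper names): a wrapper
 * built on float32_gen2() supplies a host-FPU callback, a softfloat
 * fallback, and the pre/post checks.  The "pre" check keeps hardfloat
 * only when both inputs are zero or normal; the "post" check decides
 * whether a tiny result must be recomputed in softfloat so that the
 * exception flags come out right.
 *
 *     static float hard_f32_add(float a, float b)
 *     {
 *         return a + b;
 *     }
 *
 *     static float32 float32_add_sketch(float32 a, float32 b,
 *                                       float_status *s)
 *     {
 *         return float32_gen2(a, b, s, hard_f32_add, soft_f32_add,
 *                             f32_is_zon2, f32_addsub_post);
 *     }
 *
 * Here soft_f32_add and f32_addsub_post stand in for a softfloat
 * implementation of the operation and a format-specific post check,
 * both assumed to be defined elsewhere.
 */
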
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};

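/*
 * Usage sketch: callers OR the per-operand class masks together to
 * test several classes at once, as the pick_nan_muladd prototypes
 * below suggest:
 *
 *     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 *     if (ab_mask & float_cmask_anynan) {
 *         ... at least one operand is a quiet or signaling NaN ...
 *     }
 */
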
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};

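/*
 * Worked example: IEEE 754-2008 minNum() corresponds to the flag
 * combination (minmax_ismin | minmax_isnum), and maxNumMag() to
 * (minmax_isnum | minmax_ismag); parts_minmax() decodes these flags.
 */
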
/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)

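/*
 * Worked example: canonicalized float32 1.0f is represented as
 * cls = float_class_normal, sign = 0, exp = 0 (unbiased) and
 * frac = DECOMPOSED_IMPLICIT_BIT, i.e. the integer bit of the
 * significand sits exactly at the decomposed binary point.
 */
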
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   round_mask: bits below the lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *   m68k_denormal: explicit integer bit for extended precision may be 1
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool m68k_denormal;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

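/*
 * Worked example: FLOAT_PARAMS(8, 23) for float32 expands to
 * exp_bias = 127, exp_max = 255, frac_shift = 40 and
 * round_mask = 0xffffffffff (the 40 bits that end up below the
 * 23-bit fraction once it is aligned under DECOMPOSED_BINARY_POINT).
 * The compile-time checks below are purely illustrative.
 */
QEMU_BUILD_BUG_ON((((1 << 8) - 1) >> 1) != 127);
QEMU_BUILD_BUG_ON(((-23 - 1) & 63) != 40);
QEMU_BUILD_BUG_ON(((1ull << ((-23 - 1) & 63)) - 1) != 0xffffffffffull);
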
#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = {
        FLOATX80_PARAMS(64),
#ifdef TARGET_M68K
        .m68k_denormal = true,
#endif
    },
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}

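/*
 * Worked example: unpacking float32 1.0f (raw bits 0x3f800000) with
 * float32_params gives sign = 0, exp = 0x7f and frac = 0; the frac
 * field is still right-aligned here, and the implicit integer bit is
 * only supplied later by canonicalization.
 */
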
static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

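/*
 * Worked example: packing { .sign = 0, .exp = 0xff, .frac = 0 } with
 * float32_params yields 0x7f800000, i.e. +infinity.  For in-range
 * fields, pack_raw64() is the exact inverse of unpack_raw64().
 */
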
/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s);
static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
                                             FloatRoundMode rmode,
                                             int bitsm1, float_status *s);

#define parts_float_to_sint_modulo(P, R, M, S) \
    PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
                                     float_status *s, bool q);
static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return (a->frac == b->frac ? float_relation_equal
            : a->frac < b->frac ? float_relation_less
            : float_relation_greater);
}

static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return float_relation_equal;
        }
    }
    return ta < tb ? float_relation_less : float_relation_greater;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

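/*
 * Worked example: for 1.0 / 1.5, A.frac = 0x8000000000000000 is less
 * than B.frac = 0xc000000000000000, so the dividend is used as the
 * high word (n0:n1 = A.frac:0) and the function returns true to
 * decrement the exponent.  udiv_qrnnd() then yields
 * q = 0xaaaaaaaaaaaaaaaa with a non-zero remainder, so the stored
 * fraction is 0xaaaaaaaaaaaaaaab: 4/3 rounded down, with the sticky
 * lsb recording the inexactness.
 */
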
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

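/*
 * Worked example: frac64_normalize() on frac = 0x0000000000000001
 * shifts left by clz64(1) = 63, leaving frac = 0x8000000000000000
 * with the msb at DECOMPOSED_BINARY_POINT, and returns 63 so the
 * caller can adjust the exponent by the same amount.
 */
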
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)

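/*
 * Worked example: frac64_shrjam() with frac = 0x8000000000000001 and
 * c = 4 produces 0x0800000000000001: the value is shifted right by 4
 * and the non-zero bits shifted out are "jammed" into the lsb as a
 * sticky bit, preserving inexactness for later rounding.
 */
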
1539
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1540
{
1541
    return usub64_overflow(a->frac, b->frac, &r->frac);
1542
}
1543

1544
static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1545
{
1546
    bool c = 0;
1547
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1548
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1549
    return c;
1550
}
1551

1552
static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1553
{
1554
    bool c = 0;
1555
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1556
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1557
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1558
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1559
    return c;
1560
}
1561

1562
#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1563

1564
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

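/*
 * frac_truncjam narrows a wide fraction to the next smaller size,
 * folding all of the discarded low words into the sticky lsb, just
 * as frac_shrjam does for an in-place shift.
 */
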
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)

/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};

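/*
 * Layout note (an observation, not part of the imported data): the 128
 * entries form a 7-bit index built from 1 exponent bit and 6 fraction
 * bits, per the comment above; the two 64-entry halves hold separately
 * scaled estimates for the two exponent parities.
 */
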
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef  N
#undef  W
#define N            256

#include "softfloat-parts-addsub.c.inc"

#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW

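/*
 * The .c.inc files above are poor-man's templates: each inclusion
 * instantiates the parts{N} helpers for an N-bit fraction, with W
 * naming the wider FloatParts type available for intermediates
 * (e.g. double-width products while N is 64 or 128).
 */
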
/*
 * Pack/unpack routines with a specific FloatFmt.
 */

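/*
 * "Canonical" here means the decomposed FloatParts form: class, sign,
 * unbiased exponent, and a fraction aligned to a fixed position,
 * independent of the source format.  The unpack_canonical routines go
 * from the raw encoding to that form; round_pack_canonical rounds and
 * re-encodes for the given FloatFmt.
 */
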
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

static float64 float64r32_round_pack_canonical(FloatParts64 *p,
                                               float_status *s)
{
    parts_uncanon(p, s, &float32_params);

    /*
     * In parts_uncanon, we placed the fraction for float32 at the lsb.
     * We need to adjust the fraction higher so that the least N bits are
     * zero, and the fraction is adjacent to the float64 implicit bit.
     */
    switch (p->cls) {
    case float_class_normal:
        if (unlikely(p->exp == 0)) {
            /*
             * The result is denormal for float32, but can be represented
             * in normalized form for float64.  Adjust, per canonicalize.
             */
            int shift = frac_normalize(p);
            p->exp = (float32_params.frac_shift -
                      float32_params.exp_bias - shift + 1 +
                      float64_params.exp_bias);
            frac_shr(p, float64_params.frac_shift);
        } else {
            frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
            p->exp += float64_params.exp_bias - float32_params.exp_bias;
        }
        break;
    case float_class_snan:
    case float_class_qnan:
        frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
        p->exp = float64_params.exp_max;
        break;
    case float_class_inf:
        p->exp = float64_params.exp_max;
        break;
    case float_class_zero:
        break;
    default:
        g_assert_not_reached();
    }

    return float64_pack_raw(p);
}

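/*
 * float64r32: the value is rounded to float32 range and precision
 * (note parts_uncanon with float32_params above) but returned in
 * float64 encoding, for targets whose single-precision operations
 * produce results in double-precision registers.
 */
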
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}

/* Returns false if the encoding is invalid. */
static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
                                      float_status *s)
{
    /* Ensure rounding precision is set before beginning. */
    switch (s->floatx80_rounding_precision) {
    case floatx80_precision_x:
    case floatx80_precision_d:
    case floatx80_precision_s:
        break;
    default:
        g_assert_not_reached();
    }

    if (unlikely(floatx80_invalid_encoding(f))) {
        float_raise(float_flag_invalid, s);
        return false;
    }

    floatx80_unpack_raw(p, f);

    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
    } else {
        /* The explicit integer bit is ignored, after invalid checks. */
        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
        p->cls = (p->frac_hi == 0 ? float_class_inf
                  : parts_is_snan_frac(p->frac_hi, s)
                  ? float_class_snan : float_class_qnan);
    }
    return true;
}

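/*
 * Unlike the IEEE interchange formats, floatx80 stores its integer bit
 * explicitly (bit 63 of the fraction), which is why the Inf/NaN path
 * above masks it off, and why floatx80_invalid_encoding() can reject
 * bit patterns with no valid interpretation at all.
 */
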
static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
    uint64_t frac;
    int exp;

    switch (p->cls) {
    case float_class_normal:
        if (s->floatx80_rounding_precision == floatx80_precision_x) {
            parts_uncanon_normal(p, s, fmt);
            frac = p->frac_hi;
            exp = p->exp;
        } else {
            FloatParts64 p64;

            p64.sign = p->sign;
            p64.exp = p->exp;
            frac_truncjam(&p64, p);
            parts_uncanon_normal(&p64, s, fmt);
            frac = p64.frac;
            exp = p64.exp;
        }
        if (exp != fmt->exp_max) {
            break;
        }
        /* rounded to inf -- fall through to set frac correctly */

    case float_class_inf:
        /* x86 and m68k differ in the setting of the integer bit. */
        frac = floatx80_infinity_low;
        exp = fmt->exp_max;
        break;

    case float_class_zero:
        frac = 0;
        exp = 0;
        break;

    case float_class_snan:
    case float_class_qnan:
        /* NaNs have the integer bit set. */
        frac = p->frac_hi | (1ull << 63);
        exp = fmt->exp_max;
        break;

    default:
        g_assert_not_reached();
    }

    return packFloatx80(p->sign, exp, frac);
}

/*
 * Addition and subtraction
 */

static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}

float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}

float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}

static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}

static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}

static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}

static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}

static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}

static bool f32_addsubmul_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    }
    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
}

static bool f64_addsubmul_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    } else {
        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
    }
}

static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

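/*
 * The *_post predicates above decide, once a host-FPU result has come
 * out tiny, whether softfloat must redo the operation: a zero result
 * from add/sub/mul is exact only if both inputs were zero; otherwise
 * fall back so that underflow/inexact flags are raised correctly.
 */
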
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}

static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
                                 bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64r32_round_pack_canonical(pr, status);
}

float64 float64r32_add(float64 a, float64 b, float_status *status)
{
    return float64r32_addsub(a, b, status, false);
}

float64 float64r32_sub(float64 a, float64 b, float_status *status)
{
    return float64r32_addsub(a, b, status, true);
}

static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}

bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}

bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}

static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}

float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}

float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}

static floatx80 QEMU_FLATTEN
floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_addsub(&pa, &pb, status, subtract);
    return floatx80_round_pack_canonical(pr, status);
}

floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, false);
}

floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, true);
}

/*
 * Multiplication
 */

float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}

float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}

float64 float64r32_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64r32_round_pack_canonical(pr, status);
}

bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}

floatx80 QEMU_FLATTEN
floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_mul(&pa, &pb, status);
    return floatx80_round_pack_canonical(pr, status);
}

/*
 * Fused multiply-add
 */

float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}

static bool force_soft_fma;

float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}

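/*
 * The fabsf(ur.h) <= FLT_MIN test above is the conservative underflow
 * check for the hardfloat path: a tiny host result may have been
 * rounded or flushed under host rules, so the operation is redone in
 * softfloat (with the original operands) to get target-correct flags
 * and rounding.
 */
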
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}

float64 float64r32_muladd(float64 a, float64 b, float64 c,
                          int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64r32_round_pack_canonical(pr, status);
}

bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
                                      int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    bfloat16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
                                      int flags, float_status *status)
{
    FloatParts128 pa, pb, pc, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    float128_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float128_round_pack_canonical(pr, status);
}

/*
 * Division
 */

float16 float16_div(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

static float hard_f32_div(float a, float b)
{
    return a / b;
}

static double hard_f64_div(double a, double b)
{
    return a / b;
}

static bool f32_div_pre(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
}

static bool f64_div_pre(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
}

static bool f32_div_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float32_is_zero(a.s);
}

static bool f64_div_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float64_is_zero(a.s);
}

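/*
 * For division the fast-path guards are asymmetric: *_div_pre demands
 * a zero-or-normal dividend and a strictly normal divisor, so the host
 * division can neither trap nor need Inf/NaN/denormal handling;
 * *_div_post (given the original operands) treats a tiny quotient as
 * exact only when the dividend was zero.
 */
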
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}

float64 float64r32_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64r32_round_pack_canonical(pr, status);
}

bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN
float128_div(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}

floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_div(&pa, &pb, status);
    return floatx80_round_pack_canonical(pr, status);
}

/*
 * Remainder
 */

float32 float32_rem(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float32_round_pack_canonical(pr, status);
}

float64 float64_rem(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float64_round_pack_canonical(pr, status);
}

float128 float128_rem(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_modrem(&pa, &pb, NULL, status);

    return float128_round_pack_canonical(pr, status);
}

/*
 * Returns the remainder of the extended double-precision floating-point
 * value `a' with respect to the corresponding value `b'.
 * If 'mod' is false, the operation is performed according to the IEC/IEEE
 * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, the
 * remainder is computed by truncating the quotient toward zero instead,
 * and *quotient is set to the low 64 bits of the absolute value of the
 * integer quotient.
 */
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
                         uint64_t *quotient, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    *quotient = 0;
    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }
    pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);

    return floatx80_round_pack_canonical(pr, status);
}

floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, false, &quotient, status);
}

floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, true, &quotient, status);
}

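/*
 * Of the two flavours above: floatx80_rem (mod=false) is the IEEE
 * remainder, with the quotient rounded to nearest, so the result's
 * magnitude is at most |b|/2; floatx80_mod (mod=true) truncates the
 * quotient toward zero, fmod-style (cf. x87 FPREM1 vs FPREM).
 */
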
/*
 * Float to Float conversions
 *
 * Returns the result of converting one float format to another. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 *
 * Usually this only needs to take care of raising invalid exceptions
 * and handling the conversion on NaNs.
 */

static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid_snan, s);
        /* fall through */
    case float_class_qnan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}

static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)

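/*
 * parts_float_to_ahp handles ARM's "alternative half precision",
 * which spends the all-ones exponent on ordinary numbers: with no
 * Inf or NaN encodings available, NaN inputs collapse to zero and
 * Inf inputs to the maximum normal, each raising Invalid as above.
 */
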
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}

static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float32_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results.  */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;
        return ud.s;
    } else if (float32_is_zero(a)) {
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        return soft_float32_to_float64(a, s);
    }
}

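/*
 * The fast path above relies on every normal float32 being exactly
 * representable as a float64, so the host's widening conversion is
 * always exact; zeros are handled separately to preserve the sign,
 * and everything else (denormals, Inf, NaN) takes the soft path.
 */
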
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float64_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float32 floatx80_to_float32(floatx80 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    if (floatx80_unpack_canonical(&p128, a, s)) {
        parts_float_to_float_narrow(&p64, &p128, s);
    } else {
        parts_default_nan(&p64, s);
    }
    return float32_round_pack_canonical(&p64, s);
}

float64 floatx80_to_float64(floatx80 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    if (floatx80_unpack_canonical(&p128, a, s)) {
        parts_float_to_float_narrow(&p64, &p128, s);
    } else {
        parts_default_nan(&p64, s);
    }
    return float64_round_pack_canonical(&p64, s);
}

float128 floatx80_to_float128(floatx80 a, float_status *s)
{
    FloatParts128 p;

    if (floatx80_unpack_canonical(&p, a, s)) {
        parts_float_to_float(&p, s);
    } else {
        parts_default_nan(&p, s);
    }
    return float128_round_pack_canonical(&p, s);
}

floatx80 float32_to_floatx80(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return floatx80_round_pack_canonical(&p128, s);
}

floatx80 float64_to_floatx80(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return floatx80_round_pack_canonical(&p128, s);
}

floatx80 float128_to_floatx80(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return floatx80_round_pack_canonical(&p, s);
}

/*
 * Round to integral value
 */

float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}

floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }

    parts_round_to_int(&p, status->float_rounding_mode, 0, status,
                       &floatx80_params[status->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, status);
}

/*
 * Floating-point to signed integer conversions
 */

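/*
 * In the _scalbn converters below, the input is effectively multiplied
 * by 2**scale before conversion (hence the name), rmode selects the
 * rounding, and out-of-range or invalid inputs saturate to the given
 * min/max bounds with the invalid flag raised.
 */
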
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                               float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    int flags = 0;
    Int128 r;
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);

    switch (p.cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = UINT128_MAX;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p.sign ? INT128_MIN : INT128_MAX;
        break;

    case float_class_zero:
        return int128_zero();

    case float_class_normal:
        if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
            flags = float_flag_inexact;
        }

        if (p.exp < 127) {
            int shift = 127 - p.exp;
            r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
            if (p.sign) {
                r = int128_neg(r);
            }
        } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
                   p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
            r = INT128_MIN;
        } else {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = p.sign ? INT128_MIN : INT128_MAX;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

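/*
 * In the normal case above, exp < 127 means |value| < 2**127, which
 * always fits in Int128; the only in-range value with exp == 127 is
 * -2**127 itself (sign set, fraction equal to the implicit bit),
 * i.e. INT128_MIN.
 */
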
static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        parts_default_nan(&p, s);
    }
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        parts_default_nan(&p, s);
    }
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

Int128 float128_to_int128(float128 a, float_status *s)
{
    return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t floatx80_to_int32(floatx80 a, float_status *s)
{
    return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t floatx80_to_int64(floatx80 a, float_status *s)
{
    return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
}

int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
{
    return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
{
    return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3412
{
3413
    return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3414
}
3415

3416
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3417
{
3418
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3419
}
3420

3421
int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3422
{
3423
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3424
}
3425

3426
int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3427
{
3428
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3429
}
3430

3431
int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3432
{
3433
    return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3434
}
3435

3436
int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3437
{
3438
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3439
}
3440

3441
int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3442
{
3443
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3444
}
3445

3446
int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3447
{
3448
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3449
}
3450

3451
int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3452
                                float_status *s)
3453
{
3454
    FloatParts64 p;
3455

3456
    float64_unpack_canonical(&p, a, s);
3457
    return parts_float_to_sint_modulo(&p, rmode, 31, s);
3458
}
3459

3460
int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3461
                                float_status *s)
3462
{
3463
    FloatParts64 p;
3464

3465
    float64_unpack_canonical(&p, a, s);
3466
    return parts_float_to_sint_modulo(&p, rmode, 63, s);
3467
}
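
/*
 * The _modulo variants differ from the saturating conversions above:
 * an out-of-range magnitude wraps (keeps only the low-order result
 * bits) rather than clamping, with the 31/63 arguments naming the
 * most-significant result bit kept by parts_float_to_sint_modulo().
 * The 32-bit form provides the semantics needed by instructions such
 * as Arm's FJCVTZS.
 */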

/*
 * Floating-point to unsigned integer conversions
 */

uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
                                 int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
                                         int scale, float_status *s)
{
    int flags = 0;
    Int128 r;
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);

    switch (p.cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = UINT128_MAX;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p.sign ? int128_zero() : UINT128_MAX;
        break;

    case float_class_zero:
        return int128_zero();

    case float_class_normal:
        if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
            flags = float_flag_inexact;
            if (p.cls == float_class_zero) {
                r = int128_zero();
                break;
            }
        }

        if (p.sign) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = int128_zero();
        } else if (p.exp <= 127) {
            int shift = 127 - p.exp;
            r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
        } else {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = UINT128_MAX;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
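
/*
 * In the normal case above, the canonical significand has its MSB
 * (the implicit integer bit) at bit 127 of frac_hi:frac_lo, so a
 * value with unbiased exponent p.exp in [0, 127] becomes an integer
 * by shifting the 128-bit significand right by 127 - p.exp.
 */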

uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

Int128 float128_to_uint128(float128 a, float_status *s)
{
    return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
}

uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

/*
 * Signed integer to floating-point conversions
 */

float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
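
/*
 * All of the narrower signed types above funnel through
 * int64_to_float16_scalbn(): sign extension to int64_t is exact, so
 * only one widest-input implementation is needed per output format.
 */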

float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        union_float32 ur;
        ur.h = a;
        return ur.s;
    }

    parts64_sint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        union_float64 ur;
        ur.h = a;
        return ur.s;
    }

    parts_sint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

float128 int128_to_float128(Int128 a, float_status *status)
{
    FloatParts128 p = { };
    int shift;

    if (int128_nz(a)) {
        p.cls = float_class_normal;
        if (!int128_nonneg(a)) {
            p.sign = true;
            a = int128_neg(a);
        }

        shift = clz64(int128_gethi(a));
        if (shift == 64) {
            shift += clz64(int128_getlo(a));
        }

        p.exp = 127 - shift;
        a = int128_lshift(a, shift);

        p.frac_hi = int128_gethi(a);
        p.frac_lo = int128_getlo(a);
    } else {
        p.cls = float_class_zero;
    }

    return float128_round_pack_canonical(&p, status);
}
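
/*
 * The two-step clz64() above counts leading zeros across the full
 * 128-bit magnitude; shifting left by that count puts the most
 * significant set bit at bit 127, which the exponent 127 - shift then
 * identifies as the implicit integer bit.
 */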

float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}

floatx80 int64_to_floatx80(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return floatx80_round_pack_canonical(&p, status);
}

floatx80 int32_to_floatx80(int32_t a, float_status *status)
{
    return int64_to_floatx80(a, status);
}

/*
 * Unsigned integer to floating-point conversions
 */

float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        union_float32 ur;
        ur.h = a;
        return ur.s;
    }

    parts_uint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    /* Without scaling, there are no overflow concerns. */
    if (likely(scale == 0) && can_use_fpu(status)) {
        union_float64 ur;
        ur.h = a;
        return ur.s;
    }

    parts_uint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

float128 uint64_to_float128(uint64_t a, float_status *status)
{
    FloatParts128 p;

    parts_uint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 uint128_to_float128(Int128 a, float_status *status)
{
    FloatParts128 p = { };
    int shift;

    if (int128_nz(a)) {
        p.cls = float_class_normal;

        shift = clz64(int128_gethi(a));
        if (shift == 64) {
            shift += clz64(int128_getlo(a));
        }

        p.exp = 127 - shift;
        a = int128_lshift(a, shift);

        p.frac_hi = int128_gethi(a);
        p.frac_lo = int128_getlo(a);
    } else {
        p.cls = float_class_zero;
    }

    return float128_round_pack_canonical(&p, status);
}
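
/*
 * uint128_to_float128() is int128_to_float128() minus the negation
 * step: the Int128 payload is reinterpreted as unsigned, so no sign
 * handling is required before normalization.
 */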

/*
 * Minimum and maximum
 */

static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float16_round_pack_canonical(pr, s);
}

static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
                                float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return bfloat16_round_pack_canonical(pr, s);
}

static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float32_round_pack_canonical(pr, s);
}

static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float64_round_pack_canonical(pr, s);
}

static float128 float128_minmax(float128 a, float128 b,
                                float_status *s, int flags)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float128_round_pack_canonical(pr, s);
}

#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                                \
    MINMAX_1(type, maxnum, minmax_isnum)                                  \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
    MINMAX_1(type, maximum_number, minmax_isnumber)                       \
    MINMAX_1(type, min, minmax_ismin)                                     \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
    MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
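
/*
 * For reference, each MINMAX_1() instantiation expands to a trivial
 * wrapper; e.g. MINMAX_1(float32, minnum, minmax_ismin | minmax_isnum)
 * produces:
 *
 *   float32 float32_minnum(float32 a, float32 b, float_status *s)
 *   { return float32_minmax(a, b, s, minmax_ismin | minmax_isnum); }
 */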

/*
 * Floating point compare
 */

static FloatRelation QEMU_FLATTEN
float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}

static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}
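
/*
 * The hardfloat path above relies on the C99 comparison macros
 * (isgreater, isgreaterequal, isless) being quiet: unlike the
 * relational operators they do not raise invalid on quiet NaNs, so
 * any unordered pair simply drops through to the softfloat comparison,
 * which sets the status flags correctly.
 */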

FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}

static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}

static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}

static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}

FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}

static FloatRelation QEMU_FLATTEN
floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    if (!floatx80_unpack_canonical(&pa, a, s) ||
        !floatx80_unpack_canonical(&pb, b, s)) {
        return float_relation_unordered;
    }
    return parts_compare(&pa, &pb, s, is_quiet);
}
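
/*
 * floatx80_do_compare() reports unordered for operands whose encoding
 * fails to unpack; any flag raising for those operands is left to
 * floatx80_unpack_canonical() itself.
 */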

FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
{
    return floatx80_do_compare(a, b, s, false);
}

FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
{
    return floatx80_do_compare(a, b, s, true);
}

/*
 * Scale by 2**N
 */

float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float16_round_pack_canonical(&p, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float64_round_pack_canonical(&p, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    parts_scalbn(&p, n, status);
    return floatx80_round_pack_canonical(&p, status);
}

/*
 * Square Root
 */

float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float16_params);
    return float16_round_pack_canonical(&p, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}

float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}

float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
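
/*
 * The two hardfloat wrappers above only use the host sqrt for inputs
 * that are positive zero or positive normal numbers: for those, the
 * only IEEE exception sqrt can raise is inexact, which per the
 * hardfloat criteria checked by can_use_fpu() earlier in this file
 * does not need to be tracked precisely.
 */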

float64 float64r32_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64r32_round_pack_canonical(&p, status);
}

bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float128_params);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_sqrt(floatx80 a, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        return floatx80_default_nan(s);
    }
    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, s);
}

/*
 * log2
 */

float32 float32_log2(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_log2(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}

/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/

float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
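
/*
 * parts_default_nan() leaves the fraction left-aligned in the working
 * significand; the right shift by frac_shift moves it down to the
 * storage position of each destination format before repacking.
 */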

/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/

float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/

static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    if (p.exp == 0 && p.frac != 0) {
        float_raise(float_flag_input_denormal, status);
        return true;
    }

    return false;
}

float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}

/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    shiftCount = clz64(aSig);
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;
}
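
/*
 * Worked example: an aSig with five leading zero bits gives
 * shiftCount = 5, so the significand is shifted left to set bit 63
 * and the exponent becomes 1 - 5 = -4 (subnormals have an effective
 * exponent of 1, and each left shift decrements it by one).
 */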

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
|     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
| the result is rounded to the same number of bits as single or double
| precision, respectively.  Otherwise, the result is rounded to the full
| precision of the extended double-precision format.
|     The input significand must be normalized or smaller.  If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding.  The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
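    /*
     * The masks follow from the narrower significand widths: for
     * double precision, roundMask covers the low 11 bits of zSig0,
     * keeping the top 53 bits, and roundIncrement is half the weight
     * of the lowest kept bit; for single precision the mask covers
     * the low 40 bits, keeping the top 24.
     */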
4971
    zSig0 |= ( zSig1 != 0 );
4972
    switch (roundingMode) {
4973
    case float_round_nearest_even:
4974
    case float_round_ties_away:
4975
        break;
4976
    case float_round_to_zero:
4977
        roundIncrement = 0;
4978
        break;
4979
    case float_round_up:
4980
        roundIncrement = zSign ? 0 : roundMask;
4981
        break;
4982
    case float_round_down:
4983
        roundIncrement = zSign ? roundMask : 0;
4984
        break;
4985
    default:
4986
        abort();
4987
    }
4988
    roundBits = zSig0 & roundMask;
4989
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4990
        if (    ( 0x7FFE < zExp )
4991
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4992
           ) {
4993
            goto overflow;
4994
        }
4995
        if ( zExp <= 0 ) {
4996
            if (status->flush_to_zero) {
4997
                float_raise(float_flag_output_denormal, status);
4998
                return packFloatx80(zSign, 0, 0);
4999
            }
5000
            isTiny = status->tininess_before_rounding
5001
                  || (zExp < 0 )
5002
                  || (zSig0 <= zSig0 + roundIncrement);
5003
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
5004
            zExp = 0;
5005
            roundBits = zSig0 & roundMask;
5006
            if (isTiny && roundBits) {
5007
                float_raise(float_flag_underflow, status);
5008
            }
5009
            if (roundBits) {
5010
                float_raise(float_flag_inexact, status);
5011
            }
5012
            zSig0 += roundIncrement;
5013
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
5014
            roundIncrement = roundMask + 1;
5015
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5016
                roundMask |= roundIncrement;
5017
            }
5018
            zSig0 &= ~ roundMask;
5019
            return packFloatx80( zSign, zExp, zSig0 );
5020
        }
5021
    }
5022
    if (roundBits) {
5023
        float_raise(float_flag_inexact, status);
5024
    }
5025
    zSig0 += roundIncrement;
5026
    if ( zSig0 < roundIncrement ) {
5027
        ++zExp;
5028
        zSig0 = UINT64_C(0x8000000000000000);
5029
    }
5030
    roundIncrement = roundMask + 1;
5031
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5032
        roundMask |= roundIncrement;
5033
    }
5034
    zSig0 &= ~ roundMask;
5035
    if ( zSig0 == 0 ) zExp = 0;
5036
    return packFloatx80( zSign, zExp, zSig0 );
5037
 precision80:
5038
    switch (roundingMode) {
5039
    case float_round_nearest_even:
5040
    case float_round_ties_away:
5041
        increment = ((int64_t)zSig1 < 0);
5042
        break;
5043
    case float_round_to_zero:
5044
        increment = 0;
5045
        break;
5046
    case float_round_up:
5047
        increment = !zSign && zSig1;
5048
        break;
5049
    case float_round_down:
5050
        increment = zSign && zSig1;
5051
        break;
5052
    default:
5053
        abort();
5054
    }
5055
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5056
        if (    ( 0x7FFE < zExp )
5057
             || (    ( zExp == 0x7FFE )
5058
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5059
                  && increment
5060
                )
5061
           ) {
5062
            roundMask = 0;
5063
 overflow:
5064
            float_raise(float_flag_overflow | float_flag_inexact, status);
5065
            if (    ( roundingMode == float_round_to_zero )
5066
                 || ( zSign && ( roundingMode == float_round_up ) )
5067
                 || ( ! zSign && ( roundingMode == float_round_down ) )
5068
               ) {
5069
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5070
            }
5071
            return packFloatx80(zSign,
5072
                                floatx80_infinity_high,
5073
                                floatx80_infinity_low);
5074
        }
5075
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
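
/*
 * Tie example at full 80-bit precision: when zSig1 is exactly
 * 0x8000000000000000 under float_round_nearest_even, `increment' is set,
 * zSig0 is bumped, and the subsequent `zSig0 &= ~1' clears the low bit,
 * leaving an even significand -- round-half-to-even.
 */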

/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/

floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
                                       bool zSign, int32_t zExp,
                                       uint64_t zSig0, uint64_t zSig1,
                                       float_status *status)
{
    int8_t shiftCount;

    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = clz64(zSig0);
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
                                zSig0, zSig1, status);

}
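
/*
 * Normalization example: with zSig0 == 0 and zSig1 == 1, the words are
 * swapped (zExp -= 64) and then shifted left by clz64(1) == 63 bits
 * (zExp -= 63), leaving zSig0 == 0x8000000000000000 with zExp reduced
 * by 127 in total before rounding.
 */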

/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/

static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
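
/*
 * Entry n above is the double-precision encoding of 1/n!: for example,
 * 0x3fc5555555555555 is the nearest double to 1/3! = 0.1666..., and
 * 0x3fa5555555555555 the nearest double to 1/4! = 0.041666....
 */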

float32 float32_exp2(float32 a, float_status *status)
{
    FloatParts64 xp, xnp, tp, rp;
    int i;

    float32_unpack_canonical(&xp, a, status);
    if (unlikely(xp.cls != float_class_normal)) {
        switch (xp.cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(&xp, status);
            return float32_round_pack_canonical(&xp, status);
        case float_class_inf:
            return xp.sign ? float32_zero : a;
        case float_class_zero:
            return float32_one;
        default:
            break;
        }
        g_assert_not_reached();
    }

    float_raise(float_flag_inexact, status);

    float64_unpack_canonical(&tp, float64_ln2, status);
    xp = *parts_mul(&xp, &tp, status);
    xnp = xp;

    float64_unpack_canonical(&rp, float64_one, status);
    for (i = 0 ; i < 15 ; i++) {
        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
        rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
        xnp = *parts_mul(&xnp, &xp, status);
    }

    return float32_round_pack_canonical(&rp, status);
}
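
/*
 * The loop above accumulates r = 1 + y/1! + y^2/2! + ... + y^15/15! with
 * y = a * ln(2), evaluated in float64 parts: xnp tracks y^n and each
 * coefficient supplies 1/n!.  A minimal host-double sketch of the same
 * series follows; it is illustrative only (the helper name is made up
 * and nothing in softfloat calls it):
 */
static double __attribute__((unused)) exp2_series_sketch(double x)
{
    double y = x * 0.6931471805599453;  /* y = x * ln(2) */
    double term = 1.0;                  /* running value of y^n / n! */
    double r = 1.0;
    int n;

    for (n = 1; n <= 15; n++) {
        term *= y / n;                  /* y^n / n! from y^(n-1) / (n-1)! */
        r += term;
    }
    return r;                           /* approximates 2**x for modest |x| */
}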

/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a'
| to the precision provided by floatx80_rounding_precision and returns the
| result as an extended double-precision floating-point value.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_round(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    return floatx80_round_pack_canonical(&p, status);
}
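
/*
 * Usage sketch (assuming the FloatX80RoundPrec values declared alongside
 * float_status): emulate the x87 precision-control field by narrowing the
 * significand to double precision while keeping the extended exponent range:
 *
 *     status->floatx80_rounding_precision = floatx80_precision_d;
 *     a = floatx80_round(a, status);
 */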

static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
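    /*
     * Reading of the constants (an informal note, not from the original
     * comment): ua is 2**-1021 * (1 + 2**-52), ub is 2**-53, uc is 2**-1021.
     * The exact product ua*ub = 2**-1074 + 2**-1126 is just over half an
     * ulp of uc, so a correctly fused multiply-add rounds up to
     * 0x0020000000000001, while a double-rounding mul-then-add first
     * rounds the product to 2**-1074 and then loses the resulting tie
     * to even, giving 0x0020000000000000 instead.
     */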
    ua.s = 0x0020000000000001ULL;
    ub.s = 0x3ca0000000000000ULL;
    uc.s = 0x0020000000000000ULL;
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;
    }
}
