8
typedef unsigned short int bits16;
9
typedef unsigned int bits32;
10
typedef signed char Word8;
11
typedef unsigned char UWord8;
12
typedef signed short Word16;
13
typedef unsigned short UWord16;
14
typedef signed int Word32;
15
typedef unsigned int UWord32;
16
typedef long long Word64;
17
typedef unsigned long long UWord64;
18
typedef unsigned short float16;
19
typedef signed int sbits32;
20
typedef signed short int sbits16;
24
#define MAX_U32 (UWord32)0xffffffffL
25
#define MAX_U16 (UWord16)0xffff
26
#define BITMASK_T(typ, w) (((typ)1 << (w)) - 1)
27
#define TESTBIT(x, n) (((x) >> (n)) & 1)
29
#define float16_default_nan 0x7E00
30
#define float16_default_nan_pos 0x7E00
31
#define float16_default_nan_neg 0xFE00
33
int8 float_exception_flags = 0;
36
float_round_nearest_even = 0,
39
float_round_to_zero = 3
42
int8 float_rounding_mode = float_round_nearest_even;
43
enum { float_tininess_after_rounding = 0, float_tininess_before_rounding = 1 };
44
int float_detect_tininess = float_tininess_after_rounding;
46
inline bits16 extractFloat16Frac(float16 a) {
50
inline int16 extractFloat16Exp(float16 a) {
51
return (a >> 10) & 0x1F;
54
inline flag extractFloat16Sign(float16 a) {
58
flag float16_is_quiet_nan(float16 a) {
59
return (0xFC00 <= (bits16)(a << 1));
62
flag float16_is_signaling_nan(float16 a) {
63
return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x01FF);
67
float_flag_inexact = 1,
68
float_flag_divbyzero = 2,
69
float_flag_underflow = 4,
70
float_flag_overflow = 8,
71
float_flag_invalid = 16
74
void float_raise(int8 flags) {
75
float_exception_flags |= flags;
86
float_raise(float_flag_invalid);
90
if (cIsSNaN || cIsQNaN) {
92
} else if (bIsSNaN || bIsQNaN) {
99
inline float16 packFloat16(flag zSign, int16 zExp, bits16 zSig) {
100
return (((bits16)zSign) << 15) + (((bits16)zExp) << 10) + zSig;
104
propagateFloat16MulAddNaN(float16 a, float16 b, float16 c, flag infzero) {
105
flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, cIsQuietNaN,
109
aIsQuietNaN = float16_is_quiet_nan(a);
110
aIsSignalingNaN = float16_is_signaling_nan(a);
111
bIsQuietNaN = float16_is_quiet_nan(b);
112
bIsSignalingNaN = float16_is_signaling_nan(b);
113
cIsQuietNaN = float16_is_quiet_nan(c);
114
cIsSignalingNaN = float16_is_signaling_nan(c);
116
if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN) {
117
float_raise(float_flag_invalid);
120
selNaN = pickNaNMulAdd(
138
return float16_default_nan;
142
inline void shift32RightJamming(bits32 a, int16 count, bits32* zPtr) {
147
} else if (count < 32) {
148
z = (a >> count) | ((a << ((-count) & 31)) != 0);
155
void shift16RightJamming(bits16 a, int16 count, bits16* zPtr) {
160
} else if (count < 16) {
161
z = (a >> count) | (((a << ((-count) & 15)) & 0xffff) != 0);
168
Word8 GetRound(Word32 fcr) {
169
Word8 res, round_mode;
170
round_mode = fcr & 0x3; // lower 2 bits as rounding mode in FCR
171
res = (round_mode == 3)
175
: ((round_mode == 1) ? 3 : 0)); // Translate to float_rounding_mode
179
Word8 GetException(Word32 fsr) {
181
if (TESTBIT(fsr, 7) == 1)
182
res |= 32; // float_flag_inexact
183
if (TESTBIT(fsr, 8) == 1)
184
res |= 16; // float_flag_underflow
185
if (TESTBIT(fsr, 9) == 1)
186
res |= 8; // float_flag_overflow
187
if (TESTBIT(fsr, 10) == 1)
188
res |= 4; // float_flag_divbyzero
189
if (TESTBIT(fsr, 11) == 1)
190
res |= 1; // float_flag_invalid
194
float16 roundAndPackFloat16(flag zSign, int16 zExp, bits16 zSig) {
196
flag roundNearestEven;
197
int8 roundIncrement, roundBits;
200
roundingMode = float_rounding_mode;
201
roundNearestEven = (roundingMode == float_round_nearest_even);
202
roundIncrement = 0x8;
203
if (!roundNearestEven) {
204
// if ( ( ! roundNearestEven ) && ( roundingMode !=
205
// float_round_ties_away) ) {
206
if (roundingMode == float_round_to_zero) {
209
roundIncrement = 0xF;
211
if (roundingMode == float_round_up)
214
if (roundingMode == float_round_down)
219
roundBits = zSig & 0xF;
220
if (0x1D <= (bits16)zExp) {
222
((zExp == 0x1D) && ((sbits16)(zSig + roundIncrement) < 0))) {
223
float_raise(float_flag_overflow | float_flag_inexact);
224
return packFloat16(zSign, 0x1F, 0) - (roundIncrement == 0);
227
isTiny = (float_detect_tininess == float_tininess_before_rounding) ||
228
(zExp < -1) || (zSig + roundIncrement < 0x8000);
229
shift16RightJamming(zSig, -zExp, &zSig);
231
roundBits = zSig & 0xF;
233
if (isTiny && roundBits)
234
float_raise(float_flag_underflow);
238
float_exception_flags |= float_flag_inexact;
239
zSig = (zSig + roundIncrement) >> 4;
240
zSig &= ~(((roundBits ^ 0x8) == 0) & roundNearestEven);
243
return packFloat16(zSign, zExp, zSig);
246
int8 countLeadingZeros32(bits32 a) {
247
static const int8 countLeadingZerosHigh[] = {
248
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3,
249
3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
250
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
251
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
252
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
253
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
254
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
255
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
256
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
257
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
258
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
270
shiftCount += countLeadingZerosHigh[a >> 24];
274
void normalizeFloat16Subnormal(bits16 aSig, int16* zExpPtr, bits16* zSigPtr) {
277
shiftCount = countLeadingZeros32((bits32)aSig) - 16 - 5;
278
*zSigPtr = aSig << shiftCount;
279
*zExpPtr = 1 - shiftCount;
282
float16 float16_muladd(float16 a, float16 b, float16 c, flag negate_product) {
283
flag aSign, bSign, cSign, zSign;
284
int16 aExp, bExp, cExp, pExp, zExp, expDiff;
285
bits16 aSig, bSig, cSig;
286
flag pInf, pZero, pSign;
287
bits32 pSig32, cSig32, zSig32;
292
/* Extract the sign bit, exponent and significant */
293
aSig = extractFloat16Frac(a);
294
aExp = extractFloat16Exp(a);
295
aSign = extractFloat16Sign(a);
297
bSig = extractFloat16Frac(b);
298
bExp = extractFloat16Exp(b);
299
bSign = extractFloat16Sign(b);
301
cSig = extractFloat16Frac(c);
302
cExp = extractFloat16Exp(c);
303
cSign = extractFloat16Sign(c);
305
/* Flag to indicate fusedMultiplyAdd(0, inf, or fusedMultiplyAdd(inf, 0 c) */
307
((aExp == 0 && aSig == 0 && bExp == 0x1f && bSig == 0) ||
308
(aExp == 0x1f && aSig == 0 && bExp == 0 && bSig == 0));
310
/* CASE1: if any input is NaN => NaN propagate */
312
/* It is implementation-defined whether the cases of (0,inf,qnan)
313
* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
314
* they return if they do), so we have to hand this information
315
* off to the target-specific pick-a-NaN routine.
318
/* IEEE754 7.2 - Invalid: fusedMultiplyAdd(0, inf, c) or
319
* fusedMultiplyAdd(inf, 0 , c) unless c is a quiet NaN; If c is a
320
* quiet NaN then it is implementation defined whether the invalid operation
321
* exception is signaled.
323
if (((aExp == 0x1f) && aSig) || ((bExp == 0x1f) && bSig) ||
324
((cExp == 0x1f) && cSig)) {
325
return propagateFloat16MulAddNaN(a, b, c, infzero);
328
/* Work out the sign and type of the product */
329
pSign = aSign ^ bSign;
330
if (negate_product) {
334
/* CASE2: fusedMultiplyAdd(0, inf, c) or fusedMultiplyAdd(inf,0, c) and c is
335
* not NaN => raise invalid */
337
float_raise(float_flag_invalid);
338
return float16_default_nan;
341
pInf = (aExp == 0x1f) || (bExp == 0x1f);
342
pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
344
/* CASE3 and CASE4: c is inf, p is number or inf*/
346
if (pInf && (pSign ^ cSign)) {
347
/* CASE3: addition of opposite-signed infinities => InvalidOperation */
348
float_raise(float_flag_invalid);
349
return float16_default_nan;
351
/* CASE4: Otherwise generate an infinity of the same sign */
352
return packFloat16(cSign, 0x1f, 0);
355
/* CASE5: c is number and p is inf */
357
return packFloat16(pSign, 0x1f, 0);
360
/* CASE6: c is number, p is zero */
364
/* Adding two exact zeroes */
365
if (pSign == cSign) {
367
} else if (float_rounding_mode == float_round_down) {
372
return packFloat16(zSign, 0, 0);
375
/* CASE7: Zero plus something non-zero : just return the something */
380
normalizeFloat16Subnormal(aSig, &aExp, &aSig);
383
normalizeFloat16Subnormal(bSig, &bExp, &bSig);
386
/* Calculate the actual result a * b + c */
388
/* NOTE: we subtract 0x7e where float16_mul() subtracts 0x7f
389
* because we want the true exponent, not the "one-less-than"
390
* flavour that roundAndPackFloat16() takes.
392
pExp = aExp + bExp - 0xe;
393
aSig = (aSig | 0x0400) << 4;
394
bSig = (bSig | 0x0400) << 5;
395
pSig32 = (bits32)aSig * bSig;
396
if ((sbits32)(pSig32 << 1) >= 0) {
403
/* Now pSig32 is the significand of the multiply, with the explicit bit in
408
/* Throw out the special case of c being an exact zero now */
409
shift32RightJamming(pSig32, 16, &pSig32);
411
return roundAndPackFloat16(zSign, pExp - 1, pSig);
413
normalizeFloat16Subnormal(cSig, &cExp, &cSig);
416
cSig32 = (bits32)cSig << (30 - 10);
417
cSig32 |= 0x40000000;
418
expDiff = pExp - cExp;
420
if (pSign == cSign) {
423
/* scale c to match p */
424
shift32RightJamming(cSig32, expDiff, &cSig32);
426
} else if (expDiff < 0) {
427
/* scale p to match c */
428
shift32RightJamming(pSig32, -expDiff, &pSig32);
431
/* no scaling needed */
434
/* Add significands and make sure explicit bit ends up in posn 62 */
435
zSig32 = pSig32 + cSig32;
436
if ((sbits32)zSig32 < 0) {
437
shift32RightJamming(zSig32, 1, &zSig32);
444
shift32RightJamming(cSig32, expDiff, &cSig32);
445
zSig32 = pSig32 - cSig32;
447
} else if (expDiff < 0) {
448
shift32RightJamming(pSig32, -expDiff, &pSig32);
449
zSig32 = cSig32 - pSig32;
454
if (cSig32 < pSig32) {
455
zSig32 = pSig32 - cSig32;
456
} else if (pSig32 < cSig32) {
457
zSig32 = cSig32 - pSig32;
462
if (float_rounding_mode == float_round_down) {
465
return packFloat16(zSign, 0, 0);
469
/* Normalize to put the explicit bit back into bit 62. */
470
shiftcount = countLeadingZeros32(zSig32) - 1;
471
zSig32 <<= shiftcount;
474
shift32RightJamming(zSig32, 16, &zSig32);
475
return roundAndPackFloat16(zSign, zExp, zSig32);
482
Word32 negate_product,
487
// Extract rounding mode from FCR/FSR to softfloat
488
float_rounding_mode = GetRound(fcr);
489
float_exception_flags = GetException(fsr_i);
490
// Call softfloat lib
491
*res = float16_muladd(d1, d2, d0, negate_product);
492
//*fsr_o = PutException(float_exception_flags, fsr_i);
506
fp_mac_h(b, input, a, 0, fcr, fsr_i, &res, &fsr);
512
float fake_fma_fp16_slow(float v1, float v2, float v3) {
513
uint32_t fcr_val = 0;
514
uint32_t fsr_val = 0x00000F80;
515
uint32_t exception_flags = 0;
517
uint16_t hv1, hv2, hv3, hresult;
518
hv1 = _cvtss_sh(v1, 0);
519
hv2 = _cvtss_sh(v2, 0);
520
hv3 = _cvtss_sh(v3, 0);
523
*reinterpret_cast<Word16*>(&hv1),
524
*reinterpret_cast<Word16*>(&hv2),
525
*reinterpret_cast<Word16*>(&hv3),
526
*reinterpret_cast<Word32*>(&fcr_val),
527
*reinterpret_cast<Word32*>(&fsr_val),
528
reinterpret_cast<Word16*>(&hresult),
529
reinterpret_cast<Word32*>(&exception_flags));
531
return _cvtsh_ss(hresult);
534
void fake_fma_fp16_slow(int N, const float* A, const float* B, float* Out) {
535
for (int n = 0; n < N; n++) {
536
Out[n] = fake_fma_fp16_slow(A[n], B[n], Out[n]);
540
} // namespace fp16_fma