llvm-project
155 строк · 6.2 Кб
//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a fairly generic conversion from a wider to a narrower
// IEEE-754 floating-point type in the default (round to nearest, ties to even)
// rounding mode. The constants and types defined following the includes below
// parameterize the conversion.
//
// This routine can be trivially adapted to support conversions to
// half-precision or from quad-precision. It does not support types that don't
// use the usual IEEE-754 interchange formats; specifically, some work would be
// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
// double-double format.
//
// Please note, however, that this implementation is only intended to support
// *narrowing* operations; if you need to convert to a *wider* floating-point
// type (e.g. float -> double), then this routine will not do what you want it
// to.
//
// It also requires that integer types at least as large as both formats
// are available on the target platform; this may pose a problem when trying
// to add support for quad on some 32-bit systems, for example.
//
// Finally, the following assumptions are made:
//
// 1. Floating-point types and integer types have the same endianness on the
//    target platform.
//
// 2. Quiet NaNs, if supported, are indicated by the leading bit of the
//    significand field being set.
//
//===----------------------------------------------------------------------===//
38
39#include "fp_trunc.h"40
41// The destination type may use a usual IEEE-754 interchange format or Intel
42// 80-bit format. In particular, for the destination type dstSigFracBits may be
43// not equal to dstSigBits. The source type is assumed to be one of IEEE-754
44// standard types.
45static __inline dst_t __truncXfYf2__(src_t a) {46// Various constants whose values follow from the type parameters.47// Any reasonable optimizer will fold and propagate all of these.48const int srcInfExp = (1 << srcExpBits) - 1;49const int srcExpBias = srcInfExp >> 1;50
51const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigFracBits;52const src_rep_t roundMask =53(SRC_REP_C(1) << (srcSigFracBits - dstSigFracBits)) - 1;54const src_rep_t halfway = SRC_REP_C(1)55<< (srcSigFracBits - dstSigFracBits - 1);56const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1);57const src_rep_t srcNaNCode = srcQNaN - 1;58
59const int dstInfExp = (1 << dstExpBits) - 1;60const int dstExpBias = dstInfExp >> 1;61const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;62
63const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigFracBits - 1);64const dst_rep_t dstNaNCode = dstQNaN - 1;65
66const src_rep_t aRep = srcToRep(a);67const src_rep_t srcSign = extract_sign_from_src(aRep);68const src_rep_t srcExp = extract_exp_from_src(aRep);69const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);70
71dst_rep_t dstSign = srcSign;72dst_rep_t dstExp;73dst_rep_t dstSigFrac;74
75// Same size exponents and a's significand tail is 0.76// The significand can be truncated and the exponent can be copied over.77const int sigFracTailBits = srcSigFracBits - dstSigFracBits;78if (srcExpBits == dstExpBits &&79((aRep >> sigFracTailBits) << sigFracTailBits) == aRep) {80dstExp = srcExp;81dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);82return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));83}84
85const int dstExpCandidate = ((int)srcExp - srcExpBias) + dstExpBias;86if (dstExpCandidate >= 1 && dstExpCandidate < dstInfExp) {87// The exponent of a is within the range of normal numbers in the88// destination format. We can convert by simply right-shifting with89// rounding and adjusting the exponent.90dstExp = dstExpCandidate;91dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);92
93const src_rep_t roundBits = srcSigFrac & roundMask;94// Round to nearest.95if (roundBits > halfway)96dstSigFrac++;97// Tie to even.98else if (roundBits == halfway)99dstSigFrac += dstSigFrac & 1;100
101// Rounding has changed the exponent.102if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {103dstExp += 1;104dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);105}106} else if (srcExp == srcInfExp && srcSigFrac) {107// a is NaN.108// Conjure the result by beginning with infinity, setting the qNaN109// bit and inserting the (truncated) trailing NaN field.110dstExp = dstInfExp;111dstSigFrac = dstQNaN;112dstSigFrac |= ((srcSigFrac & srcNaNCode) >> sigFracTailBits) & dstNaNCode;113} else if ((int)srcExp >= overflowExponent) {114dstExp = dstInfExp;115dstSigFrac = 0;116} else {117// a underflows on conversion to the destination type or is an exact118// zero. The result may be a denormal or zero. Extract the exponent119// to get the shift amount for the denormalization.120src_rep_t significand = srcSigFrac;121int shift = srcExpBias - dstExpBias - srcExp;122
123if (srcExp) {124// Set the implicit integer bit if the source is a normal number.125significand |= srcMinNormal;126shift += 1;127}128
129// Right shift by the denormalization amount with sticky.130if (shift > srcSigFracBits) {131dstExp = 0;132dstSigFrac = 0;133} else {134dstExp = 0;135const bool sticky = shift && ((significand << (srcBits - shift)) != 0);136src_rep_t denormalizedSignificand = significand >> shift | sticky;137dstSigFrac = denormalizedSignificand >> sigFracTailBits;138const src_rep_t roundBits = denormalizedSignificand & roundMask;139// Round to nearest140if (roundBits > halfway)141dstSigFrac++;142// Ties to even143else if (roundBits == halfway)144dstSigFrac += dstSigFrac & 1;145
146// Rounding has changed the exponent.147if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {148dstExp += 1;149dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);150}151}152}153
154return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));155}
156