jdk

Форк
0
/
sharedRuntimeTrig.cpp 
886 строк · 28.6 Кб
1
/*
2
 * Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.
8
 *
9
 * This code is distributed in the hope that it will be useful, but WITHOUT
10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12
 * version 2 for more details (a copy is included in the LICENSE file that
13
 * accompanied this code).
14
 *
15
 * You should have received a copy of the GNU General Public License version
16
 * 2 along with this work; if not, write to the Free Software Foundation,
17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
 *
19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
 * or visit www.oracle.com if you need additional information or have any
21
 * questions.
22
 *
23
 */
24

25
#include "precompiled.hpp"
26
#include "jni.h"
27
#include "runtime/interfaceSupport.inline.hpp"
28
#include "runtime/sharedRuntime.hpp"
29
#include "runtime/sharedRuntimeMath.hpp"
30

31
// This file contains copies of the C fdlibm routines originally used
32
// by StrictMath. The StrictMath sin, cos, and tan methods now use a
33
// Java port of the algorithm in java.lang.Fdlibm.java.
34

35
/*
36
 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
37
 * double x[],y[]; int e0,nx,prec; int ipio2[];
38
 *
39
 * __kernel_rem_pio2 return the last three digits of N with
40
 *              y = x - N*pi/2
41
 * so that |y| < pi/2.
42
 *
43
 * The method is to compute the integer (mod 8) and fraction parts of
44
 * (2/pi)*x without doing the full multiplication. In general we
45
 * skip the part of the product that are known to be a huge integer (
46
 * more accurately, = 0 mod 8 ). Thus the number of operations are
47
 * independent of the exponent of the input.
48
 *
49
 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
50
 *
51
 * Input parameters:
52
 *      x[]     The input value (must be positive) is broken into nx
53
 *              pieces of 24-bit integers in double precision format.
54
 *              x[i] will be the i-th 24 bit of x. The scaled exponent
55
 *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
56
 *              match x's up to 24 bits.
57
 *
58
 *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
59
 *                      e0 = ilogb(z)-23
60
 *                      z  = scalbn(z,-e0)
61
 *              for i = 0,1,2
62
 *                      x[i] = floor(z)
63
 *                      z    = (z-x[i])*2**24
64
 *
65
 *
66
 *      y[]     output result in an array of double precision numbers.
67
 *              The dimension of y[] is:
68
 *                      24-bit  precision       1
69
 *                      53-bit  precision       2
70
 *                      64-bit  precision       2
71
 *                      113-bit precision       3
72
 *              The actual value is the sum of them. Thus for 113-bit
73
 *              precsion, one may have to do something like:
74
 *
75
 *              long double t,w,r_head, r_tail;
76
 *              t = (long double)y[2] + (long double)y[1];
77
 *              w = (long double)y[0];
78
 *              r_head = t+w;
79
 *              r_tail = w - (r_head - t);
80
 *
81
 *      e0      The exponent of x[0]
82
 *
83
 *      nx      dimension of x[]
84
 *
85
 *      prec    an integer indicating the precision:
86
 *                      0       24  bits (single)
87
 *                      1       53  bits (double)
88
 *                      2       64  bits (extended)
89
 *                      3       113 bits (quad)
90
 *
91
 *      ipio2[]
92
 *              integer array, contains the (24*i)-th to (24*i+23)-th
93
 *              bit of 2/pi after binary point. The corresponding
94
 *              floating value is
95
 *
96
 *                      ipio2[i] * 2^(-24(i+1)).
97
 *
98
 * External function:
99
 *      double scalbn(), floor();
100
 *
101
 *
102
 * Here is the description of some local variables:
103
 *
104
 *      jk      jk+1 is the initial number of terms of ipio2[] needed
105
 *              in the computation. The recommended value is 2,3,4,
106
 *              6 for single, double, extended,and quad.
107
 *
108
 *      jz      local integer variable indicating the number of
109
 *              terms of ipio2[] used.
110
 *
111
 *      jx      nx - 1
112
 *
113
 *      jv      index for pointing to the suitable ipio2[] for the
114
 *              computation. In general, we want
115
 *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
116
 *              is an integer. Thus
117
 *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
118
 *              Hence jv = max(0,(e0-3)/24).
119
 *
120
 *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
121
 *
122
 *      q[]     double array with integral value, representing the
123
 *              24-bits chunk of the product of x and 2/pi.
124
 *
125
 *      q0      the corresponding exponent of q[0]. Note that the
126
 *              exponent for q[i] would be q0-24*i.
127
 *
128
 *      PIo2[]  double precision array, obtained by cutting pi/2
129
 *              into 24 bits chunks.
130
 *
131
 *      f[]     ipio2[] in floating point
132
 *
133
 *      iq[]    integer array by breaking up q[] in 24-bits chunk.
134
 *
135
 *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
136
 *
137
 *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
138
 *              it also indicates the *sign* of the result.
139
 *
140
 */
141

142

143
/*
144
 * Constants:
145
 * The hexadecimal values are the intended ones for the following
146
 * constants. The decimal values may be used, provided that the
147
 * compiler will convert from decimal to binary accurately enough
148
 * to produce the hexadecimal values shown.
149
 */
150

151

152
static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
153

154
static const double PIo2[] = {
155
  1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
156
  7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
157
  5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
158
  3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
159
  1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
160
  1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
161
  2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
162
  2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
163
};
164

165
static const double
166
zeroB   = 0.0,
167
one     = 1.0,
168
two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
169
twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
170

171
static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
172
  int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
173
  double z,fw,f[20],fq[20],q[20];
174

175
  /* initialize jk*/
176
  jk = init_jk[prec];
177
  jp = jk;
178

179
  /* determine jx,jv,q0, note that 3>q0 */
180
  jx =  nx-1;
181
  jv = (e0-3)/24; if(jv<0) jv=0;
182
  q0 =  e0-24*(jv+1);
183

184
  /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
185
  j = jv-jx; m = jx+jk;
186
  for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
187

188
  /* compute q[0],q[1],...q[jk] */
189
  for (i=0;i<=jk;i++) {
190
    for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
191
    q[i] = fw;
192
  }
193

194
  jz = jk;
195
recompute:
196
  /* distill q[] into iq[] reversingly */
197
  for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
198
    fw    =  (double)((int)(twon24* z));
199
    iq[i] =  (int)(z-two24B*fw);
200
    z     =  q[j-1]+fw;
201
  }
202

203
  /* compute n */
204
  z  = scalbnA(z,q0);           /* actual value of z */
205
  z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */
206
  n  = (int) z;
207
  z -= (double)n;
208
  ih = 0;
209
  if(q0>0) {    /* need iq[jz-1] to determine n */
210
    i  = (iq[jz-1]>>(24-q0)); n += i;
211
    iq[jz-1] -= i<<(24-q0);
212
    ih = iq[jz-1]>>(23-q0);
213
  }
214
  else if(q0==0) ih = iq[jz-1]>>23;
215
  else if(z>=0.5) ih=2;
216

217
  if(ih>0) {    /* q > 0.5 */
218
    n += 1; carry = 0;
219
    for(i=0;i<jz ;i++) {        /* compute 1-q */
220
      j = iq[i];
221
      if(carry==0) {
222
        if(j!=0) {
223
          carry = 1; iq[i] = 0x1000000- j;
224
        }
225
      } else  iq[i] = 0xffffff - j;
226
    }
227
    if(q0>0) {          /* rare case: chance is 1 in 12 */
228
      switch(q0) {
229
      case 1:
230
        iq[jz-1] &= 0x7fffff; break;
231
      case 2:
232
        iq[jz-1] &= 0x3fffff; break;
233
      }
234
    }
235
    if(ih==2) {
236
      z = one - z;
237
      if(carry!=0) z -= scalbnA(one,q0);
238
    }
239
  }
240

241
  /* check if recomputation is needed */
242
  if(z==zeroB) {
243
    j = 0;
244
    for (i=jz-1;i>=jk;i--) j |= iq[i];
245
    if(j==0) { /* need recomputation */
246
      for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
247

248
      for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
249
        f[jx+i] = (double) ipio2[jv+i];
250
        for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
251
        q[i] = fw;
252
      }
253
      jz += k;
254
      goto recompute;
255
    }
256
  }
257

258
  /* chop off zero terms */
259
  if(z==0.0) {
260
    jz -= 1; q0 -= 24;
261
    while(iq[jz]==0) { jz--; q0-=24;}
262
  } else { /* break z into 24-bit if necessary */
263
    z = scalbnA(z,-q0);
264
    if(z>=two24B) {
265
      fw = (double)((int)(twon24*z));
266
      iq[jz] = (int)(z-two24B*fw);
267
      jz += 1; q0 += 24;
268
      iq[jz] = (int) fw;
269
    } else iq[jz] = (int) z ;
270
  }
271

272
  /* convert integer "bit" chunk to floating-point value */
273
  fw = scalbnA(one,q0);
274
  for(i=jz;i>=0;i--) {
275
    q[i] = fw*(double)iq[i]; fw*=twon24;
276
  }
277

278
  /* compute PIo2[0,...,jp]*q[jz,...,0] */
279
  for(i=jz;i>=0;i--) {
280
    for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
281
    fq[jz-i] = fw;
282
  }
283

284
  /* compress fq[] into y[] */
285
  switch(prec) {
286
  case 0:
287
    fw = 0.0;
288
    for (i=jz;i>=0;i--) fw += fq[i];
289
    y[0] = (ih==0)? fw: -fw;
290
    break;
291
  case 1:
292
  case 2:
293
    fw = 0.0;
294
    for (i=jz;i>=0;i--) fw += fq[i];
295
    y[0] = (ih==0)? fw: -fw;
296
    fw = fq[0]-fw;
297
    for (i=1;i<=jz;i++) fw += fq[i];
298
    y[1] = (ih==0)? fw: -fw;
299
    break;
300
  case 3:       /* painful */
301
    for (i=jz;i>0;i--) {
302
      fw      = fq[i-1]+fq[i];
303
      fq[i]  += fq[i-1]-fw;
304
      fq[i-1] = fw;
305
    }
306
    for (i=jz;i>1;i--) {
307
      fw      = fq[i-1]+fq[i];
308
      fq[i]  += fq[i-1]-fw;
309
      fq[i-1] = fw;
310
    }
311
    for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
312
    if(ih==0) {
313
      y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
314
    } else {
315
      y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
316
    }
317
  }
318
  return n&7;
319
}
320

321

322
/*
323
 * ====================================================
324
 * Copyright (c) 1993 Oracle and/or its affiliates. All rights reserved.
325
 *
326
 * Developed at SunPro, a Sun Microsystems, Inc. business.
327
 * Permission to use, copy, modify, and distribute this
328
 * software is freely granted, provided that this notice
329
 * is preserved.
330
 * ====================================================
331
 *
332
 */
333

334
/* __ieee754_rem_pio2(x,y)
335
 *
336
 * return the remainder of x rem pi/2 in y[0]+y[1]
337
 * use __kernel_rem_pio2()
338
 */
339

340
/*
341
 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
342
 */
343
static const int two_over_pi[] = {
344
  0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
345
  0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
346
  0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
347
  0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
348
  0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
349
  0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
350
  0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
351
  0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
352
  0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
353
  0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
354
  0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
355
};
356

357
static const int npio2_hw[] = {
358
  0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
359
  0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
360
  0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
361
  0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
362
  0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
363
  0x404858EB, 0x404921FB,
364
};
365

366
/*
367
 * invpio2:  53 bits of 2/pi
368
 * pio2_1:   first  33 bit of pi/2
369
 * pio2_1t:  pi/2 - pio2_1
370
 * pio2_2:   second 33 bit of pi/2
371
 * pio2_2t:  pi/2 - (pio2_1+pio2_2)
372
 * pio2_3:   third  33 bit of pi/2
373
 * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
374
 */
375

376
static const double
377
zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
378
half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
379
two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
380
invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
381
pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
382
pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
383
pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
384
pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
385
pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
386
pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
387

388
static int __ieee754_rem_pio2(double x, double *y) {
389
  double z,w,t,r,fn;
390
  double tx[3];
391
  int e0,i,j,nx,n,ix,hx,i0;
392

393
  i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
394
  hx = *(i0+(int*)&x);          /* high word of x */
395
  ix = hx&0x7fffffff;
396
  if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
397
    {y[0] = x; y[1] = 0; return 0;}
398
  if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
399
    if(hx>0) {
400
      z = x - pio2_1;
401
      if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
402
        y[0] = z - pio2_1t;
403
        y[1] = (z-y[0])-pio2_1t;
404
      } else {                /* near pi/2, use 33+33+53 bit pi */
405
        z -= pio2_2;
406
        y[0] = z - pio2_2t;
407
        y[1] = (z-y[0])-pio2_2t;
408
      }
409
      return 1;
410
    } else {    /* negative x */
411
      z = x + pio2_1;
412
      if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
413
        y[0] = z + pio2_1t;
414
        y[1] = (z-y[0])+pio2_1t;
415
      } else {                /* near pi/2, use 33+33+53 bit pi */
416
        z += pio2_2;
417
        y[0] = z + pio2_2t;
418
        y[1] = (z-y[0])+pio2_2t;
419
      }
420
      return -1;
421
    }
422
  }
423
  if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
424
    t  = fabsd(x);
425
    n  = (int) (t*invpio2+half);
426
    fn = (double)n;
427
    r  = t-fn*pio2_1;
428
    w  = fn*pio2_1t;    /* 1st round good to 85 bit */
429
    if(n<32&&ix!=npio2_hw[n-1]) {
430
      y[0] = r-w;       /* quick check no cancellation */
431
    } else {
432
      j  = ix>>20;
433
      y[0] = r-w;
434
      i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
435
      if(i>16) {  /* 2nd iteration needed, good to 118 */
436
        t  = r;
437
        w  = fn*pio2_2;
438
        r  = t-w;
439
        w  = fn*pio2_2t-((t-r)-w);
440
        y[0] = r-w;
441
        i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
442
        if(i>49)  {     /* 3rd iteration need, 151 bits acc */
443
          t  = r;       /* will cover all possible cases */
444
          w  = fn*pio2_3;
445
          r  = t-w;
446
          w  = fn*pio2_3t-((t-r)-w);
447
          y[0] = r-w;
448
        }
449
      }
450
    }
451
    y[1] = (r-y[0])-w;
452
    if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
453
    else         return n;
454
  }
455
  /*
456
   * all other (large) arguments
457
   */
458
  if(ix>=0x7ff00000) {          /* x is inf or NaN */
459
    y[0]=y[1]=x-x; return 0;
460
  }
461
  /* set z = scalbn(|x|,ilogb(x)-23) */
462
  *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
463
  e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
464
  *(i0+(int*)&z) = ix - (e0<<20);
465
  for(i=0;i<2;i++) {
466
    tx[i] = (double)((int)(z));
467
    z     = (z-tx[i])*two24A;
468
  }
469
  tx[2] = z;
470
  nx = 3;
471
  while(tx[nx-1]==zeroA) nx--;  /* skip zero term */
472
  n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
473
  if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
474
  return n;
475
}
476

477

478
/* __kernel_sin( x, y, iy)
479
 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
480
 * Input x is assumed to be bounded by ~pi/4 in magnitude.
481
 * Input y is the tail of x.
482
 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
483
 *
484
 * Algorithm
485
 *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
486
 *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
487
 *      3. sin(x) is approximated by a polynomial of degree 13 on
488
 *         [0,pi/4]
489
 *                               3            13
490
 *              sin(x) ~ x + S1*x + ... + S6*x
491
 *         where
492
 *
493
 *      |sin(x)         2     4     6     8     10     12  |     -58
494
 *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
495
 *      |  x                                               |
496
 *
497
 *      4. sin(x+y) = sin(x) + sin'(x')*y
498
 *                  ~ sin(x) + (1-x*x/2)*y
499
 *         For better accuracy, let
500
 *                   3      2      2      2      2
501
 *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
502
 *         then                   3    2
503
 *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
504
 */
505

506
static const double
507
S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
508
S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
509
S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
510
S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
511
S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
512
S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
513

514
static double __kernel_sin(double x, double y, int iy)
515
{
516
        double z,r,v;
517
        int ix;
518
        ix = high(x)&0x7fffffff;                /* high word of x */
519
        if(ix<0x3e400000)                       /* |x| < 2**-27 */
520
           {if((int)x==0) return x;}            /* generate inexact */
521
        z       =  x*x;
522
        v       =  z*x;
523
        r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
524
        if(iy==0) return x+v*(S1+z*r);
525
        else      return x-((z*(half*y-v*r)-y)-v*S1);
526
}
527

528
/*
529
 * __kernel_cos( x,  y )
530
 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
531
 * Input x is assumed to be bounded by ~pi/4 in magnitude.
532
 * Input y is the tail of x.
533
 *
534
 * Algorithm
535
 *      1. Since cos(-x) = cos(x), we need only to consider positive x.
536
 *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
537
 *      3. cos(x) is approximated by a polynomial of degree 14 on
538
 *         [0,pi/4]
539
 *                                       4            14
540
 *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
541
 *         where the remez error is
542
 *
543
 *      |              2     4     6     8     10    12     14 |     -58
544
 *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
545
 *      |                                                      |
546
 *
547
 *                     4     6     8     10    12     14
548
 *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
549
 *             cos(x) = 1 - x*x/2 + r
550
 *         since cos(x+y) ~ cos(x) - sin(x)*y
551
 *                        ~ cos(x) - x*y,
552
 *         a correction term is necessary in cos(x) and hence
553
 *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
554
 *         For better accuracy when x > 0.3, let qx = |x|/4 with
555
 *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
556
 *         Then
557
 *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
558
 *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
559
 *         magnitude of the latter is at least a quarter of x*x/2,
560
 *         thus, reducing the rounding error in the subtraction.
561
 */
562

563
static const double
564
C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
565
C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
566
C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
567
C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
568
C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
569
C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
570

571
static double __kernel_cos(double x, double y)
572
{
573
  double a,h,z,r,qx=0;
574
  int ix;
575
  ix = high(x)&0x7fffffff;              /* ix = |x|'s high word*/
576
  if(ix<0x3e400000) {                   /* if x < 2**27 */
577
    if(((int)x)==0) return one;         /* generate inexact */
578
  }
579
  z  = x*x;
580
  r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
581
  if(ix < 0x3FD33333)                   /* if |x| < 0.3 */
582
    return one - (0.5*z - (z*r - x*y));
583
  else {
584
    if(ix > 0x3fe90000) {               /* x > 0.78125 */
585
      qx = 0.28125;
586
    } else {
587
      set_high(&qx, ix-0x00200000); /* x/4 */
588
      set_low(&qx, 0);
589
    }
590
    h = 0.5*z-qx;
591
    a = one-qx;
592
    return a - (h - (z*r-x*y));
593
  }
594
}
595

596
/* __kernel_tan( x, y, k )
597
 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
598
 * Input x is assumed to be bounded by ~pi/4 in magnitude.
599
 * Input y is the tail of x.
600
 * Input k indicates whether tan (if k=1) or
601
 * -1/tan (if k= -1) is returned.
602
 *
603
 * Algorithm
604
 *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
605
 *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
606
 *      3. tan(x) is approximated by a odd polynomial of degree 27 on
607
 *         [0,0.67434]
608
 *                               3             27
609
 *              tan(x) ~ x + T1*x + ... + T13*x
610
 *         where
611
 *
612
 *              |tan(x)         2     4            26   |     -59.2
613
 *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
614
 *              |  x                                    |
615
 *
616
 *         Note: tan(x+y) = tan(x) + tan'(x)*y
617
 *                        ~ tan(x) + (1+x*x)*y
618
 *         Therefore, for better accuracy in computing tan(x+y), let
619
 *                   3      2      2       2       2
620
 *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
621
 *         then
622
 *                                  3    2
623
 *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
624
 *
625
 *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
626
 *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
627
 *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
628
 */
629

630
static const double
631
pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
632
pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
633
T[] =  {
634
  3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
635
  1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
636
  5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
637
  2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
638
  8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
639
  3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
640
  1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
641
  5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
642
  2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
643
  7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
644
  7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
645
 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
646
  2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
647
};
648

649
static double __kernel_tan(double x, double y, int iy)
650
{
651
  double z,r,v,w,s;
652
  int ix,hx;
653
  hx = high(x);           /* high word of x */
654
  ix = hx&0x7fffffff;     /* high word of |x| */
655
  if(ix<0x3e300000) {                     /* x < 2**-28 */
656
    if((int)x==0) {                       /* generate inexact */
657
      if (((ix | low(x)) | (iy + 1)) == 0)
658
        return one / fabsd(x);
659
      else {
660
        if (iy == 1)
661
          return x;
662
        else {    /* compute -1 / (x+y) carefully */
663
          double a, t;
664

665
          z = w = x + y;
666
          set_low(&z, 0);
667
          v = y - (z - x);
668
          t = a = -one / w;
669
          set_low(&t, 0);
670
          s = one + t * z;
671
          return t + a * (s + t * v);
672
        }
673
      }
674
    }
675
  }
676
  if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */
677
    if(hx<0) {x = -x; y = -y;}
678
    z = pio4-x;
679
    w = pio4lo-y;
680
    x = z+w; y = 0.0;
681
  }
682
  z       =  x*x;
683
  w       =  z*z;
684
  /* Break x^5*(T[1]+x^2*T[2]+...) into
685
   *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
686
   *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
687
   */
688
  r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
689
  v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
690
  s = z*x;
691
  r = y + z*(s*(r+v)+y);
692
  r += T[0]*s;
693
  w = x+r;
694
  if(ix>=0x3FE59428) {
695
    v = (double)iy;
696
    return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
697
  }
698
  if(iy==1) return w;
699
  else {          /* if allow error up to 2 ulp,
700
                     simply return -1.0/(x+r) here */
701
    /*  compute -1.0/(x+r) accurately */
702
    double a,t;
703
    z  = w;
704
    set_low(&z, 0);
705
    v  = r-(z - x);     /* z+v = r+x */
706
    t = a  = -1.0/w;    /* a = -1.0/w */
707
    set_low(&t, 0);
708
    s  = 1.0+t*z;
709
    return t+a*(s+t*v);
710
  }
711
}
712

713

714
//----------------------------------------------------------------------
715
//
716
// Routines for new sin/cos implementation
717
//
718
//----------------------------------------------------------------------
719

720
/* sin(x)
721
 * Return sine function of x.
722
 *
723
 * kernel function:
724
 *      __kernel_sin            ... sine function on [-pi/4,pi/4]
725
 *      __kernel_cos            ... cose function on [-pi/4,pi/4]
726
 *      __ieee754_rem_pio2      ... argument reduction routine
727
 *
728
 * Method.
729
 *      Let S,C and T denote the sin, cos and tan respectively on
730
 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
731
 *      in [-pi/4 , +pi/4], and let n = k mod 4.
732
 *      We have
733
 *
734
 *          n        sin(x)      cos(x)        tan(x)
735
 *     ----------------------------------------------------------
736
 *          0          S           C             T
737
 *          1          C          -S            -1/T
738
 *          2         -S          -C             T
739
 *          3         -C           S            -1/T
740
 *     ----------------------------------------------------------
741
 *
742
 * Special cases:
743
 *      Let trig be any of sin, cos, or tan.
744
 *      trig(+-INF)  is NaN, with signals;
745
 *      trig(NaN)    is that NaN;
746
 *
747
 * Accuracy:
748
 *      TRIG(x) returns trig(x) nearly rounded
749
 */
750

751
JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
752
  double y[2],z=0.0;
753
  int n, ix;
754

755
  /* High word of x. */
756
  ix = high(x);
757

758
  /* |x| ~< pi/4 */
759
  ix &= 0x7fffffff;
760
  if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
761

762
  /* sin(Inf or NaN) is NaN */
763
  else if (ix>=0x7ff00000) return x-x;
764

765
  /* argument reduction needed */
766
  else {
767
    n = __ieee754_rem_pio2(x,y);
768
    switch(n&3) {
769
    case 0: return  __kernel_sin(y[0],y[1],1);
770
    case 1: return  __kernel_cos(y[0],y[1]);
771
    case 2: return -__kernel_sin(y[0],y[1],1);
772
    default:
773
      return -__kernel_cos(y[0],y[1]);
774
    }
775
  }
776
JRT_END
777

778
/* cos(x)
779
 * Return cosine function of x.
780
 *
781
 * kernel function:
782
 *      __kernel_sin            ... sine function on [-pi/4,pi/4]
783
 *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
784
 *      __ieee754_rem_pio2      ... argument reduction routine
785
 *
786
 * Method.
787
 *      Let S,C and T denote the sin, cos and tan respectively on
788
 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
789
 *      in [-pi/4 , +pi/4], and let n = k mod 4.
790
 *      We have
791
 *
792
 *          n        sin(x)      cos(x)        tan(x)
793
 *     ----------------------------------------------------------
794
 *          0          S           C             T
795
 *          1          C          -S            -1/T
796
 *          2         -S          -C             T
797
 *          3         -C           S            -1/T
798
 *     ----------------------------------------------------------
799
 *
800
 * Special cases:
801
 *      Let trig be any of sin, cos, or tan.
802
 *      trig(+-INF)  is NaN, with signals;
803
 *      trig(NaN)    is that NaN;
804
 *
805
 * Accuracy:
806
 *      TRIG(x) returns trig(x) nearly rounded
807
 */
808

809
JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
810
  double y[2],z=0.0;
811
  int n, ix;
812

813
  /* High word of x. */
814
  ix = high(x);
815

816
  /* |x| ~< pi/4 */
817
  ix &= 0x7fffffff;
818
  if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
819

820
  /* cos(Inf or NaN) is NaN */
821
  else if (ix>=0x7ff00000) return x-x;
822

823
  /* argument reduction needed */
824
  else {
825
    n = __ieee754_rem_pio2(x,y);
826
    switch(n&3) {
827
    case 0: return  __kernel_cos(y[0],y[1]);
828
    case 1: return -__kernel_sin(y[0],y[1],1);
829
    case 2: return -__kernel_cos(y[0],y[1]);
830
    default:
831
      return  __kernel_sin(y[0],y[1],1);
832
    }
833
  }
834
JRT_END
835

836
/* tan(x)
837
 * Return tangent function of x.
838
 *
839
 * kernel function:
840
 *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
841
 *      __ieee754_rem_pio2      ... argument reduction routine
842
 *
843
 * Method.
844
 *      Let S,C and T denote the sin, cos and tan respectively on
845
 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
846
 *      in [-pi/4 , +pi/4], and let n = k mod 4.
847
 *      We have
848
 *
849
 *          n        sin(x)      cos(x)        tan(x)
850
 *     ----------------------------------------------------------
851
 *          0          S           C             T
852
 *          1          C          -S            -1/T
853
 *          2         -S          -C             T
854
 *          3         -C           S            -1/T
855
 *     ----------------------------------------------------------
856
 *
857
 * Special cases:
858
 *      Let trig be any of sin, cos, or tan.
859
 *      trig(+-INF)  is NaN, with signals;
860
 *      trig(NaN)    is that NaN;
861
 *
862
 * Accuracy:
863
 *      TRIG(x) returns trig(x) nearly rounded
864
 */
865

866
JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
867
  double y[2],z=0.0;
868
  int n, ix;
869

870
  /* High word of x. */
871
  ix = high(x);
872

873
  /* |x| ~< pi/4 */
874
  ix &= 0x7fffffff;
875
  if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
876

877
  /* tan(Inf or NaN) is NaN */
878
  else if (ix>=0x7ff00000) return x-x;            /* NaN */
879

880
  /* argument reduction needed */
881
  else {
882
    n = __ieee754_rem_pio2(x,y);
883
    return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
884
                                                     -1 -- n odd */
885
  }
886
JRT_END
887

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.