/*
* e_rem_pio2.c
*
* Copyright (c) 1999-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include "math_private.h"
int __ieee754_rem_pio2(double x, double *y) {
int q;
y[1] = 0.0; /* default */
/*
* Simple cases: all nicked from the fdlibm version for speed.
*/
{
static const double invpio2 = 0x1.45f306dc9c883p-1;
static const double pio2s[] = {
0x1.921fb544p+0, /* 1.57079632673412561417e+00 */
0x1.0b4611a626331p-34, /* 6.07710050650619224932e-11 */
0x1.0b4611a6p-34, /* 6.07710050630396597660e-11 */
0x1.3198a2e037073p-69, /* 2.02226624879595063154e-21 */
0x1.3198a2ep-69, /* 2.02226624871116645580e-21 */
0x1.b839a252049c1p-104, /* 8.47842766036889956997e-32 */
};
double z,w,t,r,fn;
int i,j,idx,n,ix,hx;
hx = __HI(x); /* high word of x */
ix = hx&0x7fffffff;
if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
{y[0] = x; y[1] = 0; return 0;}
if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
if(hx>0) {
z = x - pio2s[0];
if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
y[0] = z - pio2s[1];
#ifdef bottomhalf
y[1] = (z-y[0])-pio2s[1];
#endif
} else { /* near pi/2, use 33+33+53 bit pi */
z -= pio2s[2];
y[0] = z - pio2s[3];
#ifdef bottomhalf
y[1] = (z-y[0])-pio2s[3];
#endif
}
return 1;
} else { /* negative x */
z = x + pio2s[0];
if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
y[0] = z + pio2s[1];
#ifdef bottomhalf
y[1] = (z-y[0])+pio2s[1];
#endif
} else { /* near pi/2, use 33+33+53 bit pi */
z += pio2s[2];
y[0] = z + pio2s[3];
#ifdef bottomhalf
y[1] = (z-y[0])+pio2s[3];
#endif
}
return -1;
}
}
if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
t = fabs(x);
n = (int) (t*invpio2+0.5);
fn = (double)n;
r = t-fn*pio2s[0];
w = fn*pio2s[1]; /* 1st round good to 85 bit */
j = ix>>20;
idx = 1;
while (1) {
y[0] = r-w;
if (idx == 3)
break;
i = j-(((__HI(y[0]))>>20)&0x7ff);
if(i <= 33*idx-17)
break;
t = r;
w = fn*pio2s[2*idx];
r = t-w;
w = fn*pio2s[2*idx+1]-((t-r)-w);
idx++;
}
#ifdef bottomhalf
y[1] = (r-y[0])-w;
#endif
if(hx<0) {
y[0] = -y[0];
#ifdef bottomhalf
y[1] = -y[1];
#endif
return -n;
} else {
return n;
}
}
}
{
static const unsigned twooverpi[] = {
/* We start with two zero words, because they take up less
* space than the array bounds checking and special-case
* handling that would have to occur in their absence. */
0, 0,
/* 2/pi in hex is 0.a2f9836e... */
0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599,
0x3c439041, 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0,
0x06492eea, 0x09d1921c, 0xfe1deb1c, 0xb129a73e, 0xe88235f5,
0x2ebb4484, 0xe99c7026, 0xb45f7e41, 0x3991d639, 0x835339f4,
0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f, 0xef2f118b,
0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1,
0x1f8d5d08,
};
/*
* Multiprecision multiplication of this nature is more
* readily done in integers than in VFP, since we can use
* UMULL (on CPUs that support it) to multiply 32 by 32 bits
* at a time whereas the VFP would only be able to do 12x12
* without losing accuracy.
*
* So extract the mantissa of the input number as two 32-bit
* integers.
*/
unsigned mant1 = 0x00100000 | (__HI(x) & 0xFFFFF);
unsigned mant2 = __LO(x);
/*
* Now work out which part of our stored value of 2/pi we're
* supposed to be multiplying by.
*
* Let the IEEE exponent field of x be e. With its bias
* removed, (e-1023) is the index of the set bit in bit 20
* of 'mant1' (i.e. that set bit has real place value
* 2^(e-1023)). So the lowest set bit in 'mant1', 52 bits
* further down, must have place value 2^(e-1075).
*
* We begin taking an interest in the value of 2/pi at the
* bit which multiplies by _that_ to give something with
* place value at most 2. In other words, the highest bit of
* 2/pi we're interested in is the one with place value
* 2/(2^(e-1075)) = 2^(1076-e).
*
* The bit at the top of the first zero word of the above
* array has place value 2^63. Hence, the bit we want to put
* at the top of the first word we extract from that array
* is the one at bit index n, where 63-n = 1076-e and hence
* n=e-1013.
*/
int topbitindex = ((__HI(x) >> 20) & 0x7FF) - 1013;
int wordindex = topbitindex >> 5;
int shiftup = topbitindex & 31;
int shiftdown = 32 - shiftup;
unsigned scaled[8];
int i;
scaled[7] = scaled[6] = 0;
for (i = 6; i-- > 0 ;) {
/*
* Extract a word from our representation of 2/pi.
*/
unsigned word;
if (shiftup)
word = (twooverpi[wordindex + i] << shiftup) | (twooverpi[wordindex + i + 1] >> shiftdown);
else
word = twooverpi[wordindex + i];
/*
* Multiply it by both words of our mantissa. (Should
* generate UMULLs where available.)
*/
unsigned long long mult1 = (unsigned long long)word * mant1;
unsigned long long mult2 = (unsigned long long)word * mant2;
/*
* Split those up into 32-bit chunks.
*/
unsigned bottom1 = (unsigned)mult1, top1 = (unsigned)(mult1 >> 32);
unsigned bottom2 = (unsigned)mult2, top2 = (unsigned)(mult2 >> 32);
/*
* Add those two chunks together.
*/
unsigned out1, out2, out3;
out3 = bottom2;
out2 = top2 + bottom1;
out1 = top1 + (out2 < top2);
/*
* And finally add them to our 'scaled' array.
*/
unsigned s3 = scaled[i+2], s2 = scaled[i+1], s1;
unsigned carry;
s3 = out3 + s3; carry = (s3 < out3);
s2 = out2 + s2 + carry; carry = carry ? (s2 <= out2) : (s2 < out2);
s1 = out1 + carry;
scaled[i+2] = s3;
scaled[i+1] = s2;
scaled[i] = s1;
}
/*
* The topmost set bit in mant1 is bit 20, and that has
* place value 2^(e-1023). The topmost bit (bit 31) of the
* most significant word we extracted from our twooverpi
* array had place value 2^(1076-e). So the product of those
* two bits must have place value 2^53; and that bit will
* have ended up as bit 19 of scaled[0]. Hence, the integer
* quadrant value we want to output, consisting of the bits
* with place values 2^1 and 2^0, must be 52 and 53 bits
* below that, i.e. precisely the topmost two bits of
* scaled[2].
*
* Or, at least, it will be once we add 1/2, to round to the
* _nearest_ multiple of pi/2 rather than the next one down.
*/
q = (scaled[2] + (1<<29)) >> 30;
/*
* Now we construct the output fraction, which is most
* simply done in the VFP. We just extract four consecutive
* bit strings from our chunk of binary data, convert them
* to doubles, equip each with an appropriate FP exponent,
* add them together, and (don't forget) multiply back up by
* pi/2. That way we don't have to work out ourselves where
* the highest fraction bit ended up.
*
* Since our displacement from the nearest multiple of pi/2
* can be positive or negative, the topmost of these four
* values must be arranged with its 2^-1 bit at the very top
* of the word, and then treated as a _signed_ integer.
*/
int i1 = (scaled[2] << 2);
unsigned i2 = scaled[3];
unsigned i3 = scaled[4];
unsigned i4 = scaled[5];
double f1 = i1, f2 = i2 * (0x1.0p-30), f3 = i3 * (0x1.0p-62), f4 = i4 * (0x1.0p-94);
/*
* Now f1+f2+f3+f4 is a representation, potentially to
* twice double precision, of 2^32 times ((2/pi)*x minus
* some integer). So our remaining job is to multiply
* back down by (pi/2)*2^-32, and convert back to one
* single-precision output number.
*/
double ftop = f1 + (f2 + (f3 + f4));
ftop = __SET_LO(ftop, 0);
double fbot = f4 - (((ftop-f1)-f2)-f3);
/* ... and multiply by a prec-and-a-half value of (pi/2)*2^-32. */
double ret = (ftop * 0x1.921fb54p-32) + (ftop * 0x1.10b4611a62633p-62 + fbot * 0x1.921fb54442d18p-32);
/* Just before we return, take the input sign into account. */
if (__HI(x) & 0x80000000) {
q = -q;
ret = -ret;
}
y[0] = ret;
return q;
}
}