Reposted on 2024-07-27 by njuffa

Original posted on 2022-09-18 by njuffa
I will first present a single-precision implementation `lambert_wm1f()` for `float` mapped to IEEE-754 `binary32`.
As in the computation of the principal branch, functional iteration is not numerically stable near the branch point at *-1/e*. Therefore, a polynomial minimax approximation *p(t)* with *t=√(x+1/e)*, generated with the Remez algorithm, is used there.
For the functional iteration, a starting approximation from the literature with a maximum relative error of 1.488e-4 is used. Just one iteration with quadratic convergence is then required to reach full single-precision accuracy. I determined experimentally that the Newton iteration works best near zero, while the Iacono-Boyd iteration is advantageous over the balance of the input domain.
When built against Intel's implementation of the standard C math library, the maximum error of the ISO C99 implementation below is less than 2.5 ulps in an exhaustive test; a similar error bound should be achievable with any other high-quality implementation of the standard C math library. For an overview of the accuracy of commonly used implementations, see:
Brian Gladman, Vincenzo Innocente, John Mather, Paul Zimmermann, "Accuracy of Mathematical Functions in Single, Double, Double Extended, and Quadruple Precision." [HAL preprint hal-03141101v6](https://inria.hal.science/hal-03141101v6), Feb. 2024
```lang-c
#include <math.h>

/* Compute the negative branch of the real-valued Lambert W function, W_(-1).
   Maximum error when built against the Intel MKL:
   2.41426181 ulps @ -7.32652619e-2
*/
float lambert_wm1f (float x)
{
    const float c0  =  1.68090820e-1f; //  0x1.584000p-3
    const float c1  = -2.96497345e-3f; // -0x1.84a000p-9
    const float c2  = -2.87322998e-2f; // -0x1.d6c000p-6
    const float c3  =  7.07275391e-1f; //  0x1.6a2000p-1
    const float ln2 = 0.69314718f;
    const float l2e = 1.44269504f;
    const float em1_fact_0 = 0.625529587f; // exp(-1)_factor_0
    const float em1_fact_1 = 0.588108778f; // exp(-1)_factor_1
    float redx, r, s, t, w, e, num, den, rden;

    if (isnan (x)) return x + x;
    if (x == 0.0f) return -INFINITY;
    if (x > 0.0f) return INFINITY / INFINITY; // NaN
    redx = fmaf (em1_fact_0, em1_fact_1, x); // x + exp(-1)
    if (redx <= 0.09765625f) { // expansion at -(exp(-1))
        r = sqrtf (redx);
        w =             -3.30250000e+2f;  // -0x1.4a4000p+8
        w = fmaf (w, r,  3.53563141e+2f); //  0x1.61902ap+8
        w = fmaf (w, r, -1.91617889e+2f); // -0x1.7f3c5cp+7
        w = fmaf (w, r,  4.94172478e+1f); //  0x1.8b5686p+5
        w = fmaf (w, r, -1.23464909e+1f); // -0x1.8b1674p+3
        w = fmaf (w, r, -1.38704872e+0f); // -0x1.6315a0p+0
        w = fmaf (w, r, -1.99431837e+0f); // -0x1.fe8ba6p+0
        w = fmaf (w, r, -1.81044364e+0f); // -0x1.cf793cp+0
        w = fmaf (w, r, -2.33166337e+0f); // -0x1.2a73f2p+1
        w = fmaf (w, r, -1.00000000e+0f); // -0x1.000000p+0
    } else {
        /* Initial approximation based on: D. A. Barry, L. Li, and D.-S. Jeng,
           "Comments on 'Numerical Evaluation of the Lambert Function and
           Application to Generation of Generalized Gaussian Noise with
           Exponent 1/2'", IEEE Transactions on Signal Processing, Vol. 52,
           No. 5, May 2004, pp. 1456-1457
        */
        s = fmaf (log2f (-x), -ln2, -1.0f);
        t = sqrtf (s);
        w = -1.0f - s - (1.0f / fmaf (exp2f (c2 * t), c1 * t,
                                      fmaf (1.0f / t, c3, c0)));
        if (x > -0x1.0p-116f) {
            /* Newton iteration, for arguments near zero */
            e = exp2f (fmaf (w, l2e, 32));
            num = fmaf (w, e, -0x1.0p32 * x);
            den = fmaf (w, e, e);
            rden = 1.0f / den;
            w = fmaf (-num, rden, w);
        } else {
            /* Roberto Iacono and John Philip Boyd, "New approximations to
               the principal real-valued branch of the Lambert W function",
               Advances in Computational Mathematics, Vol. 43, No. 6,
               December 2017, pp. 1403-1436
            */
            t = w / (1.0f + w);
            w = fmaf (logf (x / w), t, t);
        }
    }
    return w;
}
```
The double-precision implementation `lambert_wm1()` for `double` mapped to IEEE-754 `binary64` is structurally very similar to the single-precision implementation. A minimax polynomial approximation is used near the branch point, and the balance of the input domain is covered via three rounds of Newton iteration. A custom exponentiation function with built-in scaling by powers of two is used, as well as a custom logarithm function, thus largely isolating this code from differences in standard C math library implementations. It is not feasible to test the double-precision implementation exhaustively; in one billion random trials all errors found were < 2.6 ulps.
```lang-c
#include <math.h>
#include <string.h>

double exp_scale_pos_normal (double a, int scale);
double log_pos_normal (double a);

/* Compute the negative branch of the real-valued Lambert W function, W_(-1).
   Max. err. found across 1B random trials:
   2.56233 ulps @ -0.3678692378632617
*/
double lambert_wm1 (double z)
{
    const double c0 =  1.6729676723480225e-1; //  0x1.569fbp-3
    const double c1 = -2.7966443449258804e-3; // -0x1.6e8fdp-9
    const double c2 = -2.1342277526855469e-2; // -0x1.5dac0p-6
    const double c3 =  7.0781660079956055e-1; //  0x1.6a66fp-1
    const double ln2 = 0.6931471805599453094172;
    const double exp2_64 = 1.8446744073709552e+19; // 0x1.0p64
    const double em1_fact_0 = 0.57086272525975246; // 0x1.24481e7efdfcep-1 // exp(-1)_factor_0
    const double em1_fact_1 = 0.64442715366299452; // 0x1.49f25b1b461b7p-1 // exp(-1)_factor_1
    double redz, r, s, t, w, e, num, den, rden;
    int i;

    if (isnan (z)) return z + z;
    if (z == 0.0) return -INFINITY;
    if (z > 0.0) return INFINITY / INFINITY; // NaN
    redz = fma (em1_fact_0, em1_fact_1, z); // z + exp(-1)
    if (redz < 0.04296875) { // expansion at -(exp(-1))
        r = sqrt (redz);
        w =            -3.1102051749530146e+3;
        w = fma (w, r,  3.3514583413659661e+3);
        w = fma (w, r, -2.0376505203571792e+3);
        w = fma (w, r,  6.6470674321336662e+2);
        w = fma (w, r, -1.9891092047488328e+2);
        w = fma (w, r,  1.1792563777850908e+1);
        w = fma (w, r, -1.6044530625408662e+1);
        w = fma (w, r, -8.0468700837359766e+0);
        w = fma (w, r, -5.8822749101364442e+0);
        w = fma (w, r, -4.1741334842726321e+0);
        w = fma (w, r, -3.0669009628894104e+0);
        w = fma (w, r, -2.3535502058947735e+0);
        w = fma (w, r, -1.9366311293653595e+0);
        w = fma (w, r, -1.8121878855163436e+0);
        w = fma (w, r, -2.3316439815975385e+0);
        w = fma (w, r, -1.0000000000000000e+0);
        return w;
    } else {
        /* Initial approximation based on: D. A. Barry, L. Li, D.-S. Jeng,
           "Comments on 'Numerical Evaluation of the Lambert Function and
           Application to Generation of Generalized Gaussian Noise with
           Exponent 1/2'", IEEE Transactions on Signal Processing, Vol. 52,
           No. 5, May 2004, pp. 1456-1457
        */
        s = - fma (ln2, -64.0, log_pos_normal (-exp2_64 * z)) - 1.0;
        t = sqrt (s);
        w = -1.0 - s - (1.0 / fma (exp_scale_pos_normal (c2 * t, 0), c1 * t,
                                   fma (1.0 / t, c3, c0)));
        /* Newton iterations */
        for (i = 0; i < 3; i++) {
            e = exp_scale_pos_normal (w, 64);
            num = fma (w, e, -exp2_64 * z);
            den = fma (w, e, e);
            rden = 1.0 / den;
            w = fma (-num, rden, w);
        }
    }
    return w;
}

int double2hiint (double a)
{
    unsigned long long int t;
    memcpy (&t, &a, sizeof t);
    return (int)(t >> 32);
}

int double2loint (double a)
{
    unsigned long long int t;
    memcpy (&t, &a, sizeof t);
    return (int)(unsigned int)t;
}

double hiloint2double (int hi, int lo)
{
    double r;
    unsigned long long int t;
    t = ((unsigned long long int)(unsigned int)hi << 32) | (unsigned int)lo;
    memcpy (&r, &t, sizeof r);
    return r;
}

/* exp(a) * 2**scale; pos. normal results only! Max. err. found: 0.89028 ulp */
double exp_scale_pos_normal (double a, int scale)
{
    const double ln2_hi = 6.9314718055829871e-01; // 0x1.62e42fefa00000p-01
    const double ln2_lo = 1.6465949582897082e-12; // 0x1.cf79abc9e3b3a0p-40
    const double l2e = 1.4426950408889634; // 0x1.71547652b82fe0p+00 // log2(e)
    const double cvt = 6755399441055744.0; // 0x1.80000000000000p+52 // 3*2**51
    double f, r;
    int i;

    /* exp(a) = exp(i + f); i = rint (a / log(2)) */
    r = fma (l2e, a, cvt);
    i = double2loint (r);
    r = r - cvt;
    f = fma (r, -ln2_hi, a);
    f = fma (r, -ln2_lo, f);

    /* approximate r = exp(f) on interval [-log(2)/2, +log(2)/2] */
    r =            2.5022018235176802e-8;  // 0x1.ade0000000000p-26
    r = fma (r, f, 2.7630903497145818e-7); // 0x1.28af3fcbbf09bp-22
    r = fma (r, f, 2.7557514543490574e-6); // 0x1.71dee623774fap-19
    r = fma (r, f, 2.4801491039409158e-5); // 0x1.a01997c8b50d7p-16
    r = fma (r, f, 1.9841269589068419e-4); // 0x1.a01a01475db8cp-13
    r = fma (r, f, 1.3888888945916566e-3); // 0x1.6c16c1852b805p-10
    r = fma (r, f, 8.3333333334557735e-3); // 0x1.11111111224c7p-7
    r = fma (r, f, 4.1666666666519782e-2); // 0x1.55555555502a5p-5
    r = fma (r, f, 1.6666666666666477e-1); // 0x1.5555555555511p-3
    r = fma (r, f, 5.0000000000000122e-1); // 0x1.000000000000bp-1
    r = fma (r, f, 1.0000000000000000e+0); // 0x1.0000000000000p+0
    r = fma (r, f, 1.0000000000000000e+0); // 0x1.0000000000000p+0

    /* exp(a) = 2**(i+scale) * r */
    r = hiloint2double (double2hiint (r) + ((i + scale) << 20),
                        double2loint (r));
    return r;
}

/* compute natural logarithm of positive normals; max. err. found: 0.86902 ulp */
double log_pos_normal (double a)
{
    const double ln2_hi = 6.9314718055994529e-01; // 0x1.62e42fefa39efp-01
    const double ln2_lo = 2.3190468138462996e-17; // 0x1.abc9e3b39803fp-56
    double m, r, i, s, t, p, q;
    int e;

    /* log(a) = log(m * 2**i) = i * log(2) + log(m) */
    e = (double2hiint (a) - double2hiint (0.70703125)) & 0xfff00000;
    m = hiloint2double (double2hiint (a) - e, double2loint (a));
    t = hiloint2double (0x41f00000, 0x80000000 ^ e);
    i = t - (hiloint2double (0x41f00000, 0x80000000));

    /* m now in [181/256, 362/256]. Compute q = (m-1) / (m+1) */
    p = m + 1.0;
    r = 1.0 / p;
    q = fma (m, r, -r);
    m = m - 1.0;

    /* compute (2*atanh(q)/q-2*q) as p(q**2), q in [-75/437, 53/309] */
    s = q * q;
    r =            1.4794533702196025e-1;  // 0x1.2efdf700d7135p-3
    r = fma (r, s, 1.5314187748152339e-1); // 0x1.39a272db730f7p-3
    r = fma (r, s, 1.8183559141306990e-1); // 0x1.746637f2f191bp-3
    r = fma (r, s, 2.2222198669309609e-1); // 0x1.c71c522a64577p-3
    r = fma (r, s, 2.8571428741489319e-1); // 0x1.24924941c9a2fp-2
    r = fma (r, s, 3.9999999999418523e-1); // 0x1.999999998006cp-2
    r = fma (r, s, 6.6666666666667340e-1); // 0x1.5555555555592p-1
    r = r * s;

    /* log(a) = 2*atanh(q) + i*log(2) = ln2_lo*i + p(q**2)*q + 2q + ln2_hi*i.
       Use K.C. Ng's trick to improve the accuracy of the computation, like
       so: p(q**2)*q + 2q = p(q**2)*q + q*t - t + m, where t = m**2/2.
    */
    t = m * m * 0.5;
    r = fma (q, t, fma (q, r, ln2_lo * i)) - t + m;
    r = fma (ln2_hi, i, r);

    return r;
}
```
From the literature it is clear that the most common method of computing the Lambert W function over the real numbers is functional iteration. The second-order Newton method is the simplest of these schemes:
w<sub>i+1</sub> = w<sub>i</sub> - (w<sub>i</sub> exp(w<sub>i</sub>) - x) / (exp(w<sub>i</sub>) + w<sub>i</sub> exp(w<sub>i</sub>)).
Much of the literature prefers higher-order methods, such as those by Halley, Fritsch, and Schroeder. During exploratory work I found that, when performed in finite-precision floating-point arithmetic, the numerical properties of these higher-order iterations are not as favorable as the order may suggest. As a particular example, three Newton iterations consistently outperformed two Halley iterations in terms of accuracy. For this reason I settled on the Newton iteration as the main building block, and used custom implementations of `exp()` that only need to deliver results representable as positive normals, in IEEE-754 terminology, to gain back some performance.
I further found that the Newton iteration converges only slowly for large operands, in particular when the starting approximation is not very accurate. Since a high iteration count is not conducive to good performance, I looked for an alternative and found a superior candidate in the logarithm-based iteration scheme by Iacono and Boyd<sup>[*]</sup>, which also has second-order convergence:
w<sub>i+1</sub> = (w<sub>i</sub> / (1 + w<sub>i</sub>)) * (1 + log (x / w<sub>i</sub>)).
Many implementations of the Lambert W function appear to be using different starting approximations for different portions of the input domain. My preference was for a single starting approximation across the entire input domain with a view to future vectorized implementations.
Luckily, Iacono and Boyd also provide a universal starting approximation that works across the entire input domain of W<sub>0</sub> and, while not entirely living up to its promises, performs very well. I fine-tuned it for the single-precision implementation, which deals with a much narrower input domain, using an optimizing search heuristic to achieve the best possible accuracy. I also employed custom implementations of `log()` that only have to deal with inputs that are positive normals.
Some care must be taken in both the starting approximation and the Newton iteration to avoid overflow and underflow in intermediate computations. This is easily and cheaply accomplished by scaling by suitable powers of two.
While the resulting iteration schemes deliver accurate results in general, errors of many ulps occur for arguments near zero and for arguments near *-1/e* ≈ -0.367879. I addressed the former issue by using the first few terms of the Taylor series expansion around zero: *x - x<sup>2</sup> + (3/2)x<sup>3</sup>*. The fact that *W<sub>0</sub> ≈ √(1+ex)-1* on [*-1/e, 0*] suggests the use of a minimax polynomial approximation *p(t)* with *t=√(x+1/e)*, which turns out to work reasonably well near *-1/e*. I generated this approximation with the Remez algorithm.
The accuracy achieved for both IEEE-754 `binary32` mapped to `float`, and IEEE-754 `binary64` mapped to `double` is well within the specified error bound. Maximum error in the positive half-plane is less than 1.5 ulps, and maximum error in the negative half-plane is below 2.7 ulps. The single-precision code was tested exhaustively, while the double-precision code was tested with one billion random arguments.
---
[*] Roberto Iacono and John P. Boyd. "New approximations to the principal real-valued branch of the Lambert W-function." *Advances in Computational Mathematics*, Vol. 43, No. 6, December 2017, pp. 1403-1436.
The single-precision implementation of the Lambert W<sub>0</sub> function is as follows:
```lang-c
#include <math.h>
#include <stdint.h>
#include <string.h>

float expf_scale_pos_normal (float a, int scale);
float logf_pos_normal (float a);

/* Compute the principal branch of the Lambert W function, W_0. The maximum
   error in the positive half-plane is 1.49874 ulps and the maximum error in
   the negative half-plane is 2.56002 ulps
*/
float lambert_w0f (float z)
{
    const float em1_fact_0 = 0.625529587f; // exp(-1)_factor_0
    const float em1_fact_1 = 0.588108778f; // exp(-1)_factor_1
    const float qe1 = 2.71828183f / 4.0f; // 0x1.5bf0a8p-01 // exp(1)/4
    float e, w, num, den, rden, redz, y, r;

    if (isnan (z) || (z == INFINITY) || (z == 0.0f)) return z + z;
    if (fabsf (z) < 1.220703125e-4f) return fmaf (-z, z, z); // 0x1.0p-13
    redz = fmaf (em1_fact_0, em1_fact_1, z); // z + exp(-1)
    if (redz < 0.0625f) { // expansion at -(exp(-1))
        r = sqrtf (redz);
        w =             -1.23046875f;  // -0x1.3b0000p+0
        w = fmaf (w, r,  2.17185670f); //  0x1.15ff66p+1
        w = fmaf (w, r, -2.19554094f); // -0x1.19077cp+1
        w = fmaf (w, r,  1.92107077f); //  0x1.ebcb4cp+0
        w = fmaf (w, r, -1.81141856f); // -0x1.cfb920p+0
        w = fmaf (w, r,  2.33162979f); //  0x1.2a72d8p+1
        w = fmaf (w, r, -1.00000000f); // -0x1.000000p+0
    } else {
        /* Compute initial approximation. Based on: Roberto Iacono and John
           Philip Boyd, "New approximations to the principal real-valued
           branch of the Lambert W function", Advances in Computational
           Mathematics, Vol. 43, No. 6, December 2017, pp. 1403-1436
        */
        y = fmaf (2.0f, sqrtf (fmaf (qe1, z, 0.25f)), 1.0f);
        y = logf_pos_normal (fmaf (1.15262585f, y, -0.15262585f) /
                             fmaf (0.45906518f, logf_pos_normal (y), 1.0f));
        w = fmaf (2.0390625f, y, -1.0f);

        /* perform Newton iterations to refine approximation to full accuracy */
        for (int i = 0; i < 3; i++) {
            e = expf_scale_pos_normal (w, -3); // 0.125f * expf (w)
            num = fmaf (w, e, -0.125f * z);
            den = fmaf (w, e, e);
            rden = 1.0f / den;
            w = fmaf (-num, rden, w);
        }
    }
    return w;
}

float uint32_as_float (uint32_t a)
{
    float r;
    memcpy (&r, &a, sizeof r);
    return r;
}

uint32_t float_as_uint32 (float a)
{
    uint32_t r;
    memcpy (&r, &a, sizeof r);
    return r;
}

/* exp(a) * 2**scale; positive normal results only! Maximum error 0.86565 ulp */
float expf_scale_pos_normal (float a, int scale)
{
    const float flt_int_cvt = 12582912.0f; // 0x1.8p23
    float f, r, j, t;
    uint32_t i;

    /* exp(a) = 2**i * exp(f); i = rintf (a / log(2)) */
    j = fmaf (1.442695f, a, flt_int_cvt); // 0x1.715476p0 // log2(e)
    t = j - flt_int_cvt;
    f = fmaf (t, -6.93145752e-1f, a); // -0x1.62e400p-1 // log_2_hi
    f = fmaf (t, -1.42860677e-6f, f); // -0x1.7f7d1cp-20 // log_2_lo
    i = float_as_uint32 (j);

    /* approximate r = exp(f) on interval [-log(2)/2, +log(2)/2] */
    r =             1.37805939e-3f;  // 0x1.694000p-10
    r = fmaf (r, f, 8.37312452e-3f); // 0x1.125edcp-7
    r = fmaf (r, f, 4.16695364e-2f); // 0x1.555b5ap-5
    r = fmaf (r, f, 1.66664720e-1f); // 0x1.555450p-3
    r = fmaf (r, f, 4.99999851e-1f); // 0x1.fffff6p-2
    r = fmaf (r, f, 1.00000000e+0f); // 0x1.000000p+0
    r = fmaf (r, f, 1.00000000e+0f); // 0x1.000000p+0

    /* exp(a) = 2**(i+scale) * r */
    r = uint32_as_float (((i + scale) << 23) + float_as_uint32 (r));
    return r;
}

/* compute natural logarithm of positive normals; maximum error: 0.85089 ulp */
float logf_pos_normal (float a)
{
    const float ln2 = 0.693147182f; // 0x1.62e430p-1 // log(2)
    const float two_to_m23 = 1.19209290e-7f; // 0x1.0p-23
    float m, r, s, t, i, f;
    int32_t e;

    /* log(a) = log(m * 2**i) = i * log(2) + log(m) */
    e = (float_as_uint32 (a) - float_as_uint32 (0.666666667f)) & 0xff800000;
    m = uint32_as_float (float_as_uint32 (a) - e);
    i = (float)e * two_to_m23;

    /* log(m) = log1p(f) */
    f = m - 1.0f;
    s = f * f;

    /* compute log1p(f) for f in [-1/3, 1/3] */
    r =             -0.130310059f;  // -0x1.0ae000p-3
    t =              0.140869141f;  //  0x1.208000p-3
    r = fmaf (r, s, -0.121483363f); // -0x1.f1988ap-4
    t = fmaf (t, s,  0.139814854f); //  0x1.1e5740p-3
    r = fmaf (r, s, -0.166846141f); // -0x1.55b36ep-3
    t = fmaf (t, s,  0.200120345f); //  0x1.99d8b2p-3
    r = fmaf (r, s, -0.249996200f); // -0x1.fffe02p-3
    r = fmaf (t, f, r);
    r = fmaf (r, f,  0.333331972f); //  0x1.5554fap-2
    r = fmaf (r, f, -0.500000000f); // -0x1.000000p-1
    r = fmaf (r, s, f);

    /* log(a) = i * log(2) + log(m) */
    r = fmaf (i, ln2, r);
    return r;
}
```
The double-precision implementation is structurally equivalent to the single-precision implementation, except that it makes use of the Iacono-Boyd iteration scheme:
```lang-c
#include <math.h>

double exp_scale_pos_normal (double a, int scale);
double log_pos_normal (double a);

/* Compute the principal branch of the Lambert W function, W_0.
   Maximum error: positive half-plane: 1.49210 ulp
                  negative half-plane: 2.67824 ulp
*/
double lambert_w0 (double z)
{
    const double em1_fact_0 = 0.57086272525975246; // 0x1.24481e7efdfcep-1 // exp(-1)_factor_0
    const double em1_fact_1 = 0.64442715366299452; // 0x1.49f25b1b461b7p-1 // exp(-1)_factor_1
    const double qe1 = 2.7182818284590452 * 0.25; // 0x1.5bf0a8b145769p-1 // exp(1)/4
    double e, r, t, w, y, num, den, rden, redz;
    int i;

    if (isnan (z) || (z == INFINITY) || (z == 0.0)) return z + z;
    if (fabs (z) < 1.9073486328125e-6) return fma (fma (1.5, z, -1.0) * z, z, z);
    redz = fma (em1_fact_0, em1_fact_1, z); // z + exp(-1)
    if (redz < 0.01025390625) { // expansion at -(exp(-1))
        r = sqrt (redz);
        w =            -7.8466654751155138;   // -0x1.f62fc463917ffp+2
        w = fma (w, r,  10.0241581340373877); //  0x1.40c5e74773ef5p+3
        w = fma (w, r, -8.1029379749359691);  // -0x1.034b44947bba0p+3
        w = fma (w, r,  5.8322883145113726);  //  0x1.75443634ead5fp+2
        w = fma (w, r, -4.1738796362609882);  // -0x1.0b20d80dcb9acp+2
        w = fma (w, r,  3.0668053943936471);  //  0x1.888d14440efd0p+1
        w = fma (w, r, -2.3535499689514934);  // -0x1.2d41201913016p+1
        w = fma (w, r,  1.9366310979331112);  //  0x1.efc70e3e0a0eap+0
        w = fma (w, r, -1.8121878855270763);  // -0x1.cfeb8b968bd2cp+0
        w = fma (w, r,  2.3316439815968506);  //  0x1.2a734f5b6fd56p+1
        w = fma (w, r, -1.0000000000000000);  // -0x1.0000000000000p+0
        return w;
    }
    /* Roberto Iacono and John Philip Boyd, "New approximations to the
       principal real-valued branch of the Lambert W function", Advances in
       Computational Mathematics, Vol. 43, No. 6, December 2017, pp. 1403-1436
    */
    y = fma (2.0, sqrt (fma (qe1, z, 0.25)), 1.0);
    y = log_pos_normal (fma (1.14956131, y, -0.14956131) /
                        fma (0.4549574, log_pos_normal (y), 1.0));
    w = fma (2.036, y, -1.0);

    /* Use iteration scheme w = (w / (1 + w)) * (1 + log (z / w)) from
       Roberto Iacono and John Philip Boyd, "New approximations to the
       principal real-valued branch of the Lambert W function", Advances in
       Computational Mathematics, Vol. 43, No. 6, December 2017, pp. 1403-1436
    */
    for (i = 0; i < 3; i++) {
        t = w / (1.0 + w);
        w = fma (log_pos_normal (z / w), t, t);
    }

    /* Fine tune approximation with a single Newton iteration */
    e = exp_scale_pos_normal (w, -3); // 0.125 * exp (w)
    num = fma (w, e, -0.125 * z);
    den = fma (w, e, e);
    rden = 1.0 / den;
    w = fma (-num, rden, w);

    return w;
}

/* double2hiint(), double2loint(), hiloint2double(), exp_scale_pos_normal(),
   and log_pos_normal() are identical to the versions shown in the W_(-1)
   implementation above */
```

        