Skip to content

Commit c47daab

Browse files
authored
10889 extended domain coverage of betaIncomplete and betaIncompleteCompl (#10909)
* 10889: betaIncomplete NaN handling Bring std.mathspecial.betaIncomplete's handling of NaN parameter values in line with how D's operators like operator!"+" handle them. I.e., the NaN with the largest payload is returned. * 10889: Added support for degenerate cases to betaIncomplete Support was added to betaIncomplete and betaIncompleteCompl for the cases when a or b is +0 or infinity. * 10889 betaIncomplete a=b, x=.5 special case * 10889: Convert betaIncomplete to use beta * 10889: handle reversal truncation issue in betaIncomplete When x > 1/b and x is larger than the mean, the complement is computed instead. x -> 1-x. If prior to reversal, x < real.epsilon, 1-x = 1, so x = 1 after reversal. Since algorthm requires x < 1, the computation of complement fails. To resolve this failure, when the reversal causes x -> 1, use x -> nextDown(1-x) instead. This fix resolved a problematic case for betaIncompleteInv that is tracked by a unittest. The unittest was updated. * 10889: Extracted logBeta into a function * 10889: factor constant out of function for later use * 10889: optimized logBeta Combine the Stirling approximations of the individual log gamma terms when one of them is large and the difference between the arguments is significant. This reduces the impact of subtracting the two larger terms. * 10889: betaDistPowerSeries handle a*s approx eq 1 In betaDistPowerSeries, when s*a is close to 1, extreme loss of precision can occur when combining terms in log space. This fix resolves this issue. * 10889: clamp betaDistPowerSeries The Beta CDF has a hard upper limit of 1. Due to round-off error, the result of betaDistPowerSeries can be very slightly larger than 1 when it should be 1. In this case, a value of 1 is returned. To prevent clamping from hiding a more serious bug, if the result would be greater than 1 + 2 epsilon, NaN is returned to indicated the computation failed.
1 parent c1d94e1 commit c47daab

File tree

2 files changed

+223
-24
lines changed

2 files changed

+223
-24
lines changed

std/internal/math/gammafunction.d

Lines changed: 173 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,13 @@ real gamma(real x)
422422
4.59084371199880305320475827592915200343410999829340L) >= real.mant_dig-1);
423423
}
424424

425+
426+
/* This is the lower bound on x for when the Stirling approximation can be used
427+
* to compute ln(Γ(x)).
428+
*/
429+
private enum real LN_GAMMA_STIRLING_LB = 13.0L;
430+
431+
425432
/*****************************************************
426433
* Natural logarithm of gamma function.
427434
*
@@ -480,7 +487,7 @@ real logGamma(real x)
480487
return z;
481488
}
482489

483-
if ( x < 13.0L )
490+
if ( x < LN_GAMMA_STIRLING_LB )
484491
{
485492
z = 1.0L;
486493
nx = floor( x + 0.5L );
@@ -878,6 +885,67 @@ real beta(in real x, in real y)
878885
}
879886

880887

888+
/* This is the natural logarithm of the absolute value of the beta function. It
889+
* tries to eliminate reduce the loss of precision that happens when subtracting
890+
* large numbers by combining the Stirling approximations of the individual
891+
* logGamma calls.
892+
*
893+
* ln|B(x,y)| = ln|Γ(x)| + ln|Γ(y)| - ln|Γ(x+y)|. Stirling's approximation for
894+
* ln|Γ(z)| is ln|Γ(z)| ~ zln(z) - z + ln(2𝜋/z)/2 + 𝚺ₙ₌₁ᴺB₂ₙ/[2n(2n-1)z²ⁿ⁻¹],
895+
* where Bₙ is the nᵗʰ Bernoulli number.
896+
* 𝚺ₙ₌₁ᴺB₂ₙ/[2n(2n-1)z²ⁿ⁻¹] = 𝚺ₙ₌₁ᴺB₂ₙ/[2n(2n-1)z²ⁿ⁻²]/z
897+
* = 𝚺ₙ₌₀ᴺ⁻¹B₂₍ₙ₊₁₎/[(2n+2)(2n+1)z²ⁿ]/z
898+
* = [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/z²)ⁿ]/z,
899+
* where Cₙ = B₂₍ₙ₊₁₎/[(2n+2)(2n+1)].
900+
* ln|Γ(z)| ~ zln(z) - z + ln(2𝜋/z)/2 +[𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/z²)ⁿ]/z.
901+
*/
902+
private real logBeta(real x, in real y)
903+
{
904+
const larger = x > y ? x : y;
905+
const smaller = x < y ? x : y;
906+
const sum = larger + smaller;
907+
908+
if (larger >= LN_GAMMA_STIRLING_LB && sum >= LN_GAMMA_STIRLING_LB && larger - smaller > 10.0L)
909+
{
910+
// Assume x > y
911+
// ln|Γ(x)| - ln|Γ(x+y)|
912+
// ~ x⋅ln(x) - (x+y)ln(x+y) + y + ln(2𝜋/x)/2 - ln(2𝜋/[x+y])/2
913+
// + [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/x²)ⁿ]/x - [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/{x+y}²)ⁿ]/{x+y}.
914+
// x⋅ln(x) - (x+y)ln(x+y) + y + ln(2𝜋/x)/2 - ln(2𝜋/[x+y])/2
915+
// = ln(xˣ) - ln([x+y]ˣ⁺ʸ) + y + ln(√[2𝜋/x]) - ln(√[2𝜋/{x+y}])
916+
// = ln(xˣ⁻¹ᐟ²/[x+y]ˣ⁺ʸ⁻¹ᐟ²) + y = ln([x/{x+y}]ˣ⁺ʸ⁻¹ᐟ²x⁻ʸ) + y
917+
// = y - y⋅ln(x) + (.5 - x - y)ln(1 + y/x)
918+
// ln|B(x,y)|
919+
// ~ ln|Γ(y)| + y - y⋅ln(x) + (.5 - x - y)ln(1 + y/x)
920+
// + [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/x²)ⁿ]/x - [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/{x+y}²)ⁿ]/{x+y}.
921+
const gamDiffApprox = smaller - smaller*log(larger) + (0.5L - sum)*log1p(smaller/larger);
922+
923+
const gamDiffCorr
924+
= poly(1.0L/larger^^2, logGammaStirlingCoeffs) / larger
925+
- poly(1.0L/sum^^2, logGammaStirlingCoeffs) / sum;
926+
927+
return logGamma(smaller) + gamDiffApprox + gamDiffCorr;
928+
}
929+
930+
return logGamma(smaller) + logGamma(larger) - logGamma(sum);
931+
}
932+
933+
@safe unittest
934+
{
935+
assert(isClose(logBeta(1, 1), log(beta(1, 1))));
936+
assert(isClose(logBeta(3, 2), logBeta(2, 3)));
937+
assert(isClose(exp(logBeta(20, 4)), beta(20, 4)));
938+
assert(isClose(logBeta(30, 40), log(beta(30, 40))));
939+
940+
// The following were generated by scipy's betaln function.
941+
assert(feqrel(logBeta(-1.4, -0.4), 1.133_156_234_422_692_6) > double.mant_dig-3);
942+
assert(feqrel(logBeta(-0.5, 1.0), 0.693_147_180_559_945_2) > double.mant_dig-3);
943+
assert(feqrel(logBeta(1.0, 2.0), -0.693_147_180_559_945_3) > double.mant_dig-3);
944+
assert(feqrel(logBeta(14.0, 3.0), -7.426_549_072_397_305) > double.mant_dig-3);
945+
assert(feqrel(logBeta(20.0, 30.0), -33.968_820_791_977_386) > double.mant_dig-3);
946+
}
947+
948+
881949
private {
882950
/*
883951
* These value can be calculated like this:
@@ -944,19 +1012,35 @@ enum real BETA_BIGINV = 1.084202172485504434007e-19L;
9441012
*/
9451013
real betaIncomplete(real aa, real bb, real xx )
9461014
{
947-
if ( !(aa>0 && bb>0) )
1015+
// If any parameters are NaN, return the NaN with the largest payload.
1016+
if (isNaN(aa) || isNaN(bb) || isNaN(xx))
9481017
{
949-
if ( isNaN(aa) ) return aa;
950-
if ( isNaN(bb) ) return bb;
951-
return real.nan; // domain error
1018+
// With cmp,
1019+
// -NaN(larger) < -NaN(smaller) < -inf < inf < NaN(smaller) < NaN(larger).
1020+
const largerParam = cmp(abs(aa), abs(bb)) >= 0 ? aa : bb;
1021+
return cmp(abs(xx), abs(largerParam)) >= 0 ? xx : largerParam;
9521022
}
953-
if (!(xx>0 && xx<1.0))
1023+
1024+
// domain errors
1025+
if (signbit(aa) == 1 || signbit(bb) == 1) return real.nan;
1026+
if (xx < 0.0L || xx > 1.0L) return real.nan;
1027+
1028+
// edge cases
1029+
if ( xx == 0.0L ) return 0.0L;
1030+
if ( xx == 1.0L ) return 1.0L;
1031+
1032+
// degenerate cases
1033+
if (aa is +0.0L || aa is real.infinity || bb is +0.0L || bb is real.infinity)
9541034
{
955-
if (isNaN(xx)) return xx;
956-
if ( xx == 0.0L ) return 0.0;
957-
if ( xx == 1.0L ) return 1.0;
958-
return real.nan; // domain error
1035+
if (aa is +0.0L && bb is +0.0L) return real.nan;
1036+
if (aa is real.infinity && bb is real.infinity) return real.nan;
1037+
if (aa is +0.0L || bb is real.infinity) return 1.0L;
1038+
if (aa is real.infinity || bb is +0.0L) return 0.0L;
9591039
}
1040+
1041+
// symmetry
1042+
if (aa == bb && xx == 0.5L) return 0.5L;
1043+
9601044
if ( (bb * xx) <= 1.0L && xx <= 0.95L)
9611045
{
9621046
return betaDistPowerSeries(aa, bb, xx);
@@ -976,6 +1060,7 @@ real betaIncomplete(real aa, real bb, real xx )
9761060
b = aa;
9771061
xc = xx;
9781062
x = 1.0L - xx;
1063+
if (x == 1.0L) x = nextDown(x);
9791064
}
9801065
else
9811066
{
@@ -1017,12 +1102,12 @@ real betaIncomplete(real aa, real bb, real xx )
10171102
t *= pow(x,a);
10181103
t /= a;
10191104
t *= w;
1020-
t *= gamma(a+b) / (gamma(a) * gamma(b));
1105+
t /= beta(a, b);
10211106
}
10221107
else
10231108
{
10241109
/* Resort to logarithms. */
1025-
y += t + logGamma(a+b) - logGamma(a) - logGamma(b);
1110+
y += t - logBeta(a, b);
10261111
y += log(w/a);
10271112

10281113
t = exp(y);
@@ -1329,11 +1414,31 @@ done:
13291414
assert(isIdentical(betaIncompleteInv(2,NaN(0xABC),8), NaN(0xABC)));
13301415
assert(isIdentical(betaIncompleteInv(2,3, NaN(0xABC)), NaN(0xABC)));
13311416

1332-
assert(isNaN(betaIncomplete(-1, 2, 3)));
1417+
assert(isNaN(betaIncomplete(-0., 1, .5)));
1418+
assert(isNaN(betaIncomplete(1, -0., .5)));
1419+
assert(isNaN(betaIncomplete(1, 1, -1)));
1420+
assert(isNaN(betaIncomplete(1, 1, 2)));
1421+
1422+
assert(betaIncomplete(+0., +0., 0) == 0);
1423+
assert(isNaN(betaIncomplete(+0., +0., .5)));
1424+
assert(betaIncomplete(+0., +0., 1) == 1);
1425+
assert(betaIncomplete(+0., 1, .5) == 1);
1426+
assert(betaIncomplete(1, +0., 0) == 0);
1427+
assert(betaIncomplete(1, +0., .5) == 0);
1428+
assert(betaIncomplete(1, real.infinity, .5) == 1);
1429+
assert(betaIncomplete(real.infinity, real.infinity, 0) == 0);
1430+
assert(isNaN(betaIncomplete(real.infinity, real.infinity, .5)));
13331431

13341432
assert(betaIncomplete(1, 2, 0)==0);
13351433
assert(betaIncomplete(1, 2, 1)==1);
1336-
assert(isNaN(betaIncomplete(1, 2, 3)));
1434+
assert(betaIncomplete(9.99999984824320730e+30, 9.99999984824320730e+30, 0.5) == 0.5L);
1435+
assert(betaIncomplete(1.17549435082228751e-38, 9.99999977819630836e+22, 9.99999968265522539e-22) == 1.0L);
1436+
assert(betaIncomplete(1.00000001954148138e-25, 1.00000001490116119e-01, 1.17549435082228751e-38) == 1.0L);
1437+
assert(isClose(betaIncomplete(9.99999983775159024e-18, 9.99999977819630836e+22, 1.00000001954148138e-25), 1.0L));
1438+
assert(isClose(
1439+
betaIncomplete(9.99999974737875164e-06, 9.99999998050644787e+18, 9.99999968265522539e-22),
1440+
0.9999596214389047L));
1441+
13371442
assert(betaIncompleteInv(1, 1, 0)==0);
13381443
assert(betaIncompleteInv(1, 1, 1)==1);
13391444

@@ -1364,23 +1469,38 @@ done:
13641469
assert(betaIncompleteInv(0.01L, 8e-48L, 9e-26L) == 1-real.epsilon);
13651470

13661471
// Beware: a one-bit change in pow() changes almost all digits in the result!
1472+
// scipy says that this is 0.99999_99995_89020_6 (0x1.ffff_fffc_783f_2a7ap-1)
1473+
// in double precision.
13671474
assert(feqrel(
13681475
betaIncompleteInv(0x1.b3d151fbba0eb18p+1L, 1.2265e-19L, 2.44859e-18L),
1369-
0x1.c0110c8531d0952cp-1L
1476+
0x1.ffff_fffc_783f_2a7ap-1
13701477
) > 10);
13711478
// This next case uncovered a one-bit difference in the FYL2X instruction
13721479
// between Intel and AMD processors. This difference gets magnified by 2^^38.
1373-
// WolframAlpha crashes attempting to calculate this.
1374-
assert(feqrel(betaIncompleteInv(0x1.ff1275ae5b939bcap-41L, 4.6713e18L, 0.0813601L),
1375-
0x1.f97749d90c7adba8p-63L) >= real.mant_dig - 39);
1480+
// WolframAlpha fails to calculate this.
1481+
// scipy says that this is 2.225073858507201e-308 in double precision,
1482+
// essentially double.min-normal.
1483+
assert(isClose(
1484+
betaIncompleteInv(0x1.ff1275ae5b939bcap-41L, 4.6713e18L, 0.0813601L),
1485+
2.225073858507201e-308,
1486+
0,
1487+
1e-40));
1488+
1489+
// scipy says that this is 8.068764506083944e-20 to double precision. Since this is a
1490+
// regression test where the original value isn't a known good value, I' updating the
1491+
// test value to the current generated value, which is closer to the scipy value.
13761492
real a1 = 3.40483L;
1377-
assert(betaIncompleteInv(a1, 4.0640301659679627772e19L, 0.545113L) == 0x1.ba8c08108aaf5d14p-109L);
1493+
assert(betaIncompleteInv(a1, 4.0640301659679627772e19L, 0.545113L) == 0x1.2a867b1e12b9bdf0p-64L);
1494+
13781495
real b1 = 2.82847e-25L;
13791496
assert(feqrel(betaIncompleteInv(0.01L, b1, 9e-26L), 0x1.549696104490aa9p-830L) >= real.mant_dig-10);
13801497

13811498
// --- Problematic cases ---
1382-
// This is a situation where the series expansion fails to converge
1383-
assert( isNaN(betaIncompleteInv(0.12167L, 4.0640301659679627772e19L, 0.0813601L)));
1499+
// In the past, this was a situation where the series expansion failed
1500+
// to converge.
1501+
assert(!isNaN(betaIncompleteInv(0.12167L, 4.0640301659679627772e19L, 0.0813601L)));
1502+
// Using scipy, the result should be 1.683301919972747e-29.
1503+
13841504
// This next result is almost certainly erroneous.
13851505
// Mathematica states: "(cannot be determined by current methods)"
13861506
assert(betaIncomplete(1.16251e20L, 2.18e39L, 5.45e-20L) == -real.infinity);
@@ -1592,19 +1712,48 @@ real betaDistPowerSeries(real a, real b, real x )
15921712
u = a * log(x);
15931713
if ( (a+b) < MAXGAMMA && fabs(u) < MAXLOG )
15941714
{
1595-
t = gamma(a+b)/(gamma(a)*gamma(b));
1596-
s = s * t * pow(x,a);
1715+
s = s * pow(x,a) / beta(a, b);
15971716
}
15981717
else
15991718
{
1600-
t = logGamma(a+b) - logGamma(a) - logGamma(b) + u + log(s);
1719+
if (abs(a*s - 1.0L) < 0.01L)
1720+
{
1721+
// Compute logGamma(a+b) - logGamma(b)
1722+
real lnGamma_apb_m_lnGamma_b;
1723+
1724+
if (b >= LN_GAMMA_STIRLING_LB)
1725+
{
1726+
const gamDiffApprox = a - a*log(b) + (0.5L - a - b)*log1p(a/b);
1727+
1728+
const gamDiffCorr
1729+
= poly(1.0L/b^^2, logGammaStirlingCoeffs) / b
1730+
- poly(1.0L/(a+b)^^2, logGammaStirlingCoeffs) / (a+b);
1731+
1732+
lnGamma_apb_m_lnGamma_b = -gamDiffApprox - gamDiffCorr;
1733+
}
1734+
else
1735+
{
1736+
lnGamma_apb_m_lnGamma_b = logGamma(a+b) - logGamma(b);
1737+
}
1738+
1739+
// Compute log(s) - logGamma(a)
1740+
const ln_s_m_lnGamma_a = log1p(a*s - 1.0L) - log(a) - logGamma(a);
1741+
1742+
t = lnGamma_apb_m_lnGamma_b + u + ln_s_m_lnGamma_a;
1743+
}
1744+
else
1745+
{
1746+
t = u + log(s) - logBeta(a, b);
1747+
}
16011748

16021749
if ( t < MINLOG )
16031750
{
16041751
s = 0.0L;
16051752
} else
16061753
s = exp(t);
16071754
}
1755+
1756+
if (s > 1.0L) return (s - 2*real.epsilon <= 1.0L) ? 1.0L : real.nan;
16081757
return s;
16091758
}
16101759

std/mathspecial.d

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,19 @@ real logmdigammaInverse(real x)
279279
*
280280
* Returns:
281281
* It returns $(SUB I, x)(a,b), an element of [0,1].
282+
*
283+
* $(TABLE_SV
284+
* $(TR $(TH a) $(TH b) $(TH x) $(TH betaIncomplete(a, b, x)) )
285+
* $(TR $(TD negative) $(TD b) $(TD x) $(TD $(NAN)) )
286+
* $(TR $(TD a) $(TD negative) $(TD x) $(TD $(NAN)) )
287+
* $(TR $(TD a) $(TD b) $(TD $(LT) 0) $(TD $(NAN)) )
288+
* $(TR $(TD a) $(TD b) $(TD $(GT) 1) $(TD $(NAN)) )
289+
* $(TR $(TD +0) $(TD +0) $(TD (0,1)) $(TD $(NAN)) )
290+
* $(TR $(TD $(INFIN)) $(TD $(INFIN)) $(TD (0,1)) $(TD $(NAN)) )
291+
* )
292+
*
293+
* If one or more of the input parameters are $(NAN), the one with the largest payload is returned.
294+
* For equal payloads but with possibly different signs, the order of preference is x, a, b.
282295
*
283296
* Note:
284297
* The integral is evaluated by a continued fraction expansion or, when `b * x` is small, by a
@@ -287,10 +300,33 @@ real logmdigammaInverse(real x)
287300
* See_Also: $(LREF beta) $(LREF betaIncompleteCompl)
288301
*/
289302
real betaIncomplete(real a, real b, real x )
303+
in
304+
{
305+
if (!isNaN(a) && !isNaN(b) && !isNaN(x))
306+
{
307+
assert(signbit(a) == 0, "a must be positive");
308+
assert(signbit(b) == 0, "b must be positive");
309+
assert(x >= 0 && x <= 1, "x must be in [0,1]");
310+
}
311+
}
312+
out(i; isNaN(i) || (i >=0 && i <= 1))
313+
do
290314
{
291315
return std.internal.math.gammafunction.betaIncomplete(a, b, x);
292316
}
293317

318+
///
319+
@safe unittest
320+
{
321+
assert(betaIncomplete(1, 1, .5) == .5);
322+
assert(betaIncomplete(+0., +0., 0) == 0);
323+
assert(isNaN(betaIncomplete(+0., +0., .5)));
324+
assert(isNaN(betaIncomplete(real.infinity, real.infinity, .5)));
325+
assert(betaIncomplete(real.infinity, real.infinity, 1) == 1);
326+
assert(betaIncomplete(NaN(0x1), 1, NaN(0x2)) is NaN(0x2));
327+
assert(betaIncomplete(1, NaN(0x3), -NaN(0x3)) is -NaN(0x3));
328+
}
329+
294330
/** Regularized incomplete beta function complement $(SUB I$(SUP C), x)(a,b)
295331
*
296332
* Mathematically, if a $(GT) 0, b $(GT) 0, and 0 $(LE) x $(LE) 1, then
@@ -307,6 +343,19 @@ real betaIncomplete(real a, real b, real x )
307343
*
308344
* Returns:
309345
* It returns $(SUB I$(SUP C), x)(a,b), an element of [0,1].
346+
*
347+
* $(TABLE_SV
348+
* $(TR $(TH a) $(TH b) $(TH x) $(TH betaIncompleteCompl(a, b, x)) )
349+
* $(TR $(TD negative) $(TD b) $(TD x) $(TD $(NAN)) )
350+
* $(TR $(TD a) $(TD negative) $(TD x) $(TD $(NAN)) )
351+
* $(TR $(TD a) $(TD b) $(TD $(LT) 0) $(TD $(NAN)) )
352+
* $(TR $(TD a) $(TD b) $(TD $(GT) 1) $(TD $(NAN)) )
353+
* $(TR $(TD +0) $(TD +0) $(TD (0,1)) $(TD $(NAN)) )
354+
* $(TR $(TD $(INFIN)) $(TD $(INFIN)) $(TD (0,1)) $(TD $(NAN)) )
355+
* )
356+
*
357+
* If one or more of the input parameters are $(NAN), the one with the largest payload is returned.
358+
* For equal payloads but with possibly different signs, the order of preference is x, a, b.
310359
*
311360
* See_Also: $(LREF beta) $(LREF betaIncomplete)
312361
*/
@@ -322,6 +371,7 @@ in
322371
assert(x >= 0 && x <= 1, "x must be in [0, 1]");
323372
}
324373
}
374+
out(i; isNaN(i) || (i >=0 && i <= 1))
325375
do
326376
{
327377
return std.internal.math.gammafunction.betaIncomplete(b, a, 1-x);

0 commit comments

Comments
 (0)