10889 extended domain coverage of betaIncomplete and betaIncompleteCompl (#10909)

tedgin · web-flow · commit c47daab62e17 · 2025-12-07T16:33:11.000+08:00
* 10889: betaIncomplete NaN handling

Bring std.mathspecial.betaIncomplete's handling of NaN parameter values
in line with how D's operators like operator!"+" handle them. I.e., the
NaN with the largest payload is returned.

* 10889: Added support for degenerate cases to betaIncomplete

Support was added to betaIncomplete and betaIncompleteCompl
for the cases when a or b is +0 or infinity.

* 10889 betaIncomplete a=b, x=.5 special case

* 10889: Convert betaIncomplete to use beta

* 10889: handle reversal truncation issue in betaIncomplete

When x &gt; 1/b and x is larger than the mean, the complement is computed
instead. x -&gt; 1-x. If prior to reversal, x &lt; real.epsilon, 1-x = 1, so
x = 1 after reversal. Since algorthm requires x &lt; 1, the computation of
complement fails. To resolve this failure, when the reversal causes
x -&gt; 1, use x -&gt; nextDown(1-x) instead.

This fix resolved a problematic case for betaIncompleteInv that is
tracked by a unittest. The unittest was updated.

* 10889: Extracted logBeta into a function

* 10889: factor constant out of function for later use

* 10889: optimized logBeta

Combine the Stirling approximations of the individual log gamma terms
when one of them is large and the difference between the arguments is
significant. This reduces the impact of subtracting the two larger
terms.

* 10889: betaDistPowerSeries handle a*s approx eq 1

In betaDistPowerSeries, when s*a is close to 1, extreme loss of
precision can occur when combining terms in log space. This fix resolves
this issue.

* 10889: clamp betaDistPowerSeries

The Beta CDF has a hard upper limit of 1. Due to round-off error, the
result of betaDistPowerSeries can be very slightly larger than 1 when
it should be 1. In this case, a value of 1 is returned. To prevent
clamping from hiding a more serious bug, if the result would be greater
than 1 + 2 epsilon, NaN is returned to indicated the computation failed.
diff --git a/std/internal/math/gammafunction.d b/std/internal/math/gammafunction.d
@@ -422,6 +422,13 @@ real gamma(real x)
         4.59084371199880305320475827592915200343410999829340L) >= real.mant_dig-1);
 }
 
+
+/* This is the lower bound on x for when the Stirling approximation can be used
+ * to compute ln(Γ(x)).
+ */
+private enum real LN_GAMMA_STIRLING_LB = 13.0L;
+
+
 /*****************************************************
  * Natural logarithm of gamma function.
  *
@@ -480,7 +487,7 @@ real logGamma(real x)
         return z;
     }
 
-    if ( x < 13.0L )
+    if ( x < LN_GAMMA_STIRLING_LB )
     {
         z = 1.0L;
         nx = floor( x +  0.5L );
@@ -878,6 +885,67 @@ real beta(in real x, in real y)
 }
 
 
+/* This is the natural logarithm of the absolute value of the beta function. It
+ * tries to eliminate reduce the loss of precision that happens when subtracting
+ * large numbers by combining the Stirling approximations of the individual
+ * logGamma calls.
+ *
+ * ln|B(x,y)| = ln|Γ(x)| + ln|Γ(y)| - ln|Γ(x+y)|. Stirling's approximation for
+ * ln|Γ(z)| is ln|Γ(z)| ~ zln(z) - z + ln(2𝜋/z)/2 + 𝚺ₙ₌₁ᴺB₂ₙ/[2n(2n-1)z²ⁿ⁻¹],
+ * where Bₙ is the nᵗʰ Bernoulli number.
+ * 𝚺ₙ₌₁ᴺB₂ₙ/[2n(2n-1)z²ⁿ⁻¹] = 𝚺ₙ₌₁ᴺB₂ₙ/[2n(2n-1)z²ⁿ⁻²]/z
+ *    = 𝚺ₙ₌₀ᴺ⁻¹B₂₍ₙ₊₁₎/[(2n+2)(2n+1)z²ⁿ]/z
+ *    = [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/z²)ⁿ]/z,
+ * where Cₙ = B₂₍ₙ₊₁₎/[(2n+2)(2n+1)].
+ * ln|Γ(z)| ~ zln(z) - z + ln(2𝜋/z)/2 +[𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/z²)ⁿ]/z.
+ */
+private real logBeta(real x, in real y)
+{
+    const larger = x > y ? x : y;
+    const smaller = x < y ? x : y;
+    const sum = larger + smaller;
+
+    if (larger >= LN_GAMMA_STIRLING_LB && sum >= LN_GAMMA_STIRLING_LB && larger - smaller > 10.0L)
+    {
+        // Assume x > y
+        // ln|Γ(x)| - ln|Γ(x+y)|
+        //    ~ x⋅ln(x) - (x+y)ln(x+y) + y + ln(2𝜋/x)/2 - ln(2𝜋/[x+y])/2
+        //      + [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/x²)ⁿ]/x - [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/{x+y}²)ⁿ]/{x+y}.
+        // x⋅ln(x) - (x+y)ln(x+y) + y + ln(2𝜋/x)/2 - ln(2𝜋/[x+y])/2
+        //    = ln(xˣ) - ln([x+y]ˣ⁺ʸ) + y + ln(√[2𝜋/x]) - ln(√[2𝜋/{x+y}])
+        //    = ln(xˣ⁻¹ᐟ²/[x+y]ˣ⁺ʸ⁻¹ᐟ²) + y = ln([x/{x+y}]ˣ⁺ʸ⁻¹ᐟ²x⁻ʸ) + y
+        //    = y - y⋅ln(x) + (.5 - x - y)ln(1 + y/x)
+        // ln|B(x,y)|
+        //    ~ ln|Γ(y)| + y - y⋅ln(x) + (.5 - x - y)ln(1 + y/x)
+        //      + [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/x²)ⁿ]/x - [𝚺ₙ₌₀ᴺ⁻¹Cₙ(1/{x+y}²)ⁿ]/{x+y}.
+        const gamDiffApprox = smaller - smaller*log(larger) + (0.5L - sum)*log1p(smaller/larger);
+
+        const gamDiffCorr
+            = poly(1.0L/larger^^2, logGammaStirlingCoeffs) / larger
+            - poly(1.0L/sum^^2, logGammaStirlingCoeffs) / sum;
+
+        return logGamma(smaller) + gamDiffApprox + gamDiffCorr;
+    }
+
+    return logGamma(smaller) + logGamma(larger) - logGamma(sum);
+}
+
+@safe unittest
+{
+    assert(isClose(logBeta(1, 1), log(beta(1, 1))));
+    assert(isClose(logBeta(3, 2), logBeta(2, 3)));
+    assert(isClose(exp(logBeta(20, 4)), beta(20, 4)));
+    assert(isClose(logBeta(30, 40), log(beta(30, 40))));
+
+    // The following were generated by scipy's betaln function.
+    assert(feqrel(logBeta(-1.4, -0.4), 1.133_156_234_422_692_6) > double.mant_dig-3);
+    assert(feqrel(logBeta(-0.5, 1.0), 0.693_147_180_559_945_2) > double.mant_dig-3);
+    assert(feqrel(logBeta(1.0, 2.0), -0.693_147_180_559_945_3) > double.mant_dig-3);
+    assert(feqrel(logBeta(14.0, 3.0), -7.426_549_072_397_305) > double.mant_dig-3);
+    assert(feqrel(logBeta(20.0, 30.0), -33.968_820_791_977_386) > double.mant_dig-3);
+}
+
+
 private {
 /*
  * These value can be calculated like this:
@@ -944,19 +1012,35 @@ enum real BETA_BIGINV = 1.084202172485504434007e-19L;
  */
 real betaIncomplete(real aa, real bb, real xx )
 {
-    if ( !(aa>0 && bb>0) )
+    // If any parameters are NaN, return the NaN with the largest payload.
+    if (isNaN(aa) || isNaN(bb) || isNaN(xx))
     {
-         if ( isNaN(aa) ) return aa;
-         if ( isNaN(bb) ) return bb;
-         return real.nan; // domain error
+        // With cmp,
+        // -NaN(larger) < -NaN(smaller) < -inf < inf < NaN(smaller) < NaN(larger).
+        const largerParam = cmp(abs(aa), abs(bb)) >= 0 ? aa : bb;
+        return cmp(abs(xx), abs(largerParam)) >= 0 ? xx : largerParam;
     }
-    if (!(xx>0 && xx<1.0))
+
+    // domain errors
+    if (signbit(aa) == 1 || signbit(bb) == 1) return real.nan;
+    if (xx < 0.0L || xx > 1.0L) return real.nan;
+
+    // edge cases
+    if ( xx == 0.0L ) return 0.0L;
+    if ( xx == 1.0L )  return 1.0L;
+
+    // degenerate cases
+    if (aa is +0.0L || aa is real.infinity || bb is +0.0L || bb is real.infinity)
     {
-        if (isNaN(xx)) return xx;
-        if ( xx == 0.0L ) return 0.0;
-        if ( xx == 1.0L )  return 1.0;
-        return real.nan; // domain error
+        if (aa is +0.0L && bb is +0.0L) return real.nan;
+        if (aa is real.infinity && bb is real.infinity) return real.nan;
+        if (aa is +0.0L || bb is real.infinity) return 1.0L;
+        if (aa is real.infinity || bb is +0.0L) return 0.0L;
     }
+
+    // symmetry
+    if (aa == bb && xx == 0.5L) return 0.5L;
+
     if ( (bb * xx) <= 1.0L && xx <= 0.95L)
     {
         return betaDistPowerSeries(aa, bb, xx);
@@ -976,6 +1060,7 @@ real betaIncomplete(real aa, real bb, real xx )
         b = aa;
         xc = xx;
         x = 1.0L - xx;
+        if (x == 1.0L) x = nextDown(x);
     }
     else
     {
@@ -1017,12 +1102,12 @@ real betaIncomplete(real aa, real bb, real xx )
         t *= pow(x,a);
         t /= a;
         t *= w;
-        t *= gamma(a+b) / (gamma(a) * gamma(b));
+        t /= beta(a, b);
     }
     else
     {
         /* Resort to logarithms.  */
-        y += t + logGamma(a+b) - logGamma(a) - logGamma(b);
+        y += t - logBeta(a, b);
         y += log(w/a);
 
         t = exp(y);
@@ -1329,11 +1414,31 @@ done:
     assert(isIdentical(betaIncompleteInv(2,NaN(0xABC),8), NaN(0xABC)));
     assert(isIdentical(betaIncompleteInv(2,3, NaN(0xABC)), NaN(0xABC)));
 
-    assert(isNaN(betaIncomplete(-1, 2, 3)));
+    assert(isNaN(betaIncomplete(-0., 1, .5)));
+    assert(isNaN(betaIncomplete(1, -0., .5)));
+    assert(isNaN(betaIncomplete(1, 1, -1)));
+    assert(isNaN(betaIncomplete(1, 1, 2)));
+
+    assert(betaIncomplete(+0., +0., 0) == 0);
+    assert(isNaN(betaIncomplete(+0., +0., .5)));
+    assert(betaIncomplete(+0., +0., 1) == 1);
+    assert(betaIncomplete(+0., 1, .5) == 1);
+    assert(betaIncomplete(1, +0., 0) == 0);
+    assert(betaIncomplete(1, +0., .5) == 0);
+    assert(betaIncomplete(1, real.infinity, .5) == 1);
+    assert(betaIncomplete(real.infinity, real.infinity, 0) == 0);
+    assert(isNaN(betaIncomplete(real.infinity, real.infinity, .5)));
 
     assert(betaIncomplete(1, 2, 0)==0);
     assert(betaIncomplete(1, 2, 1)==1);
-    assert(isNaN(betaIncomplete(1, 2, 3)));
+    assert(betaIncomplete(9.99999984824320730e+30, 9.99999984824320730e+30, 0.5) == 0.5L);
+    assert(betaIncomplete(1.17549435082228751e-38, 9.99999977819630836e+22, 9.99999968265522539e-22) == 1.0L);
+    assert(betaIncomplete(1.00000001954148138e-25, 1.00000001490116119e-01, 1.17549435082228751e-38) == 1.0L);
+    assert(isClose(betaIncomplete(9.99999983775159024e-18, 9.99999977819630836e+22, 1.00000001954148138e-25), 1.0L));
+    assert(isClose(
+        betaIncomplete(9.99999974737875164e-06, 9.99999998050644787e+18, 9.99999968265522539e-22),
+        0.9999596214389047L));
+
     assert(betaIncompleteInv(1, 1, 0)==0);
     assert(betaIncompleteInv(1, 1, 1)==1);
 
@@ -1364,23 +1469,38 @@ done:
         assert(betaIncompleteInv(0.01L, 8e-48L, 9e-26L) == 1-real.epsilon);
 
         // Beware: a one-bit change in pow() changes almost all digits in the result!
+        // scipy says that this is 0.99999_99995_89020_6 (0x1.ffff_fffc_783f_2a7ap-1)
+        // in double precision.
         assert(feqrel(
             betaIncompleteInv(0x1.b3d151fbba0eb18p+1L, 1.2265e-19L, 2.44859e-18L),
-            0x1.c0110c8531d0952cp-1L
+            0x1.ffff_fffc_783f_2a7ap-1
         ) > 10);
         // This next case uncovered a one-bit difference in the FYL2X instruction
         // between Intel and AMD processors. This difference gets magnified by 2^^38.
-        // WolframAlpha crashes attempting to calculate this.
-        assert(feqrel(betaIncompleteInv(0x1.ff1275ae5b939bcap-41L, 4.6713e18L, 0.0813601L),
-            0x1.f97749d90c7adba8p-63L) >= real.mant_dig - 39);
+        // WolframAlpha fails to calculate this.
+        // scipy says that this is 2.225073858507201e-308 in double precision,
+        // essentially double.min-normal.
+        assert(isClose(
+            betaIncompleteInv(0x1.ff1275ae5b939bcap-41L, 4.6713e18L, 0.0813601L),
+            2.225073858507201e-308,
+            0,
+            1e-40));
+
+        // scipy says that this is 8.068764506083944e-20 to double precision. Since this is a
+        // regression test where the original value isn't a known good value, I' updating the
+        // test value to the current generated value, which is closer to the scipy value.
         real a1 = 3.40483L;
-        assert(betaIncompleteInv(a1, 4.0640301659679627772e19L, 0.545113L) == 0x1.ba8c08108aaf5d14p-109L);
+        assert(betaIncompleteInv(a1, 4.0640301659679627772e19L, 0.545113L) == 0x1.2a867b1e12b9bdf0p-64L);
+
         real b1 = 2.82847e-25L;
         assert(feqrel(betaIncompleteInv(0.01L, b1, 9e-26L), 0x1.549696104490aa9p-830L) >= real.mant_dig-10);
 
         // --- Problematic cases ---
-        // This is a situation where the series expansion fails to converge
-        assert( isNaN(betaIncompleteInv(0.12167L, 4.0640301659679627772e19L, 0.0813601L)));
+        // In the past, this was a situation where the series expansion failed
+        // to converge.
+        assert(!isNaN(betaIncompleteInv(0.12167L, 4.0640301659679627772e19L, 0.0813601L)));
+        // Using scipy, the result should be 1.683301919972747e-29.
+
         // This next result is almost certainly erroneous.
         // Mathematica states: "(cannot be determined by current methods)"
         assert(betaIncomplete(1.16251e20L, 2.18e39L, 5.45e-20L) == -real.infinity);
@@ -1592,19 +1712,48 @@ real betaDistPowerSeries(real a, real b, real x )
     u = a * log(x);
     if ( (a+b) < MAXGAMMA && fabs(u) < MAXLOG )
     {
-        t = gamma(a+b)/(gamma(a)*gamma(b));
-        s = s * t * pow(x,a);
+        s = s * pow(x,a) / beta(a, b);
     }
     else
     {
-        t = logGamma(a+b) - logGamma(a) - logGamma(b) + u + log(s);
+        if (abs(a*s - 1.0L) < 0.01L)
+        {
+            // Compute logGamma(a+b) - logGamma(b)
+            real lnGamma_apb_m_lnGamma_b;
+
+            if (b >= LN_GAMMA_STIRLING_LB)
+            {
+                const gamDiffApprox = a - a*log(b) + (0.5L - a - b)*log1p(a/b);
+
+                const gamDiffCorr
+                    = poly(1.0L/b^^2, logGammaStirlingCoeffs) / b
+                    - poly(1.0L/(a+b)^^2, logGammaStirlingCoeffs) / (a+b);
+
+                lnGamma_apb_m_lnGamma_b = -gamDiffApprox - gamDiffCorr;
+            }
+            else
+            {
+                lnGamma_apb_m_lnGamma_b = logGamma(a+b) - logGamma(b);
+            }
+
+            // Compute log(s) - logGamma(a)
+            const ln_s_m_lnGamma_a = log1p(a*s - 1.0L) - log(a) - logGamma(a);
+
+            t = lnGamma_apb_m_lnGamma_b + u + ln_s_m_lnGamma_a;
+        }
+        else
+        {
+            t = u + log(s) - logBeta(a, b);
+        }
 
         if ( t < MINLOG )
         {
             s = 0.0L;
         } else
             s = exp(t);
     }
+
+    if (s > 1.0L) return (s - 2*real.epsilon <= 1.0L) ? 1.0L : real.nan;
     return s;
 }
 
diff --git a/std/mathspecial.d b/std/mathspecial.d
@@ -279,6 +279,19 @@ real logmdigammaInverse(real x)
  *
  * Returns:
  *   It returns $(SUB I, x)(a,b), an element of [0,1].
+ *
+  * $(TABLE_SV
+ *   $(TR $(TH a)         $(TH b)         $(TH x)        $(TH betaIncomplete(a, b, x)) )
+ *   $(TR $(TD negative)  $(TD b)         $(TD x)        $(TD $(NAN))                  )
+ *   $(TR $(TD a)         $(TD negative)  $(TD x)        $(TD $(NAN))                  )
+ *   $(TR $(TD a)         $(TD b)         $(TD $(LT) 0)  $(TD $(NAN))                  )
+ *   $(TR $(TD a)         $(TD b)         $(TD $(GT) 1)  $(TD $(NAN))                  )
+ *   $(TR $(TD +0)        $(TD +0)        $(TD (0,1))    $(TD $(NAN))                  )
+ *   $(TR $(TD $(INFIN))  $(TD $(INFIN))  $(TD (0,1))    $(TD $(NAN))                  )
+ * )
+ *
+ * If one or more of the input parameters are $(NAN), the one with the largest payload is returned.
+ * For equal payloads but with possibly different signs, the order of preference is x, a, b.
  *
  * Note:
  *   The integral is evaluated by a continued fraction expansion or, when `b * x` is small, by a
@@ -287,10 +300,33 @@ real logmdigammaInverse(real x)
  * See_Also: $(LREF beta) $(LREF betaIncompleteCompl)
  */
 real betaIncomplete(real a, real b, real x )
+in
+{
+    if (!isNaN(a) && !isNaN(b) && !isNaN(x))
+    {
+        assert(signbit(a) == 0, "a must be positive");
+        assert(signbit(b) == 0, "b must be positive");
+        assert(x >= 0 && x <= 1, "x must be in [0,1]");
+    }
+}
+out(i; isNaN(i) || (i >=0 && i <= 1))
+do
 {
     return std.internal.math.gammafunction.betaIncomplete(a, b, x);
 }
 
+///
+@safe unittest
+{
+    assert(betaIncomplete(1, 1, .5) == .5);
+    assert(betaIncomplete(+0., +0., 0) == 0);
+    assert(isNaN(betaIncomplete(+0., +0., .5)));
+    assert(isNaN(betaIncomplete(real.infinity, real.infinity, .5)));
+    assert(betaIncomplete(real.infinity, real.infinity, 1) == 1);
+    assert(betaIncomplete(NaN(0x1), 1, NaN(0x2)) is NaN(0x2));
+    assert(betaIncomplete(1, NaN(0x3), -NaN(0x3)) is -NaN(0x3));
+}
+
 /** Regularized incomplete beta function complement $(SUB I$(SUP C), x)(a,b)
  *
  * Mathematically, if a $(GT) 0, b $(GT) 0, and 0 $(LE) x $(LE) 1, then
@@ -307,6 +343,19 @@ real betaIncomplete(real a, real b, real x )
  *
  * Returns:
  *   It returns $(SUB I$(SUP C), x)(a,b), an element of [0,1].
+ *
+   * $(TABLE_SV
+ *   $(TR $(TH a)         $(TH b)         $(TH x)        $(TH betaIncompleteCompl(a, b, x)) )
+ *   $(TR $(TD negative)  $(TD b)         $(TD x)        $(TD $(NAN))                       )
+ *   $(TR $(TD a)         $(TD negative)  $(TD x)        $(TD $(NAN))                       )
+ *   $(TR $(TD a)         $(TD b)         $(TD $(LT) 0)  $(TD $(NAN))                       )
+ *   $(TR $(TD a)         $(TD b)         $(TD $(GT) 1)  $(TD $(NAN))                       )
+ *   $(TR $(TD +0)        $(TD +0)        $(TD (0,1))    $(TD $(NAN))                       )
+ *   $(TR $(TD $(INFIN))  $(TD $(INFIN))  $(TD (0,1))    $(TD $(NAN))                       )
+ * )
+ *
+ * If one or more of the input parameters are $(NAN), the one with the largest payload is returned.
+ * For equal payloads but with possibly different signs, the order of preference is x, a, b.
  *
  * See_Also: $(LREF beta) $(LREF betaIncomplete)
  */
@@ -322,6 +371,7 @@ in
         assert(x >= 0 && x <= 1, "x must be in [0, 1]");
     }
 }
+out(i; isNaN(i) || (i >=0 && i <= 1))
 do
 {
     return std.internal.math.gammafunction.betaIncomplete(b, a, 1-x);