001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.statistics.distribution; 019 020import org.apache.commons.numbers.gamma.LogBeta; 021import org.apache.commons.numbers.gamma.RegularizedBeta; 022 023/** 024 * Implementation of the F-distribution. 025 * 026 * <p>The probability density function of \( X \) is: 027 * 028 * <p>\[ \begin{aligned} 029 * f(x; n, m) &= \frac{1}{\operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)} \left(\frac{n}{m}\right)^{n/2} x^{n/2 - 1} \left(1+\frac{n}{m} \, x \right)^{-(n+m)/2} \\ 030 * &= \frac{n^{\frac n 2} m^{\frac m 2} x^{\frac{n}{2}-1} }{ (nx+m)^{\frac{(n+m)}{2}} \operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)} \end{aligned} \] 031 * 032 * <p>for \( n, m > 0 \) the degrees of freedom, \( \operatorname{B}(a, b) \) is the beta function, 033 * and \( x \in [0, \infty) \). 034 * 035 * @see <a href="https://en.wikipedia.org/wiki/F-distribution">F-distribution (Wikipedia)</a> 036 * @see <a href="https://mathworld.wolfram.com/F-Distribution.html">F-distribution (MathWorld)</a> 037 */ 038public final class FDistribution extends AbstractContinuousDistribution { 039 /** Support lower bound. */ 040 private static final double SUPPORT_LO = 0; 041 /** Support upper bound. */ 042 private static final double SUPPORT_HI = Double.POSITIVE_INFINITY; 043 /** The minimum degrees of freedom for the denominator when computing the mean. */ 044 private static final double MIN_DENOMINATOR_DF_FOR_MEAN = 2.0; 045 /** The minimum degrees of freedom for the denominator when computing the variance. */ 046 private static final double MIN_DENOMINATOR_DF_FOR_VARIANCE = 4.0; 047 048 /** The numerator degrees of freedom. */ 049 private final double numeratorDegreesOfFreedom; 050 /** The denominator degrees of freedom. */ 051 private final double denominatorDegreesOfFreedom; 052 /** n/2 * log(n) + m/2 * log(m) with n = numerator DF and m = denominator DF. */ 053 private final double nHalfLogNmHalfLogM; 054 /** LogBeta(n/2, n/2) with n = numerator DF. */ 055 private final double logBetaNhalfMhalf; 056 /** Cached value for inverse probability function. */ 057 private final double mean; 058 /** Cached value for inverse probability function. */ 059 private final double variance; 060 061 /** 062 * @param numeratorDegreesOfFreedom Numerator degrees of freedom. 063 * @param denominatorDegreesOfFreedom Denominator degrees of freedom. 064 */ 065 private FDistribution(double numeratorDegreesOfFreedom, 066 double denominatorDegreesOfFreedom) { 067 this.numeratorDegreesOfFreedom = numeratorDegreesOfFreedom; 068 this.denominatorDegreesOfFreedom = denominatorDegreesOfFreedom; 069 final double nhalf = numeratorDegreesOfFreedom / 2; 070 final double mhalf = denominatorDegreesOfFreedom / 2; 071 nHalfLogNmHalfLogM = nhalf * Math.log(numeratorDegreesOfFreedom) + 072 mhalf * Math.log(denominatorDegreesOfFreedom); 073 logBetaNhalfMhalf = LogBeta.value(nhalf, mhalf); 074 075 if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_MEAN) { 076 mean = denominatorDegreesOfFreedom / (denominatorDegreesOfFreedom - 2); 077 } else { 078 mean = Double.NaN; 079 } 080 if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_VARIANCE) { 081 final double denomDFMinusTwo = denominatorDegreesOfFreedom - 2; 082 variance = (2 * (denominatorDegreesOfFreedom * denominatorDegreesOfFreedom) * 083 (numeratorDegreesOfFreedom + denominatorDegreesOfFreedom - 2)) / 084 (numeratorDegreesOfFreedom * (denomDFMinusTwo * denomDFMinusTwo) * 085 (denominatorDegreesOfFreedom - 4)); 086 } else { 087 variance = Double.NaN; 088 } 089 } 090 091 /** 092 * Creates an F-distribution. 093 * 094 * @param numeratorDegreesOfFreedom Numerator degrees of freedom. 095 * @param denominatorDegreesOfFreedom Denominator degrees of freedom. 096 * @return the distribution 097 * @throws IllegalArgumentException if {@code numeratorDegreesOfFreedom <= 0} or 098 * {@code denominatorDegreesOfFreedom <= 0}. 099 */ 100 public static FDistribution of(double numeratorDegreesOfFreedom, 101 double denominatorDegreesOfFreedom) { 102 if (numeratorDegreesOfFreedom <= 0) { 103 throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, 104 numeratorDegreesOfFreedom); 105 } 106 if (denominatorDegreesOfFreedom <= 0) { 107 throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, 108 denominatorDegreesOfFreedom); 109 } 110 return new FDistribution(numeratorDegreesOfFreedom, denominatorDegreesOfFreedom); 111 } 112 113 /** 114 * Gets the numerator degrees of freedom parameter of this distribution. 115 * 116 * @return the numerator degrees of freedom. 117 */ 118 public double getNumeratorDegreesOfFreedom() { 119 return numeratorDegreesOfFreedom; 120 } 121 122 /** 123 * Gets the denominator degrees of freedom parameter of this distribution. 124 * 125 * @return the denominator degrees of freedom. 126 */ 127 public double getDenominatorDegreesOfFreedom() { 128 return denominatorDegreesOfFreedom; 129 } 130 131 /** {@inheritDoc} 132 * 133 * <p>Returns the limit when {@code x = 0}: 134 * <ul> 135 * <li>{@code df1 < 2}: Infinity 136 * <li>{@code df1 == 2}: 1 137 * <li>{@code df1 > 2}: 0 138 * </ul> 139 * <p>Where {@code df1} is the numerator degrees of freedom. 140 */ 141 @Override 142 public double density(double x) { 143 if (x <= SUPPORT_LO || 144 x >= SUPPORT_HI) { 145 // Special case x=0: 146 // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor 147 if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) { 148 return numeratorDegreesOfFreedom == 2 ? 149 1 : 150 Double.POSITIVE_INFINITY; 151 } 152 return 0; 153 } 154 return computeDensity(x, false); 155 } 156 157 /** {@inheritDoc} 158 * 159 * <p>Returns the limit when {@code x = 0}: 160 * <ul> 161 * <li>{@code df1 < 2}: Infinity 162 * <li>{@code df1 == 2}: 0 163 * <li>{@code df1 > 2}: -Infinity 164 * </ul> 165 * <p>Where {@code df1} is the numerator degrees of freedom. 166 */ 167 @Override 168 public double logDensity(double x) { 169 if (x <= SUPPORT_LO || 170 x >= SUPPORT_HI) { 171 // Special case x=0: 172 // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor 173 if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) { 174 return numeratorDegreesOfFreedom == 2 ? 175 0 : 176 Double.POSITIVE_INFINITY; 177 } 178 return Double.NEGATIVE_INFINITY; 179 } 180 return computeDensity(x, true); 181 } 182 183 /** 184 * Compute the density at point x. Assumes x is within the support bound. 185 * 186 * @param x Value 187 * @param log true to compute the log density 188 * @return pdf(x) or logpdf(x) 189 */ 190 private double computeDensity(double x, boolean log) { 191 // The log computation may suffer cancellation effects due to summation of large 192 // opposing terms. Use it when the standard PDF result is not normal. 193 194 // Keep the z argument to the regularized beta well away from 1 to avoid rounding error. 195 // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html 196 197 final double n = numeratorDegreesOfFreedom; 198 final double m = denominatorDegreesOfFreedom; 199 final double nx = n * x; 200 final double z = m + nx; 201 final double y = n * m / (z * z); 202 double p; 203 if (nx > m) { 204 p = y * RegularizedBeta.derivative(m / z, 205 m / 2, n / 2); 206 } else { 207 p = y * RegularizedBeta.derivative(nx / z, 208 n / 2, m / 2); 209 } 210 // RegularizedBeta.derivative can have intermediate overflow before normalisation 211 // with small y. Check the result for a normal finite number. 212 if (p <= Double.MAX_VALUE && p >= Double.MIN_NORMAL) { 213 return log ? Math.log(p) : p; 214 } 215 // Fall back to the log computation 216 p = nHalfLogNmHalfLogM + (n / 2 - 1) * Math.log(x) - logBetaNhalfMhalf - 217 ((n + m) / 2) * Math.log(z); 218 return log ? p : Math.exp(p); 219 } 220 221 /** {@inheritDoc} */ 222 @Override 223 public double cumulativeProbability(double x) { 224 if (x <= SUPPORT_LO) { 225 return 0; 226 } else if (x >= SUPPORT_HI) { 227 return 1; 228 } 229 230 final double n = numeratorDegreesOfFreedom; 231 final double m = denominatorDegreesOfFreedom; 232 233 // Keep the z argument to the regularized beta well away from 1 to avoid rounding error. 234 // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html 235 // Note the logic in the Boost documentation for pdf and cdf is contradictory. 236 // This order will keep the argument far from 1. 237 238 final double nx = n * x; 239 if (nx > m) { 240 return RegularizedBeta.complement(m / (m + nx), 241 m / 2, n / 2); 242 } 243 return RegularizedBeta.value(nx / (m + nx), 244 n / 2, m / 2); 245 } 246 247 /** {@inheritDoc} */ 248 @Override 249 public double survivalProbability(double x) { 250 if (x <= SUPPORT_LO) { 251 return 1; 252 } else if (x >= SUPPORT_HI) { 253 return 0; 254 } 255 256 final double n = numeratorDegreesOfFreedom; 257 final double m = denominatorDegreesOfFreedom; 258 259 // Do the opposite of the CDF 260 261 final double nx = n * x; 262 if (nx > m) { 263 return RegularizedBeta.value(m / (m + nx), 264 m / 2, n / 2); 265 } 266 return RegularizedBeta.complement(nx / (m + nx), 267 n / 2, m / 2); 268 } 269 270 /** 271 * {@inheritDoc} 272 * 273 * <p>For denominator degrees of freedom parameter \( m \), the mean is: 274 * 275 * <p>\[ \mathbb{E}[X] = \begin{cases} 276 * \frac{m}{m-2} & \text{for } m \gt 2 \\ 277 * \text{undefined} & \text{otherwise} 278 * \end{cases} \] 279 * 280 * @return the mean, or {@link Double#NaN NaN} if it is not defined. 281 */ 282 @Override 283 public double getMean() { 284 return mean; 285 } 286 287 /** 288 * {@inheritDoc} 289 * 290 * <p>For numerator degrees of freedom parameter \( n \) and denominator 291 * degrees of freedom parameter \( m \), the variance is: 292 * 293 * <p>\[ \operatorname{var}[X] = \begin{cases} 294 * \frac{2m^2 (n+m-2)}{n (m-2)^2 (m-4)} & \text{for } m \gt 4 \\ 295 * \text{undefined} & \text{otherwise} 296 * \end{cases} \] 297 * 298 * @return the variance, or {@link Double#NaN NaN} if it is not defined. 299 */ 300 @Override 301 public double getVariance() { 302 return variance; 303 } 304 305 /** 306 * {@inheritDoc} 307 * 308 * <p>The lower bound of the support is always 0. 309 * 310 * @return 0. 311 */ 312 @Override 313 public double getSupportLowerBound() { 314 return SUPPORT_LO; 315 } 316 317 /** 318 * {@inheritDoc} 319 * 320 * <p>The upper bound of the support is always positive infinity. 321 * 322 * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}. 323 */ 324 @Override 325 public double getSupportUpperBound() { 326 return SUPPORT_HI; 327 } 328}