001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.statistics.distribution;
019
020import org.apache.commons.numbers.gamma.LogBeta;
021import org.apache.commons.numbers.gamma.RegularizedBeta;
022
023/**
024 * Implementation of the F-distribution.
025 *
026 * <p>The probability density function of \( X \) is:
027 *
028 * <p>\[ \begin{aligned}
029 *       f(x; n, m) &amp;= \frac{1}{\operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)} \left(\frac{n}{m}\right)^{n/2} x^{n/2 - 1} \left(1+\frac{n}{m} \, x \right)^{-(n+m)/2} \\
030 *                  &amp;= \frac{n^{\frac n 2} m^{\frac m 2} x^{\frac{n}{2}-1} }{ (nx+m)^{\frac{(n+m)}{2}} \operatorname{B}\left(\frac{n}{2},\frac{m}{2}\right)}   \end{aligned} \]
031 *
032 * <p>for \( n, m &gt; 0 \) the degrees of freedom, \( \operatorname{B}(a, b) \) is the beta function,
033 * and \( x \in [0, \infty) \).
034 *
035 * @see <a href="https://en.wikipedia.org/wiki/F-distribution">F-distribution (Wikipedia)</a>
036 * @see <a href="https://mathworld.wolfram.com/F-Distribution.html">F-distribution (MathWorld)</a>
037 */
038public final class FDistribution extends AbstractContinuousDistribution {
039    /** Support lower bound. */
040    private static final double SUPPORT_LO = 0;
041    /** Support upper bound. */
042    private static final double SUPPORT_HI = Double.POSITIVE_INFINITY;
043    /** The minimum degrees of freedom for the denominator when computing the mean. */
044    private static final double MIN_DENOMINATOR_DF_FOR_MEAN = 2.0;
045    /** The minimum degrees of freedom for the denominator when computing the variance. */
046    private static final double MIN_DENOMINATOR_DF_FOR_VARIANCE = 4.0;
047
048    /** The numerator degrees of freedom. */
049    private final double numeratorDegreesOfFreedom;
050    /** The denominator degrees of freedom. */
051    private final double denominatorDegreesOfFreedom;
052    /** n/2 * log(n) + m/2 * log(m) with n = numerator DF and m = denominator DF. */
053    private final double nHalfLogNmHalfLogM;
054    /** LogBeta(n/2, n/2) with n = numerator DF. */
055    private final double logBetaNhalfMhalf;
056    /** Cached value for inverse probability function. */
057    private final double mean;
058    /** Cached value for inverse probability function. */
059    private final double variance;
060
061    /**
062     * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
063     * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
064     */
065    private FDistribution(double numeratorDegreesOfFreedom,
066                          double denominatorDegreesOfFreedom) {
067        this.numeratorDegreesOfFreedom = numeratorDegreesOfFreedom;
068        this.denominatorDegreesOfFreedom = denominatorDegreesOfFreedom;
069        final double nhalf = numeratorDegreesOfFreedom / 2;
070        final double mhalf = denominatorDegreesOfFreedom / 2;
071        nHalfLogNmHalfLogM = nhalf * Math.log(numeratorDegreesOfFreedom) +
072                             mhalf * Math.log(denominatorDegreesOfFreedom);
073        logBetaNhalfMhalf = LogBeta.value(nhalf, mhalf);
074
075        if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_MEAN) {
076            mean = denominatorDegreesOfFreedom / (denominatorDegreesOfFreedom - 2);
077        } else {
078            mean = Double.NaN;
079        }
080        if (denominatorDegreesOfFreedom > MIN_DENOMINATOR_DF_FOR_VARIANCE) {
081            final double denomDFMinusTwo = denominatorDegreesOfFreedom - 2;
082            variance = (2 * (denominatorDegreesOfFreedom * denominatorDegreesOfFreedom) *
083                            (numeratorDegreesOfFreedom + denominatorDegreesOfFreedom - 2)) /
084                       (numeratorDegreesOfFreedom * (denomDFMinusTwo * denomDFMinusTwo) *
085                            (denominatorDegreesOfFreedom - 4));
086        } else {
087            variance = Double.NaN;
088        }
089    }
090
091    /**
092     * Creates an F-distribution.
093     *
094     * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
095     * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
096     * @return the distribution
097     * @throws IllegalArgumentException if {@code numeratorDegreesOfFreedom <= 0} or
098     * {@code denominatorDegreesOfFreedom <= 0}.
099     */
100    public static FDistribution of(double numeratorDegreesOfFreedom,
101                                   double denominatorDegreesOfFreedom) {
102        if (numeratorDegreesOfFreedom <= 0) {
103            throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
104                                            numeratorDegreesOfFreedom);
105        }
106        if (denominatorDegreesOfFreedom <= 0) {
107            throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
108                                            denominatorDegreesOfFreedom);
109        }
110        return new FDistribution(numeratorDegreesOfFreedom, denominatorDegreesOfFreedom);
111    }
112
113    /**
114     * Gets the numerator degrees of freedom parameter of this distribution.
115     *
116     * @return the numerator degrees of freedom.
117     */
118    public double getNumeratorDegreesOfFreedom() {
119        return numeratorDegreesOfFreedom;
120    }
121
122    /**
123     * Gets the denominator degrees of freedom parameter of this distribution.
124     *
125     * @return the denominator degrees of freedom.
126     */
127    public double getDenominatorDegreesOfFreedom() {
128        return denominatorDegreesOfFreedom;
129    }
130
131    /** {@inheritDoc}
132     *
133     * <p>Returns the limit when {@code x = 0}:
134     * <ul>
135     * <li>{@code df1 < 2}: Infinity
136     * <li>{@code df1 == 2}: 1
137     * <li>{@code df1 > 2}: 0
138     * </ul>
139     * <p>Where {@code df1} is the numerator degrees of freedom.
140     */
141    @Override
142    public double density(double x) {
143        if (x <= SUPPORT_LO ||
144            x >= SUPPORT_HI) {
145            // Special case x=0:
146            // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor
147            if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) {
148                return numeratorDegreesOfFreedom == 2 ?
149                    1 :
150                    Double.POSITIVE_INFINITY;
151            }
152            return 0;
153        }
154        return computeDensity(x, false);
155    }
156
157    /** {@inheritDoc}
158     *
159     * <p>Returns the limit when {@code x = 0}:
160     * <ul>
161     * <li>{@code df1 < 2}: Infinity
162     * <li>{@code df1 == 2}: 0
163     * <li>{@code df1 > 2}: -Infinity
164     * </ul>
165     * <p>Where {@code df1} is the numerator degrees of freedom.
166     */
167    @Override
168    public double logDensity(double x) {
169        if (x <= SUPPORT_LO ||
170            x >= SUPPORT_HI) {
171            // Special case x=0:
172            // PDF reduces to the term x^(df1 / 2 - 1) multiplied by a scaling factor
173            if (x == SUPPORT_LO && numeratorDegreesOfFreedom <= 2) {
174                return numeratorDegreesOfFreedom == 2 ?
175                    0 :
176                    Double.POSITIVE_INFINITY;
177            }
178            return Double.NEGATIVE_INFINITY;
179        }
180        return computeDensity(x, true);
181    }
182
183    /**
184     * Compute the density at point x. Assumes x is within the support bound.
185     *
186     * @param x Value
187     * @param log true to compute the log density
188     * @return pdf(x) or logpdf(x)
189     */
190    private double computeDensity(double x, boolean log) {
191        // The log computation may suffer cancellation effects due to summation of large
192        // opposing terms. Use it when the standard PDF result is not normal.
193
194        // Keep the z argument to the regularized beta well away from 1 to avoid rounding error.
195        // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html
196
197        final double n = numeratorDegreesOfFreedom;
198        final double m = denominatorDegreesOfFreedom;
199        final double nx = n * x;
200        final double z = m + nx;
201        final double y = n * m / (z * z);
202        double p;
203        if (nx > m) {
204            p = y * RegularizedBeta.derivative(m / z,
205                                               m / 2, n / 2);
206        } else {
207            p = y * RegularizedBeta.derivative(nx / z,
208                                               n / 2, m / 2);
209        }
210        // RegularizedBeta.derivative can have intermediate overflow before normalisation
211        // with small y. Check the result for a normal finite number.
212        if (p <= Double.MAX_VALUE && p >= Double.MIN_NORMAL) {
213            return log ? Math.log(p) : p;
214        }
215        // Fall back to the log computation
216        p = nHalfLogNmHalfLogM + (n / 2 - 1) * Math.log(x) - logBetaNhalfMhalf -
217                ((n + m) / 2) * Math.log(z);
218        return log ? p : Math.exp(p);
219    }
220
221    /** {@inheritDoc} */
222    @Override
223    public double cumulativeProbability(double x)  {
224        if (x <= SUPPORT_LO) {
225            return 0;
226        } else if (x >= SUPPORT_HI) {
227            return 1;
228        }
229
230        final double n = numeratorDegreesOfFreedom;
231        final double m = denominatorDegreesOfFreedom;
232
233        // Keep the z argument to the regularized beta well away from 1 to avoid rounding error.
234        // See: https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/f_dist.html
235        // Note the logic in the Boost documentation for pdf and cdf is contradictory.
236        // This order will keep the argument far from 1.
237
238        final double nx = n * x;
239        if (nx > m) {
240            return RegularizedBeta.complement(m / (m + nx),
241                                              m / 2, n / 2);
242        }
243        return RegularizedBeta.value(nx / (m + nx),
244                                     n / 2, m / 2);
245    }
246
247    /** {@inheritDoc} */
248    @Override
249    public double survivalProbability(double x)  {
250        if (x <= SUPPORT_LO) {
251            return 1;
252        } else if (x >= SUPPORT_HI) {
253            return 0;
254        }
255
256        final double n = numeratorDegreesOfFreedom;
257        final double m = denominatorDegreesOfFreedom;
258
259        // Do the opposite of the CDF
260
261        final double nx = n * x;
262        if (nx > m) {
263            return RegularizedBeta.value(m / (m + nx),
264                                         m / 2, n / 2);
265        }
266        return RegularizedBeta.complement(nx / (m + nx),
267                                          n / 2, m / 2);
268    }
269
270    /**
271     * {@inheritDoc}
272     *
273     * <p>For denominator degrees of freedom parameter \( m \), the mean is:
274     *
275     * <p>\[ \mathbb{E}[X] = \begin{cases}
276     *       \frac{m}{m-2}    &amp; \text{for } m \gt 2 \\
277     *       \text{undefined} &amp; \text{otherwise}
278     *       \end{cases} \]
279     *
280     * @return the mean, or {@link Double#NaN NaN} if it is not defined.
281     */
282    @Override
283    public double getMean() {
284        return mean;
285    }
286
287    /**
288     * {@inheritDoc}
289     *
290     * <p>For numerator degrees of freedom parameter \( n \) and denominator
291     * degrees of freedom parameter \( m \), the variance is:
292     *
293     * <p>\[ \operatorname{var}[X] = \begin{cases}
294     *       \frac{2m^2 (n+m-2)}{n (m-2)^2 (m-4)} &amp; \text{for } m \gt 4 \\
295     *       \text{undefined}                     &amp; \text{otherwise}
296     *       \end{cases} \]
297     *
298     * @return the variance, or {@link Double#NaN NaN} if it is not defined.
299     */
300    @Override
301    public double getVariance() {
302        return variance;
303    }
304
305    /**
306     * {@inheritDoc}
307     *
308     * <p>The lower bound of the support is always 0.
309     *
310     * @return 0.
311     */
312    @Override
313    public double getSupportLowerBound() {
314        return SUPPORT_LO;
315    }
316
317    /**
318     * {@inheritDoc}
319     *
320     * <p>The upper bound of the support is always positive infinity.
321     *
322     * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
323     */
324    @Override
325    public double getSupportUpperBound() {
326        return SUPPORT_HI;
327    }
328}