Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.distribution;
018
019import org.apache.commons.numbers.gamma.Beta;
020import org.apache.commons.numbers.gamma.LogBeta;
021import org.apache.commons.numbers.gamma.RegularizedBeta;
022import org.apache.commons.rng.UniformRandomProvider;
023import org.apache.commons.rng.sampling.distribution.TSampler;
024
025/**
026 * Implementation of Student's t-distribution.
027 *
028 * <p>The probability density function of \( X \) is:
029 *
030 * <p>\[ f(x; v) = \frac{\Gamma(\frac{\nu+1}{2})} {\sqrt{\nu\pi}\,\Gamma(\frac{\nu}{2})} \left(1+\frac{t^2}{\nu} \right)^{\!-\frac{\nu+1}{2}} \]
031 *
032 * <p>for \( v &gt; 0 \) the degrees of freedom,
033 * \( \Gamma \) is the gamma function, and
034 * \( x \in (-\infty, \infty) \).
035 *
036 * @see <a href="https://en.wikipedia.org/wiki/Student%27s_t-distribution">Student&#39;s t-distribution (Wikipedia)</a>
037 * @see <a href="https://mathworld.wolfram.com/Studentst-Distribution.html">Student&#39;s t-distribution (MathWorld)</a>
038 */
039public abstract class TDistribution extends AbstractContinuousDistribution {
040    /** The degrees of freedom. */
041    private final double degreesOfFreedom;
042
043    /**
044     * Specialisation of the T-distribution used when there are infinite degrees of freedom.
045     * In this case the distribution matches a normal distribution. This is used when the
046     * variance is not different from 1.0.
047     *
048     * <p>This delegates all methods to the standard normal distribution. Instances are
049     * allowed to provide access to the degrees of freedom used during construction.
050     */
051    private static class NormalTDistribution extends TDistribution {
052        /** A standard normal distribution used for calculations.
053         * This is immutable and thread-safe and can be used across instances. */
054        private static final NormalDistribution STANDARD_NORMAL = NormalDistribution.of(0, 1);
055
056        /**
057         * @param degreesOfFreedom Degrees of freedom.
058         */
059        NormalTDistribution(double degreesOfFreedom) {
060            super(degreesOfFreedom);
061        }
062
063        @Override
064        public double density(double x) {
065            return STANDARD_NORMAL.density(x);
066        }
067
068        @Override
069        public double probability(double x0, double x1) {
070            return STANDARD_NORMAL.probability(x0, x1);
071        }
072
073        @Override
074        public double logDensity(double x) {
075            return STANDARD_NORMAL.logDensity(x);
076        }
077
078        @Override
079        public double cumulativeProbability(double x) {
080            return STANDARD_NORMAL.cumulativeProbability(x);
081        }
082
083        @Override
084        public double inverseCumulativeProbability(double p) {
085            return STANDARD_NORMAL.inverseCumulativeProbability(p);
086        }
087
088        // Survival probability functions inherit the symmetry operations from the TDistribution
089
090        @Override
091        public double getMean() {
092            return 0;
093        }
094
095        @Override
096        public double getVariance() {
097            return 1.0;
098        }
099
100        @Override
101        public Sampler createSampler(UniformRandomProvider rng) {
102            return STANDARD_NORMAL.createSampler(rng);
103        }
104    }
105
106    /**
107     * Implementation of Student's T-distribution.
108     */
109    private static class StudentsTDistribution extends TDistribution {
110        /** 2. */
111        private static final double TWO = 2;
112        /** The threshold for the density function where the
113         * power function base minus 1 is close to zero. */
114        private static final double CLOSE_TO_ZERO = 0.25;
115
116        /** -(v + 1) / 2, where v = degrees of freedom. */
117        private final double mvp1Over2;
118        /** Density normalisation factor, sqrt(v) * beta(1/2, v/2), where v = degrees of freedom. */
119        private final double densityNormalisation;
120        /** Log density normalisation term, 0.5 * log(v) + log(beta(1/2, v/2)), where v = degrees of freedom. */
121        private final double logDensityNormalisation;
122        /** Cached value for inverse probability function. */
123        private final double mean;
124        /** Cached value for inverse probability function. */
125        private final double variance;
126
127        /**
128         * @param degreesOfFreedom Degrees of freedom.
129         * @param variance Precomputed variance
130         */
131        StudentsTDistribution(double degreesOfFreedom, double variance) {
132            super(degreesOfFreedom);
133
134            mvp1Over2 = -0.5 * (degreesOfFreedom + 1);
135            densityNormalisation = Math.sqrt(degreesOfFreedom) * Beta.value(0.5, degreesOfFreedom / 2);
136            logDensityNormalisation = 0.5 * Math.log(degreesOfFreedom) + LogBeta.value(0.5, degreesOfFreedom / 2);
137            mean = degreesOfFreedom > 1 ? 0 : Double.NaN;
138            this.variance = variance;
139        }
140
141        /**
142         * @param degreesOfFreedom Degrees of freedom.
143         * @return the variance
144         */
145        static double computeVariance(double degreesOfFreedom) {
146            if (degreesOfFreedom == Double.POSITIVE_INFINITY) {
147                return 1;
148            } else if (degreesOfFreedom > TWO) {
149                return degreesOfFreedom / (degreesOfFreedom - 2);
150            } else if (degreesOfFreedom > 1) {
151                return Double.POSITIVE_INFINITY;
152            } else {
153                return Double.NaN;
154            }
155        }
156
157        @Override
158        public double density(double x) {
159            //          1                       -(v+1)/2
160            // ------------------- * (1 + t^2/v)
161            // sqrt(v) B(1/2, v/2)
162
163            final double t2OverV = x * x / getDegreesOfFreedom();
164            if (t2OverV < CLOSE_TO_ZERO) {
165                // Avoid power function when the base is close to 1
166                return Math.exp(Math.log1p(t2OverV) * mvp1Over2) / densityNormalisation;
167            }
168            return Math.pow(1 + t2OverV, mvp1Over2) / densityNormalisation;
169        }
170
171        @Override
172        public double logDensity(double x) {
173            return Math.log1p(x * x / getDegreesOfFreedom()) * mvp1Over2 - logDensityNormalisation;
174        }
175
176        @Override
177        public double cumulativeProbability(double x) {
178            if (x == 0) {
179                return 0.5;
180            }
181            final double v = getDegreesOfFreedom();
182
183            // cdf(t) = 1 - 0.5 * I_x(t)(v/2, 1/2)
184            // where x(t) = v / (v + t^2)
185            //
186            // When t^2 << v using the regularized beta results
187            // in loss of precision. Use the complement instead:
188            // I[x](a,b) = 1 - I[1-x](b,a)
189            // x   = v   / (v + t^2)
190            // 1-x = t^2 / (v + t^2)
191            // Use the threshold documented in the Boost t-distribution:
192            // https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/students_t_dist.html
193
194            final double t2 = x * x;
195            final double z;
196            if (v < 2 * t2) {
197                z = RegularizedBeta.value(v / (v + t2), v / 2, 0.5) / 2;
198            } else {
199                z = RegularizedBeta.complement(t2 / (v + t2), 0.5, v / 2) / 2;
200            }
201            return x > 0 ? 1 - z : z;
202        }
203
204        @Override
205        public double getMean() {
206            return mean;
207        }
208
209        @Override
210        public double getVariance() {
211            return variance;
212        }
213
214        @Override
215        double getMedian() {
216            // Overridden for the probability(double, double) method.
217            // This is intentionally not a public method.
218            return 0;
219        }
220
221        @Override
222        public Sampler createSampler(UniformRandomProvider rng) {
223            // T distribution sampler.
224            return TSampler.of(rng, getDegreesOfFreedom())::sample;
225        }
226    }
227
228    /**
229     * @param degreesOfFreedom Degrees of freedom.
230     */
231    TDistribution(double degreesOfFreedom) {
232        this.degreesOfFreedom = degreesOfFreedom;
233    }
234
235    /**
236     * Creates a Student's t-distribution.
237     *
238     * @param degreesOfFreedom Degrees of freedom.
239     * @return the distribution
240     * @throws IllegalArgumentException if {@code degreesOfFreedom <= 0}
241     */
242    public static TDistribution of(double degreesOfFreedom) {
243        if (degreesOfFreedom <= 0) {
244            throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
245                                            degreesOfFreedom);
246        }
247        // If the variance converges to 1 use a NormalDistribution.
248        // Occurs at 2^55 = 3.60e16
249        final double variance = StudentsTDistribution.computeVariance(degreesOfFreedom);
250        if (variance == 1) {
251            return new NormalTDistribution(degreesOfFreedom);
252        }
253        return new StudentsTDistribution(degreesOfFreedom, variance);
254    }
255
256    /**
257     * Gets the degrees of freedom parameter of this distribution.
258     *
259     * @return the degrees of freedom.
260     */
261    public double getDegreesOfFreedom() {
262        return degreesOfFreedom;
263    }
264
265    /** {@inheritDoc} */
266    @Override
267    public double survivalProbability(double x) {
268        // Exploit symmetry
269        return cumulativeProbability(-x);
270    }
271
272    /** {@inheritDoc} */
273    @Override
274    public double inverseSurvivalProbability(double p) {
275        // Exploit symmetry
276        // Subtract from 0 avoids returning -0.0 for p=0.5
277        return 0.0 - inverseCumulativeProbability(p);
278    }
279
280    /**
281     * {@inheritDoc}
282     *
283     * <p>For degrees of freedom parameter \( v \), the mean is:
284     *
285     * <p>\[ \mathbb{E}[X] = \begin{cases}
286     *       0                &amp; \text{for } v \gt 1 \\
287     *       \text{undefined} &amp; \text{otherwise}
288     *       \end{cases} \]
289     *
290     * @return the mean, or {@link Double#NaN NaN} if it is not defined.
291     */
292    @Override
293    public abstract double getMean();
294
295    /**
296     * {@inheritDoc}
297     *
298     * <p>For degrees of freedom parameter \( v \), the variance is:
299     *
300     * <p>\[ \operatorname{var}[X] = \begin{cases}
301     *       \frac{v}{v - 2}  &amp; \text{for } v \gt 2 \\
302     *       \infty           &amp; \text{for } 1 \lt v \le 2 \\
303     *       \text{undefined} &amp; \text{otherwise}
304     *       \end{cases} \]
305     *
306     * @return the variance, or {@link Double#NaN NaN} if it is not defined.
307     */
308    @Override
309    public abstract double getVariance();
310
311    /**
312     * {@inheritDoc}
313     *
314     * <p>The lower bound of the support is always negative infinity.
315     *
316     * @return {@linkplain Double#NEGATIVE_INFINITY negative infinity}.
317     */
318    @Override
319    public double getSupportLowerBound() {
320        return Double.NEGATIVE_INFINITY;
321    }
322
323    /**
324     * {@inheritDoc}
325     *
326     * <p>The upper bound of the support is always positive infinity.
327     *
328     * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
329     */
330    @Override
331    public double getSupportUpperBound() {
332        return Double.POSITIVE_INFINITY;
333    }
334}