001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.statistics.distribution; 018 019import org.apache.commons.numbers.gamma.Beta; 020import org.apache.commons.numbers.gamma.LogBeta; 021import org.apache.commons.numbers.gamma.RegularizedBeta; 022import org.apache.commons.rng.UniformRandomProvider; 023import org.apache.commons.rng.sampling.distribution.TSampler; 024 025/** 026 * Implementation of Student's t-distribution. 027 * 028 * <p>The probability density function of \( X \) is: 029 * 030 * <p>\[ f(x; v) = \frac{\Gamma(\frac{\nu+1}{2})} {\sqrt{\nu\pi}\,\Gamma(\frac{\nu}{2})} \left(1+\frac{t^2}{\nu} \right)^{\!-\frac{\nu+1}{2}} \] 031 * 032 * <p>for \( v > 0 \) the degrees of freedom, 033 * \( \Gamma \) is the gamma function, and 034 * \( x \in (-\infty, \infty) \). 035 * 036 * @see <a href="https://en.wikipedia.org/wiki/Student%27s_t-distribution">Student's t-distribution (Wikipedia)</a> 037 * @see <a href="https://mathworld.wolfram.com/Studentst-Distribution.html">Student's t-distribution (MathWorld)</a> 038 */ 039public abstract class TDistribution extends AbstractContinuousDistribution { 040 /** The degrees of freedom. */ 041 private final double degreesOfFreedom; 042 043 /** 044 * Specialisation of the T-distribution used when there are infinite degrees of freedom. 045 * In this case the distribution matches a normal distribution. This is used when the 046 * variance is not different from 1.0. 047 * 048 * <p>This delegates all methods to the standard normal distribution. Instances are 049 * allowed to provide access to the degrees of freedom used during construction. 050 */ 051 private static class NormalTDistribution extends TDistribution { 052 /** A standard normal distribution used for calculations. 053 * This is immutable and thread-safe and can be used across instances. */ 054 private static final NormalDistribution STANDARD_NORMAL = NormalDistribution.of(0, 1); 055 056 /** 057 * @param degreesOfFreedom Degrees of freedom. 058 */ 059 NormalTDistribution(double degreesOfFreedom) { 060 super(degreesOfFreedom); 061 } 062 063 @Override 064 public double density(double x) { 065 return STANDARD_NORMAL.density(x); 066 } 067 068 @Override 069 public double probability(double x0, double x1) { 070 return STANDARD_NORMAL.probability(x0, x1); 071 } 072 073 @Override 074 public double logDensity(double x) { 075 return STANDARD_NORMAL.logDensity(x); 076 } 077 078 @Override 079 public double cumulativeProbability(double x) { 080 return STANDARD_NORMAL.cumulativeProbability(x); 081 } 082 083 @Override 084 public double inverseCumulativeProbability(double p) { 085 return STANDARD_NORMAL.inverseCumulativeProbability(p); 086 } 087 088 // Survival probability functions inherit the symmetry operations from the TDistribution 089 090 @Override 091 public double getMean() { 092 return 0; 093 } 094 095 @Override 096 public double getVariance() { 097 return 1.0; 098 } 099 100 @Override 101 public Sampler createSampler(UniformRandomProvider rng) { 102 return STANDARD_NORMAL.createSampler(rng); 103 } 104 } 105 106 /** 107 * Implementation of Student's T-distribution. 108 */ 109 private static class StudentsTDistribution extends TDistribution { 110 /** 2. */ 111 private static final double TWO = 2; 112 /** The threshold for the density function where the 113 * power function base minus 1 is close to zero. */ 114 private static final double CLOSE_TO_ZERO = 0.25; 115 116 /** -(v + 1) / 2, where v = degrees of freedom. */ 117 private final double mvp1Over2; 118 /** Density normalisation factor, sqrt(v) * beta(1/2, v/2), where v = degrees of freedom. */ 119 private final double densityNormalisation; 120 /** Log density normalisation term, 0.5 * log(v) + log(beta(1/2, v/2)), where v = degrees of freedom. */ 121 private final double logDensityNormalisation; 122 /** Cached value for inverse probability function. */ 123 private final double mean; 124 /** Cached value for inverse probability function. */ 125 private final double variance; 126 127 /** 128 * @param degreesOfFreedom Degrees of freedom. 129 * @param variance Precomputed variance 130 */ 131 StudentsTDistribution(double degreesOfFreedom, double variance) { 132 super(degreesOfFreedom); 133 134 mvp1Over2 = -0.5 * (degreesOfFreedom + 1); 135 densityNormalisation = Math.sqrt(degreesOfFreedom) * Beta.value(0.5, degreesOfFreedom / 2); 136 logDensityNormalisation = 0.5 * Math.log(degreesOfFreedom) + LogBeta.value(0.5, degreesOfFreedom / 2); 137 mean = degreesOfFreedom > 1 ? 0 : Double.NaN; 138 this.variance = variance; 139 } 140 141 /** 142 * @param degreesOfFreedom Degrees of freedom. 143 * @return the variance 144 */ 145 static double computeVariance(double degreesOfFreedom) { 146 if (degreesOfFreedom == Double.POSITIVE_INFINITY) { 147 return 1; 148 } else if (degreesOfFreedom > TWO) { 149 return degreesOfFreedom / (degreesOfFreedom - 2); 150 } else if (degreesOfFreedom > 1) { 151 return Double.POSITIVE_INFINITY; 152 } else { 153 return Double.NaN; 154 } 155 } 156 157 @Override 158 public double density(double x) { 159 // 1 -(v+1)/2 160 // ------------------- * (1 + t^2/v) 161 // sqrt(v) B(1/2, v/2) 162 163 final double t2OverV = x * x / getDegreesOfFreedom(); 164 if (t2OverV < CLOSE_TO_ZERO) { 165 // Avoid power function when the base is close to 1 166 return Math.exp(Math.log1p(t2OverV) * mvp1Over2) / densityNormalisation; 167 } 168 return Math.pow(1 + t2OverV, mvp1Over2) / densityNormalisation; 169 } 170 171 @Override 172 public double logDensity(double x) { 173 return Math.log1p(x * x / getDegreesOfFreedom()) * mvp1Over2 - logDensityNormalisation; 174 } 175 176 @Override 177 public double cumulativeProbability(double x) { 178 if (x == 0) { 179 return 0.5; 180 } 181 final double v = getDegreesOfFreedom(); 182 183 // cdf(t) = 1 - 0.5 * I_x(t)(v/2, 1/2) 184 // where x(t) = v / (v + t^2) 185 // 186 // When t^2 << v using the regularized beta results 187 // in loss of precision. Use the complement instead: 188 // I[x](a,b) = 1 - I[1-x](b,a) 189 // x = v / (v + t^2) 190 // 1-x = t^2 / (v + t^2) 191 // Use the threshold documented in the Boost t-distribution: 192 // https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/students_t_dist.html 193 194 final double t2 = x * x; 195 final double z; 196 if (v < 2 * t2) { 197 z = RegularizedBeta.value(v / (v + t2), v / 2, 0.5) / 2; 198 } else { 199 z = RegularizedBeta.complement(t2 / (v + t2), 0.5, v / 2) / 2; 200 } 201 return x > 0 ? 1 - z : z; 202 } 203 204 @Override 205 public double getMean() { 206 return mean; 207 } 208 209 @Override 210 public double getVariance() { 211 return variance; 212 } 213 214 @Override 215 double getMedian() { 216 // Overridden for the probability(double, double) method. 217 // This is intentionally not a public method. 218 return 0; 219 } 220 221 @Override 222 public Sampler createSampler(UniformRandomProvider rng) { 223 // T distribution sampler. 224 return TSampler.of(rng, getDegreesOfFreedom())::sample; 225 } 226 } 227 228 /** 229 * @param degreesOfFreedom Degrees of freedom. 230 */ 231 TDistribution(double degreesOfFreedom) { 232 this.degreesOfFreedom = degreesOfFreedom; 233 } 234 235 /** 236 * Creates a Student's t-distribution. 237 * 238 * @param degreesOfFreedom Degrees of freedom. 239 * @return the distribution 240 * @throws IllegalArgumentException if {@code degreesOfFreedom <= 0} 241 */ 242 public static TDistribution of(double degreesOfFreedom) { 243 if (degreesOfFreedom <= 0) { 244 throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, 245 degreesOfFreedom); 246 } 247 // If the variance converges to 1 use a NormalDistribution. 248 // Occurs at 2^55 = 3.60e16 249 final double variance = StudentsTDistribution.computeVariance(degreesOfFreedom); 250 if (variance == 1) { 251 return new NormalTDistribution(degreesOfFreedom); 252 } 253 return new StudentsTDistribution(degreesOfFreedom, variance); 254 } 255 256 /** 257 * Gets the degrees of freedom parameter of this distribution. 258 * 259 * @return the degrees of freedom. 260 */ 261 public double getDegreesOfFreedom() { 262 return degreesOfFreedom; 263 } 264 265 /** {@inheritDoc} */ 266 @Override 267 public double survivalProbability(double x) { 268 // Exploit symmetry 269 return cumulativeProbability(-x); 270 } 271 272 /** {@inheritDoc} */ 273 @Override 274 public double inverseSurvivalProbability(double p) { 275 // Exploit symmetry 276 // Subtract from 0 avoids returning -0.0 for p=0.5 277 return 0.0 - inverseCumulativeProbability(p); 278 } 279 280 /** 281 * {@inheritDoc} 282 * 283 * <p>For degrees of freedom parameter \( v \), the mean is: 284 * 285 * <p>\[ \mathbb{E}[X] = \begin{cases} 286 * 0 & \text{for } v \gt 1 \\ 287 * \text{undefined} & \text{otherwise} 288 * \end{cases} \] 289 * 290 * @return the mean, or {@link Double#NaN NaN} if it is not defined. 291 */ 292 @Override 293 public abstract double getMean(); 294 295 /** 296 * {@inheritDoc} 297 * 298 * <p>For degrees of freedom parameter \( v \), the variance is: 299 * 300 * <p>\[ \operatorname{var}[X] = \begin{cases} 301 * \frac{v}{v - 2} & \text{for } v \gt 2 \\ 302 * \infty & \text{for } 1 \lt v \le 2 \\ 303 * \text{undefined} & \text{otherwise} 304 * \end{cases} \] 305 * 306 * @return the variance, or {@link Double#NaN NaN} if it is not defined. 307 */ 308 @Override 309 public abstract double getVariance(); 310 311 /** 312 * {@inheritDoc} 313 * 314 * <p>The lower bound of the support is always negative infinity. 315 * 316 * @return {@linkplain Double#NEGATIVE_INFINITY negative infinity}. 317 */ 318 @Override 319 public double getSupportLowerBound() { 320 return Double.NEGATIVE_INFINITY; 321 } 322 323 /** 324 * {@inheritDoc} 325 * 326 * <p>The upper bound of the support is always positive infinity. 327 * 328 * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}. 329 */ 330 @Override 331 public double getSupportUpperBound() { 332 return Double.POSITIVE_INFINITY; 333 } 334}