001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.statistics.distribution; 019 020import java.util.function.DoubleUnaryOperator; 021import org.apache.commons.rng.UniformRandomProvider; 022import org.apache.commons.rng.sampling.distribution.InverseTransformParetoSampler; 023 024/** 025 * Implementation of the Pareto (Type I) distribution. 026 * 027 * <p>The probability density function of \( X \) is: 028 * 029 * <p>\[ f(x; k, \alpha) = \frac{\alpha k^\alpha}{x^{\alpha + 1}} \] 030 * 031 * <p>for \( k > 0 \), 032 * \( \alpha > 0 \), and 033 * \( x \in [k, \infty) \). 034 * 035 * <p>\( k \) is a <em>scale</em> parameter: this is the minimum possible value of \( X \). 036 * <br>\( \alpha \) is a <em>shape</em> parameter: this is the Pareto index. 037 * 038 * @see <a href="https://en.wikipedia.org/wiki/Pareto_distribution">Pareto distribution (Wikipedia)</a> 039 * @see <a href="https://mathworld.wolfram.com/ParetoDistribution.html">Pareto distribution (MathWorld)</a> 040 */ 041public final class ParetoDistribution extends AbstractContinuousDistribution { 042 /** The minimum value for the shape parameter when computing when computing the variance. */ 043 private static final double MIN_SHAPE_FOR_VARIANCE = 2.0; 044 045 /** The scale parameter of this distribution. Also known as {@code k}; 046 * the minimum possible value for the random variable {@code X}. */ 047 private final double scale; 048 /** The shape parameter of this distribution. */ 049 private final double shape; 050 /** Implementation of PDF(x). Assumes that {@code x >= scale}. */ 051 private final DoubleUnaryOperator pdf; 052 /** Implementation of log PDF(x). Assumes that {@code x >= scale}. */ 053 private final DoubleUnaryOperator logpdf; 054 055 /** 056 * @param scale Scale parameter (minimum possible value of X). 057 * @param shape Shape parameter (Pareto index). 058 */ 059 private ParetoDistribution(double scale, 060 double shape) { 061 this.scale = scale; 062 this.shape = shape; 063 064 // The Pareto distribution approaches a Dirac delta function when shape -> inf. 065 // Parameterisations can also lead to underflow in the standard computation. 066 // Extract the PDF and CDF to specialized implementations to handle edge cases. 067 068 // Pre-compute factors for the standard computation 069 final double shapeByScalePowShape = shape * Math.pow(scale, shape); 070 final double logShapePlusShapeByLogScale = Math.log(shape) + Math.log(scale) * shape; 071 072 if (shapeByScalePowShape < Double.POSITIVE_INFINITY && 073 shapeByScalePowShape >= Double.MIN_NORMAL) { 074 // Standard computation 075 pdf = x -> shapeByScalePowShape / Math.pow(x, shape + 1); 076 logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1); 077 } else { 078 // Standard computation overflow; underflow to sub-normal or zero; or nan (pow(1.0, inf)) 079 if (Double.isFinite(logShapePlusShapeByLogScale)) { 080 // Log computation is valid 081 logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1); 082 pdf = x -> Math.exp(logpdf.applyAsDouble(x)); 083 } else { 084 // Assume Dirac function 085 logpdf = x -> x > scale ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; 086 // PDF has infinite value at lower bound 087 pdf = x -> x > scale ? 0 : Double.POSITIVE_INFINITY; 088 } 089 } 090 } 091 092 /** 093 * Creates a Pareto distribution. 094 * 095 * @param scale Scale parameter (minimum possible value of X). 096 * @param shape Shape parameter (Pareto index). 097 * @return the distribution 098 * @throws IllegalArgumentException if {@code scale <= 0}, {@code scale} is 099 * infinite, or {@code shape <= 0}. 100 */ 101 public static ParetoDistribution of(double scale, 102 double shape) { 103 if (scale <= 0 || scale == Double.POSITIVE_INFINITY) { 104 throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE_FINITE, scale); 105 } 106 if (shape <= 0) { 107 throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, shape); 108 } 109 return new ParetoDistribution(scale, shape); 110 } 111 112 /** 113 * Gets the scale parameter of this distribution. 114 * This is the minimum possible value of X. 115 * 116 * @return the scale parameter. 117 */ 118 public double getScale() { 119 return scale; 120 } 121 122 /** 123 * Gets the shape parameter of this distribution. 124 * This is the Pareto index. 125 * 126 * @return the shape parameter. 127 */ 128 public double getShape() { 129 return shape; 130 } 131 132 /** 133 * {@inheritDoc} 134 * 135 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the PDF is: 136 * 137 * <p>\[ f(x; k, \alpha) = \begin{cases} 138 * 0 & \text{for } x \lt k \\ 139 * \frac{\alpha k^\alpha}{x^{\alpha + 1}} & \text{for } x \ge k 140 * \end{cases} \] 141 */ 142 @Override 143 public double density(double x) { 144 if (x < scale) { 145 return 0; 146 } 147 return pdf.applyAsDouble(x); 148 } 149 150 /** {@inheritDoc} 151 * 152 * <p>See documentation of {@link #density(double)} for computation details. 153 */ 154 @Override 155 public double logDensity(double x) { 156 if (x < scale) { 157 return Double.NEGATIVE_INFINITY; 158 } 159 return logpdf.applyAsDouble(x); 160 } 161 162 /** 163 * {@inheritDoc} 164 * 165 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the CDF is: 166 * 167 * <p>\[ F(x; k, \alpha) = \begin{cases} 168 * 0 & \text{for } x \le k \\ 169 * 1 - \left( \frac{k}{x} \right)^\alpha & \text{for } x \gt k 170 * \end{cases} \] 171 */ 172 @Override 173 public double cumulativeProbability(double x) { 174 if (x <= scale) { 175 return 0; 176 } 177 // Increase accuracy for CDF close to 0 by using a log calculation: 178 // 1 - exp(α * ln(k / x)) == -(exp(α * ln(k / x)) - 1) 179 return -Math.expm1(shape * Math.log(scale / x)); 180 } 181 182 /** 183 * {@inheritDoc} 184 * 185 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the survival function is: 186 * 187 * <p>\[ S(x; k, \alpha) = \begin{cases} 188 * 1 & \text{for } x \le k \\ 189 * \left( \frac{k}{x} \right)^\alpha & \text{for } x \gt k 190 * \end{cases} \] 191 */ 192 @Override 193 public double survivalProbability(double x) { 194 if (x <= scale) { 195 return 1; 196 } 197 return Math.pow(scale / x, shape); 198 } 199 200 /** {@inheritDoc} */ 201 @Override 202 public double inverseCumulativeProbability(double p) { 203 ArgumentUtils.checkProbability(p); 204 if (p == 0) { 205 return getSupportLowerBound(); 206 } 207 if (p == 1) { 208 return getSupportUpperBound(); 209 } 210 return scale / Math.exp(Math.log1p(-p) / shape); 211 } 212 213 /** {@inheritDoc} */ 214 @Override 215 public double inverseSurvivalProbability(double p) { 216 ArgumentUtils.checkProbability(p); 217 if (p == 1) { 218 return getSupportLowerBound(); 219 } 220 if (p == 0) { 221 return getSupportUpperBound(); 222 } 223 return scale / Math.pow(p, 1 / shape); 224 } 225 226 /** 227 * {@inheritDoc} 228 * 229 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the mean is: 230 * 231 * <p>\[ \mathbb{E}[X] = \begin{cases} 232 * \infty & \text{for } \alpha \le 1 \\ 233 * \frac{k \alpha}{(\alpha-1)} & \text{for } \alpha \gt 1 234 * \end{cases} \] 235 */ 236 @Override 237 public double getMean() { 238 if (shape <= 1) { 239 return Double.POSITIVE_INFINITY; 240 } 241 if (shape == Double.POSITIVE_INFINITY) { 242 return scale; 243 } 244 return scale * (shape / (shape - 1)); 245 } 246 247 /** 248 * {@inheritDoc} 249 * 250 * <p>For scale parameter \( k \) and shape parameter \( \alpha \), the variance is: 251 * 252 * <p>\[ \operatorname{var}[X] = \begin{cases} 253 * \infty & \text{for } \alpha \le 2 \\ 254 * \frac{k^2 \alpha}{(\alpha-1)^2 (\alpha-2)} & \text{for } \alpha \gt 2 255 * \end{cases} \] 256 */ 257 @Override 258 public double getVariance() { 259 if (shape <= MIN_SHAPE_FOR_VARIANCE) { 260 return Double.POSITIVE_INFINITY; 261 } 262 if (shape == Double.POSITIVE_INFINITY) { 263 return 0; 264 } 265 final double s = shape - 1; 266 final double z = shape / s / s / (shape - 2); 267 // Avoid intermediate overflow of scale^2 if z is small 268 return z < 1 ? z * scale * scale : scale * scale * z; 269 } 270 271 /** 272 * {@inheritDoc} 273 * <p> 274 * The lower bound of the support is equal to the scale parameter {@code k}. 275 * 276 * @return scale. 277 */ 278 @Override 279 public double getSupportLowerBound() { 280 return getScale(); 281 } 282 283 /** 284 * {@inheritDoc} 285 * <p> 286 * The upper bound of the support is always positive infinity. 287 * 288 * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}. 289 */ 290 @Override 291 public double getSupportUpperBound() { 292 return Double.POSITIVE_INFINITY; 293 } 294 295 /** {@inheritDoc} */ 296 @Override 297 public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) { 298 // Pareto distribution sampler 299 return InverseTransformParetoSampler.of(rng, scale, shape)::sample; 300 } 301}