001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.statistics.distribution; 018 019import java.util.stream.IntStream; 020import org.apache.commons.rng.UniformRandomProvider; 021 022/** 023 * Interface for distributions on the integers. 024 */ 025public interface DiscreteDistribution { 026 027 /** 028 * For a random variable {@code X} whose values are distributed according 029 * to this distribution, this method returns {@code P(X = x)}. 030 * In other words, this method represents the probability mass function (PMF) 031 * for the distribution. 032 * 033 * @param x Point at which the PMF is evaluated. 034 * @return the value of the probability mass function at {@code x}. 035 */ 036 double probability(int x); 037 038 /** 039 * For a random variable {@code X} whose values are distributed according 040 * to this distribution, this method returns {@code P(x0 < X <= x1)}. 041 * The default implementation uses the identity 042 * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)} 043 * 044 * <p>Special cases: 045 * <ul> 046 * <li>returns {@code 0.0} if {@code x0 == x1}; 047 * <li>returns {@code probability(x1)} if {@code x0 + 1 == x1}; 048 * </ul> 049 * 050 * @param x0 Lower bound (exclusive). 051 * @param x1 Upper bound (inclusive). 052 * @return the probability that a random variable with this distribution 053 * takes a value between {@code x0} and {@code x1}, excluding the lower 054 * and including the upper endpoint. 055 * @throws IllegalArgumentException if {@code x0 > x1}. 056 */ 057 default double probability(int x0, 058 int x1) { 059 if (x0 > x1) { 060 throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GT_HIGH, x0, x1); 061 } 062 // Long addition avoids overflow 063 if (x0 + 1L >= x1) { 064 return x0 == x1 ? 0.0 : probability(x1); 065 } 066 return cumulativeProbability(x1) - cumulativeProbability(x0); 067 } 068 069 /** 070 * For a random variable {@code X} whose values are distributed according 071 * to this distribution, this method returns {@code log(P(X = x))}, where 072 * {@code log} is the natural logarithm. 073 * 074 * @param x Point at which the PMF is evaluated. 075 * @return the logarithm of the value of the probability mass function at 076 * {@code x}. 077 */ 078 default double logProbability(int x) { 079 return Math.log(probability(x)); 080 } 081 082 /** 083 * For a random variable {@code X} whose values are distributed according 084 * to this distribution, this method returns {@code P(X <= x)}. 085 * In other, words, this method represents the (cumulative) distribution 086 * function (CDF) for this distribution. 087 * 088 * @param x Point at which the CDF is evaluated. 089 * @return the probability that a random variable with this distribution 090 * takes a value less than or equal to {@code x}. 091 */ 092 double cumulativeProbability(int x); 093 094 /** 095 * For a random variable {@code X} whose values are distributed according 096 * to this distribution, this method returns {@code P(X > x)}. 097 * In other words, this method represents the complementary cumulative 098 * distribution function. 099 * 100 * <p>By default, this is defined as {@code 1 - cumulativeProbability(x)}, but 101 * the specific implementation may be more accurate. 102 * 103 * @param x Point at which the survival function is evaluated. 104 * @return the probability that a random variable with this 105 * distribution takes a value greater than {@code x}. 106 */ 107 default double survivalProbability(int x) { 108 return 1.0 - cumulativeProbability(x); 109 } 110 111 /** 112 * Computes the quantile function of this distribution. 113 * For a random variable {@code X} distributed according to this distribution, 114 * the returned value is: 115 * 116 * <p>\[ x = \begin{cases} 117 * \inf \{ x \in \mathbb Z : P(X \le x) \ge p\} & \text{for } 0 \lt p \le 1 \\ 118 * \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \} & \text{for } p = 0 119 * \end{cases} \] 120 * 121 * <p>If the result exceeds the range of the data type {@code int}, 122 * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned. 123 * In this case the result of {@link #cumulativeProbability(int) cumulativeProbability(x)} 124 * called using the returned {@code p}-quantile may not compute the original {@code p}. 125 * 126 * @param p Cumulative probability. 127 * @return the smallest {@code p}-quantile of this distribution 128 * (largest 0-quantile for {@code p = 0}). 129 * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}. 130 */ 131 int inverseCumulativeProbability(double p); 132 133 /** 134 * Computes the inverse survival probability function of this distribution. 135 * For a random variable {@code X} distributed according to this distribution, 136 * the returned value is: 137 * 138 * <p>\[ x = \begin{cases} 139 * \inf \{ x \in \mathbb Z : P(X \gt x) \le p\} & \text{for } 0 \le p \lt 1 \\ 140 * \inf \{ x \in \mathbb Z : P(X \gt x) \lt 1 \} & \text{for } p = 1 141 * \end{cases} \] 142 * 143 * <p>If the result exceeds the range of the data type {@code int}, 144 * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned. 145 * In this case the result of {@link #survivalProbability(int) survivalProbability(x)} 146 * called using the returned {@code (1-p)}-quantile may not compute the original {@code p}. 147 * 148 * <p>By default, this is defined as {@code inverseCumulativeProbability(1 - p)}, but 149 * the specific implementation may be more accurate. 150 * 151 * @param p Cumulative probability. 152 * @return the smallest {@code (1-p)}-quantile of this distribution 153 * (largest 0-quantile for {@code p = 1}). 154 * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}. 155 */ 156 default int inverseSurvivalProbability(double p) { 157 return inverseCumulativeProbability(1 - p); 158 } 159 160 /** 161 * Gets the mean of this distribution. 162 * 163 * @return the mean. 164 */ 165 double getMean(); 166 167 /** 168 * Gets the variance of this distribution. 169 * 170 * @return the variance. 171 */ 172 double getVariance(); 173 174 /** 175 * Gets the lower bound of the support. 176 * This method must return the same value as 177 * {@code inverseCumulativeProbability(0)}, i.e. 178 * \( \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \} \). 179 * By convention, {@link Integer#MIN_VALUE} should be substituted 180 * for negative infinity. 181 * 182 * @return the lower bound of the support. 183 */ 184 int getSupportLowerBound(); 185 186 /** 187 * Gets the upper bound of the support. 188 * This method must return the same value as 189 * {@code inverseCumulativeProbability(1)}, i.e. 190 * \( \inf \{ x \in \mathbb Z : P(X \le x) = 1 \} \). 191 * By convention, {@link Integer#MAX_VALUE} should be substituted 192 * for positive infinity. 193 * 194 * @return the upper bound of the support. 195 */ 196 int getSupportUpperBound(); 197 198 /** 199 * Creates a sampler. 200 * 201 * @param rng Generator of uniformly distributed numbers. 202 * @return a sampler that produces random numbers according this 203 * distribution. 204 */ 205 Sampler createSampler(UniformRandomProvider rng); 206 207 /** 208 * Distribution sampling functionality. 209 */ 210 @FunctionalInterface 211 interface Sampler { 212 /** 213 * Generates a random value sampled from this distribution. 214 * 215 * @return a random value. 216 */ 217 int sample(); 218 219 /** 220 * Returns an effectively unlimited stream of {@code int} sample values. 221 * 222 * <p>The default implementation produces a sequential stream that repeatedly 223 * calls {@link #sample sample}(). 224 * 225 * @return a stream of {@code int} values. 226 */ 227 default IntStream samples() { 228 return IntStream.generate(this::sample).sequential(); 229 } 230 231 /** 232 * Returns a stream producing the given {@code streamSize} number of {@code int} 233 * sample values. 234 * 235 * <p>The default implementation produces a sequential stream that repeatedly 236 * calls {@link #sample sample}(); the stream is limited to the given {@code streamSize}. 237 * 238 * @param streamSize Number of values to generate. 239 * @return a stream of {@code int} values. 240 */ 241 default IntStream samples(long streamSize) { 242 return samples().limit(streamSize); 243 } 244 } 245}