001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.statistics.inference; 018 019import java.util.Objects; 020import org.apache.commons.statistics.distribution.BinomialDistribution; 021 022/** 023 * Implements binomial test statistics. 024 * 025 * <p>Performs an exact test for the statistical significance of deviations from a 026 * theoretically expected distribution of observations into two categories. 027 * 028 * @see <a href="http://en.wikipedia.org/wiki/Binomial_test">Binomial test (Wikipedia)</a> 029 * @since 1.1 030 */ 031public final class BinomialTest { 032 /** Default instance. */ 033 private static final BinomialTest DEFAULT = new BinomialTest(AlternativeHypothesis.TWO_SIDED); 034 035 /** Alternative hypothesis. */ 036 private final AlternativeHypothesis alternative; 037 038 /** 039 * @param alternative Alternative hypothesis. 040 */ 041 private BinomialTest(AlternativeHypothesis alternative) { 042 this.alternative = alternative; 043 } 044 045 /** 046 * Return an instance using the default options. 047 * 048 * <ul> 049 * <li>{@link AlternativeHypothesis#TWO_SIDED} 050 * </ul> 051 * 052 * @return default instance 053 */ 054 public static BinomialTest withDefaults() { 055 return DEFAULT; 056 } 057 058 /** 059 * Return an instance with the configured alternative hypothesis. 060 * 061 * @param v Value. 062 * @return an instance 063 */ 064 public BinomialTest with(AlternativeHypothesis v) { 065 return new BinomialTest(Objects.requireNonNull(v)); 066 } 067 068 /** 069 * Performs a binomial test about the probability of success \( \pi \). 070 * 071 * <p>The null hypothesis is \( H_0:\pi=\pi_0 \) where \( \pi_0 \) is between 0 and 1. 072 * 073 * <p>The probability of observing \( k \) successes from \( n \) trials with a given 074 * probability of success \( p \) is: 075 * 076 * <p>\[ \Pr(X=k)=\binom{n}{k}p^k(1-p)^{n-k} \] 077 * 078 * <p>The test is defined by the {@link AlternativeHypothesis}. 079 * 080 * <p>To test \( \pi < \pi_0 \) (less than): 081 * 082 * <p>\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=0}^k\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \] 083 * 084 * <p>To test \( \pi > \pi_0 \) (greater than): 085 * 086 * <p>\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=k}^n\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \] 087 * 088 * <p>To test \( \pi \ne \pi_0 \) (two-sided) requires finding all \( i \) such that 089 * \( \mathcal{I}=\{i:\Pr(X=i)\leq \Pr(X=k)\} \) and compute the sum: 090 * 091 * <p>\[ p = \sum_{i\in\mathcal{I}}\Pr(X=i)=\sum_{i\in\mathcal{I}}\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \] 092 * 093 * <p>The two-sided p-value represents the likelihood of getting a result at least as 094 * extreme as the sample, given the provided {@code probability} of success on a 095 * single trial. 096 * 097 * <p>The test statistic is equal to the estimated proportion \( \frac{k}{n} \). 098 * 099 * @param numberOfTrials Number of trials performed. 100 * @param numberOfSuccesses Number of successes observed. 101 * @param probability Assumed probability of a single trial under the null 102 * hypothesis. 103 * @return test result 104 * @throws IllegalArgumentException if {@code numberOfTrials} or 105 * {@code numberOfSuccesses} is negative; {@code probability} is not between 0 106 * and 1; or if {@code numberOfTrials < numberOfSuccesses} 107 * @see #with(AlternativeHypothesis) 108 */ 109 public SignificanceResult test(int numberOfTrials, int numberOfSuccesses, double probability) { 110 // Note: The distribution validates number of trials and probability. 111 // Here we only have to validate the number of successes. 112 Arguments.checkNonNegative(numberOfSuccesses); 113 if (numberOfTrials < numberOfSuccesses) { 114 throw new InferenceException( 115 "must have n >= k for binomial coefficient (n, k), got n = %d, k = %d", 116 numberOfSuccesses, numberOfTrials); 117 } 118 119 final BinomialDistribution distribution = BinomialDistribution.of(numberOfTrials, probability); 120 final double p; 121 if (alternative == AlternativeHypothesis.GREATER_THAN) { 122 p = distribution.survivalProbability(numberOfSuccesses - 1); 123 } else if (alternative == AlternativeHypothesis.LESS_THAN) { 124 p = distribution.cumulativeProbability(numberOfSuccesses); 125 } else { 126 p = twoSidedBinomialTest(numberOfTrials, numberOfSuccesses, probability, distribution); 127 } 128 return new BaseSignificanceResult((double) numberOfSuccesses / numberOfTrials, p); 129 } 130 131 /** 132 * Returns the <i>observed significance level</i>, or p-value, associated with a 133 * two-sided binomial test about the probability of success \( \pi \). 134 * 135 * @param n Number of trials performed. 136 * @param k Number of successes observed. 137 * @param probability Assumed probability of a single trial under the null 138 * hypothesis. 139 * @param distribution Binomial distribution. 140 * @return p-value 141 */ 142 private static double twoSidedBinomialTest(int n, int k, double probability, 143 BinomialDistribution distribution) { 144 // Find all i where Pr(X = i) <= Pr(X = k) and sum them. 145 // Exploit the known unimodal distribution to increase the 146 // search speed. Note the search depends only on magnitude differences. 147 // The current BinomialDistribution is faster using log probability 148 // as it omits a call to Math.exp. 149 150 // Use the mode as the point of largest probability. 151 // The lower or upper mode is important for the search below. 152 final int m1 = (int) Math.ceil((n + 1.0) * probability) - 1; 153 final int m2 = (int) Math.floor((n + 1.0) * probability); 154 if (k < m1) { 155 final double pk = distribution.logProbability(k); 156 // Lower half = cdf(k) 157 // Find upper half. As k < lower mode i should never 158 // reach the lower mode based on the probability alone. 159 // Bracket with the upper mode. 160 final int i = Searches.searchDescending(m2, n, pk, distribution::logProbability); 161 return distribution.cumulativeProbability(k) + 162 distribution.survivalProbability(i - 1); 163 } else if (k > m2) { 164 final double pk = distribution.logProbability(k); 165 // Upper half = sf(k - 1) 166 // Find lower half. As k > upper mode i should never 167 // reach the upper mode based on the probability alone. 168 // Bracket with the lower mode. 169 final int i = Searches.searchAscending(0, m1, pk, distribution::logProbability); 170 return distribution.cumulativeProbability(i) + 171 distribution.survivalProbability(k - 1); 172 } 173 // k == mode 174 // Edge case where the sum of probabilities will be either 175 // 1 or 1 - Pr(X = mode) where mode != k 176 final double pk = distribution.probability(k); 177 final double pm = distribution.probability(k == m1 ? m2 : m1); 178 return pm > pk ? 1 - pm : 1; 179 } 180}