001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.inference;
018
019import java.util.Objects;
020import org.apache.commons.statistics.distribution.BinomialDistribution;
021
022/**
023 * Implements binomial test statistics.
024 *
025 * <p>Performs an exact test for the statistical significance of deviations from a
026 * theoretically expected distribution of observations into two categories.
027 *
028 * @see <a href="http://en.wikipedia.org/wiki/Binomial_test">Binomial test (Wikipedia)</a>
029 * @since 1.1
030 */
031public final class BinomialTest {
032    /** Default instance. */
033    private static final BinomialTest DEFAULT = new BinomialTest(AlternativeHypothesis.TWO_SIDED);
034
035    /** Alternative hypothesis. */
036    private final AlternativeHypothesis alternative;
037
038    /**
039     * @param alternative Alternative hypothesis.
040     */
041    private BinomialTest(AlternativeHypothesis alternative) {
042        this.alternative = alternative;
043    }
044
045    /**
046     * Return an instance using the default options.
047     *
048     * <ul>
049     * <li>{@link AlternativeHypothesis#TWO_SIDED}
050     * </ul>
051     *
052     * @return default instance
053     */
054    public static BinomialTest withDefaults() {
055        return DEFAULT;
056    }
057
058    /**
059     * Return an instance with the configured alternative hypothesis.
060     *
061     * @param v Value.
062     * @return an instance
063     */
064    public BinomialTest with(AlternativeHypothesis v) {
065        return new BinomialTest(Objects.requireNonNull(v));
066    }
067
068    /**
069     * Performs a binomial test about the probability of success \( \pi \).
070     *
071     * <p>The null hypothesis is \( H_0:\pi=\pi_0 \) where \( \pi_0 \) is between 0 and 1.
072     *
073     * <p>The probability of observing \( k \) successes from \( n \) trials with a given
074     * probability of success \( p \) is:
075     *
076     * <p>\[ \Pr(X=k)=\binom{n}{k}p^k(1-p)^{n-k} \]
077     *
078     * <p>The test is defined by the {@link AlternativeHypothesis}.
079     *
080     * <p>To test \( \pi &lt; \pi_0 \) (less than):
081     *
082     * <p>\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=0}^k\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \]
083     *
084     * <p>To test \( \pi &gt; \pi_0 \) (greater than):
085     *
086     * <p>\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=k}^n\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \]
087     *
088     * <p>To test \( \pi \ne \pi_0 \) (two-sided) requires finding all \( i \) such that
089     * \( \mathcal{I}=\{i:\Pr(X=i)\leq \Pr(X=k)\} \) and compute the sum:
090     *
091     * <p>\[ p = \sum_{i\in\mathcal{I}}\Pr(X=i)=\sum_{i\in\mathcal{I}}\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \]
092     *
093     * <p>The two-sided p-value represents the likelihood of getting a result at least as
094     * extreme as the sample, given the provided {@code probability} of success on a
095     * single trial.
096     *
097     * <p>The test statistic is equal to the estimated proportion \( \frac{k}{n} \).
098     *
099     * @param numberOfTrials Number of trials performed.
100     * @param numberOfSuccesses Number of successes observed.
101     * @param probability Assumed probability of a single trial under the null
102     * hypothesis.
103     * @return test result
104     * @throws IllegalArgumentException if {@code numberOfTrials} or
105     * {@code numberOfSuccesses} is negative; {@code probability} is not between 0
106     * and 1; or if {@code numberOfTrials < numberOfSuccesses}
107     * @see #with(AlternativeHypothesis)
108     */
109    public SignificanceResult test(int numberOfTrials, int numberOfSuccesses, double probability) {
110        // Note: The distribution validates number of trials and probability.
111        // Here we only have to validate the number of successes.
112        Arguments.checkNonNegative(numberOfSuccesses);
113        if (numberOfTrials < numberOfSuccesses) {
114            throw new InferenceException(
115                "must have n >= k for binomial coefficient (n, k), got n = %d, k = %d",
116                numberOfSuccesses, numberOfTrials);
117        }
118
119        final BinomialDistribution distribution = BinomialDistribution.of(numberOfTrials, probability);
120        final double p;
121        if (alternative == AlternativeHypothesis.GREATER_THAN) {
122            p = distribution.survivalProbability(numberOfSuccesses - 1);
123        } else if (alternative == AlternativeHypothesis.LESS_THAN) {
124            p = distribution.cumulativeProbability(numberOfSuccesses);
125        } else {
126            p = twoSidedBinomialTest(numberOfTrials, numberOfSuccesses, probability, distribution);
127        }
128        return new BaseSignificanceResult((double) numberOfSuccesses / numberOfTrials, p);
129    }
130
131    /**
132     * Returns the <i>observed significance level</i>, or p-value, associated with a
133     * two-sided binomial test about the probability of success \( \pi \).
134     *
135     * @param n Number of trials performed.
136     * @param k Number of successes observed.
137     * @param probability Assumed probability of a single trial under the null
138     * hypothesis.
139     * @param distribution Binomial distribution.
140     * @return p-value
141     */
142    private static double twoSidedBinomialTest(int n, int k, double probability,
143                                               BinomialDistribution distribution) {
144        // Find all i where Pr(X = i) <= Pr(X = k) and sum them.
145        // Exploit the known unimodal distribution to increase the
146        // search speed. Note the search depends only on magnitude differences.
147        // The current BinomialDistribution is faster using log probability
148        // as it omits a call to Math.exp.
149
150        // Use the mode as the point of largest probability.
151        // The lower or upper mode is important for the search below.
152        final int m1 = (int) Math.ceil((n + 1.0) * probability) - 1;
153        final int m2 = (int) Math.floor((n + 1.0) * probability);
154        if (k < m1) {
155            final double pk = distribution.logProbability(k);
156            // Lower half = cdf(k)
157            // Find upper half. As k < lower mode i should never
158            // reach the lower mode based on the probability alone.
159            // Bracket with the upper mode.
160            final int i = Searches.searchDescending(m2, n, pk, distribution::logProbability);
161            return distribution.cumulativeProbability(k) +
162                   distribution.survivalProbability(i - 1);
163        } else if (k > m2) {
164            final double pk = distribution.logProbability(k);
165            // Upper half = sf(k - 1)
166            // Find lower half. As k > upper mode i should never
167            // reach the upper mode based on the probability alone.
168            // Bracket with the lower mode.
169            final int i = Searches.searchAscending(0, m1, pk, distribution::logProbability);
170            return distribution.cumulativeProbability(i) +
171                   distribution.survivalProbability(k - 1);
172        }
173        // k == mode
174        // Edge case where the sum of probabilities will be either
175        // 1 or 1 - Pr(X = mode) where mode != k
176        final double pk = distribution.probability(k);
177        final double pm = distribution.probability(k == m1 ? m2 : m1);
178        return pm > pk ? 1 - pm : 1;
179    }
180}