IntStatistics.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.statistics.descriptive;

import java.math.BigInteger;
import java.util.Objects;
import java.util.Set;
import java.util.function.DoubleConsumer;
import java.util.function.Function;
import java.util.function.IntConsumer;

/**
 * Statistics for {@code int} values.
 *
 * <p>This class provides combinations of individual statistic implementations in the
 * {@code org.apache.commons.statistics.descriptive} package.
 *
 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
 * This implementation does not check for overflow of the count.
 *
 * @since 1.1
 */
public final class IntStatistics implements IntConsumer {
    /** Error message for non configured statistics. */
    private static final String NO_CONFIGURED_STATISTICS = "No configured statistics";
    /** Error message for an unsupported statistic. */
    private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: ";

    /** Count of values recorded. */
    private long count;
    /** The consumer of values. */
    private final IntConsumer consumer;
    /** The {@link IntMin} implementation. */
    private final IntMin min;
    /** The {@link IntMax} implementation. */
    private final IntMax max;
    /** The moment implementation. May be any instance of {@link FirstMoment}.
     * This implementation uses only the third and fourth moments. */
    private final FirstMoment moment;
    /** The {@link IntSum} implementation. */
    private final IntSum sum;
    /** The {@link Product} implementation. */
    private final Product product;
    /** The {@link IntSumOfSquares} implementation. */
    private final IntSumOfSquares sumOfSquares;
    /** The {@link SumOfLogs} implementation. */
    private final SumOfLogs sumOfLogs;
    /** Configuration options for computation of statistics. */
    private StatisticsConfiguration config;

    /**
     * A builder for {@link IntStatistics}.
     */
    public static final class Builder {
        /** An empty double array. */
        private static final int[] NO_VALUES = {};

        /** The {@link IntMin} constructor. */
        private Function<int[], IntMin> min;
        /** The {@link IntMax} constructor. */
        private Function<int[], IntMax> max;
        /** The moment constructor. May return any instance of {@link FirstMoment}. */
        private Function<int[], FirstMoment> moment;
        /** The {@link IntSum} constructor. */
        private Function<int[], IntSum> sum;
        /** The {@link Product} constructor. */
        private Function<int[], Product> product;
        /** The {@link IntSumOfSquares} constructor. */
        private Function<int[], IntSumOfSquares> sumOfSquares;
        /** The {@link SumOfLogs} constructor. */
        private Function<int[], SumOfLogs> sumOfLogs;
        /** The order of the moment. It corresponds to the power computed by the {@link FirstMoment}
         * instance constructed by {@link #moment}. This should only be increased from the default
         * of zero (corresponding to no moment computation). */
        private int momentOrder;
        /** Configuration options for computation of statistics. */
        private StatisticsConfiguration config = StatisticsConfiguration.withDefaults();

        /**
         * Create an instance.
         */
        Builder() {
            // Do nothing
        }

        /**
         * Add the statistic to the statistics to compute.
         *
         * @param statistic Statistic to compute.
         * @return {@code this} instance
         */
        Builder add(Statistic statistic) {
            switch (statistic) {
            case GEOMETRIC_MEAN:
            case SUM_OF_LOGS:
                sumOfLogs = SumOfLogs::of;
                break;
            case KURTOSIS:
                createMoment(4);
                break;
            case MAX:
                max = IntMax::of;
                break;
            case MIN:
                min = IntMin::of;
                break;
            case PRODUCT:
                product = Product::of;
                break;
            case SKEWNESS:
                createMoment(3);
                break;
            case STANDARD_DEVIATION:
            case VARIANCE:
                sum = IntSum::of;
                sumOfSquares = IntSumOfSquares::of;
                break;
            case MEAN:
            case SUM:
                sum = IntSum::of;
                break;
            case SUM_OF_SQUARES:
                sumOfSquares = IntSumOfSquares::of;
                break;
            default:
                throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
            }
            return this;
        }

        /**
         * Creates the moment constructor for the specified {@code order},
         * e.g. order=3 is sum of cubed deviations.
         *
         * @param order Order.
         */
        private void createMoment(int order) {
            if (order > momentOrder) {
                momentOrder = order;
                if (order == 4) {
                    moment = SumOfFourthDeviations::of;
                } else {
                    // Assume order == 3
                    moment = SumOfCubedDeviations::of;
                }
            }
        }

        /**
         * Sets the statistics configuration options for computation of statistics.
         *
         * @param v Value.
         * @return the builder
         * @throws NullPointerException if the value is null
         */
        public Builder setConfiguration(StatisticsConfiguration v) {
            config = Objects.requireNonNull(v);
            return this;
        }

        /**
         * Builds a {@code IntStatistics} instance.
         *
         * @return {@code IntStatistics} instance.
         */
        public IntStatistics build() {
            return build(NO_VALUES);
        }

        /**
         * Builds a {@code IntStatistics} instance using the input {@code values}.
         *
         * <p>Note: {@code IntStatistics} computed using
         * {@link IntStatistics#accept(int) accept} may be
         * different from this instance.
         *
         * @param values Values.
         * @return {@code IntStatistics} instance.
         */
        public IntStatistics build(int... values) {
            Objects.requireNonNull(values, "values");
            return new IntStatistics(
                values.length,
                create(min, values),
                create(max, values),
                create(moment, values),
                create(sum, values),
                create(product, values),
                create(sumOfSquares, values),
                create(sumOfLogs, values),
                config);
        }

        /**
         * Creates the object from the {@code values}.
         *
         * @param <T> object type
         * @param constructor Constructor.
         * @param values Values
         * @return the instance
         */
        private static <T> T create(Function<int[], T> constructor, int[] values) {
            if (constructor != null) {
                return constructor.apply(values);
            }
            return null;
        }
    }

    /**
     * Create an instance.
     *
     * @param count Count of values.
     * @param min IntMin implementation.
     * @param max IntMax implementation.
     * @param moment Moment implementation.
     * @param sum IntSum implementation.
     * @param product Product implementation.
     * @param sumOfSquares Sum of squares implementation.
     * @param sumOfLogs Sum of logs implementation.
     * @param config Statistics configuration.
     */
    IntStatistics(long count, IntMin min, IntMax max, FirstMoment moment, IntSum sum,
                  Product product, IntSumOfSquares sumOfSquares, SumOfLogs sumOfLogs,
                  StatisticsConfiguration config) {
        this.count = count;
        this.min = min;
        this.max = max;
        this.moment = moment;
        this.sum = sum;
        this.product = product;
        this.sumOfSquares = sumOfSquares;
        this.sumOfLogs = sumOfLogs;
        this.config = config;
        // The final consumer should never be null as the builder is created
        // with at least one statistic.
        consumer = Statistics.compose(min, max, sum, sumOfSquares,
                                      composeAsInt(moment, product, sumOfLogs));
    }

    /**
     * Chain the {@code consumers} into a single composite {@code IntConsumer}.
     * Ignore any {@code null} consumer.
     *
     * @param consumers Consumers.
     * @return a composed consumer (or null)
     */
    private static IntConsumer composeAsInt(DoubleConsumer... consumers) {
        final DoubleConsumer c = Statistics.compose(consumers);
        if (c != null) {
            return c::accept;
        }
        return null;
    }

    /**
     * Returns a new instance configured to compute the specified {@code statistics}.
     *
     * <p>The statistics will be empty and so will return the default values for each
     * computed statistic.
     *
     * @param statistics Statistics to compute.
     * @return the instance
     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
     */
    public static IntStatistics of(Statistic... statistics) {
        return builder(statistics).build();
    }

    /**
     * Returns a new instance configured to compute the specified {@code statistics}
     * populated using the input {@code values}.
     *
     * <p>Use this method to create an instance populated with a (variable) array of
     * {@code int[]} data:
     *
     * <pre>
     * IntStatistics stats = IntStatistics.of(
     *     EnumSet.of(Statistic.MIN, Statistic.MAX),
     *     1, 1, 2, 3, 5, 8, 13);
     * </pre>
     *
     * @param statistics Statistics to compute.
     * @param values Values.
     * @return the instance
     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
     */
    public static IntStatistics of(Set<Statistic> statistics, int... values) {
        if (statistics.isEmpty()) {
            throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
        }
        final Builder b = new Builder();
        statistics.forEach(b::add);
        return b.build(values);
    }

    /**
     * Returns a new builder configured to create instances to compute the specified
     * {@code statistics}.
     *
     * <p>Use this method to create an instance populated with an array of {@code int[]}
     * data using the {@link Builder#build(int...)} method:
     *
     * <pre>
     * int[] data = ...
     * IntStatistics stats = IntStatistics.builder(
     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
     *     .build(data);
     * </pre>
     *
     * <p>The builder can be used to create multiple instances of {@link IntStatistics}
     * to be used in parallel, or on separate arrays of {@code int[]} data. These may
     * be {@link #combine(IntStatistics) combined}. For example:
     *
     * <pre>
     * int[][] data = ...
     * IntStatistics.Builder builder = IntStatistics.builder(
     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
     * IntStatistics stats = Arrays.stream(data)
     *     .parallel()
     *     .map(builder::build)
     *     .reduce(IntStatistics::combine)
     *     .get();
     * </pre>
     *
     * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat
     * use on multiple data:
     *
     * <pre>{@code
     * IntStatistics.Builder builder = IntStatistics.builder(
     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
     * Collector<int[], IntStatistics, IntStatistics> collector =
     *     Collector.of(builder::build,
     *                  (s, d) -> s.combine(builder.build(d)),
     *                  IntStatistics::combine);
     *
     * // Repeated
     * int[][] data = ...
     * IntStatistics stats = Arrays.stream(data).collect(collector);
     * }</pre>
     *
     * @param statistics Statistics to compute.
     * @return the builder
     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
     */
    public static Builder builder(Statistic... statistics) {
        if (statistics.length == 0) {
            throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
        }
        final Builder b = new Builder();
        for (final Statistic s : statistics) {
            b.add(s);
        }
        return b;
    }

    /**
     * Updates the state of the statistics to reflect the addition of {@code value}.
     *
     * @param value Value.
     */
    @Override
    public void accept(int value) {
        count++;
        consumer.accept(value);
    }

    /**
     * Return the count of values recorded.
     *
     * @return the count of values
     */
    public long getCount() {
        return count;
    }

    /**
     * Check if the specified {@code statistic} is supported.
     *
     * <p>Note: This method will not return {@code false} if the argument is {@code null}.
     *
     * @param statistic Statistic.
     * @return {@code true} if supported
     * @throws NullPointerException if the {@code statistic} is {@code null}
     * @see #getResult(Statistic)
     */
    public boolean isSupported(Statistic statistic) {
        // Check for the appropriate underlying implementation
        switch (statistic) {
        case GEOMETRIC_MEAN:
        case SUM_OF_LOGS:
            return sumOfLogs != null;
        case KURTOSIS:
            return moment instanceof SumOfFourthDeviations;
        case MAX:
            return max != null;
        case MIN:
            return min != null;
        case PRODUCT:
            return product != null;
        case SKEWNESS:
            return moment instanceof SumOfCubedDeviations;
        case STANDARD_DEVIATION:
        case VARIANCE:
            return sum != null && sumOfSquares != null;
        case MEAN:
        case SUM:
            return sum != null;
        case SUM_OF_SQUARES:
            return sumOfSquares != null;
        default:
            return false;
        }
    }

    /**
     * Gets the value of the specified {@code statistic} as a {@code double}.
     *
     * @param statistic Statistic.
     * @return the value
     * @throws IllegalArgumentException if the {@code statistic} is not supported
     * @see #isSupported(Statistic)
     * @see #getResult(Statistic)
     */
    public double getAsDouble(Statistic statistic) {
        return getResult(statistic).getAsDouble();
    }

    /**
     * Gets the value of the specified {@code statistic} as an {@code int}.
     *
     * <p>Use this method to access the {@code int} result for exact integer statistics,
     * for example {@link Statistic#MIN}.
     *
     * <p>Note: This method may throw an {@link ArithmeticException} if the result
     * overflows an {@code int}.
     *
     * @param statistic Statistic.
     * @return the value
     * @throws IllegalArgumentException if the {@code statistic} is not supported
     * @throws ArithmeticException if the {@code result} overflows an {@code int} or is not
     * finite
     * @see #isSupported(Statistic)
     * @see #getResult(Statistic)
     */
    public int getAsInt(Statistic statistic) {
        return getResult(statistic).getAsInt();
    }

    /**
     * Gets the value of the specified {@code statistic} as a {@code long}.
     *
     * <p>Use this method to access the {@code long} result for exact integer statistics,
     * for example {@link Statistic#SUM} for a {@link #getCount() count} less than or equal to
     *2<sup>32</sup>.
     *
     * <p>Note: This method may throw an {@link ArithmeticException} if the result
     * overflows an {@code long}.
     *
     * @param statistic Statistic.
     * @return the value
     * @throws IllegalArgumentException if the {@code statistic} is not supported
     * @throws ArithmeticException if the {@code result} overflows an {@code long} or is not
     * finite
     * @see #isSupported(Statistic)
     * @see #getResult(Statistic)
     */
    public long getAsLong(Statistic statistic) {
        return getResult(statistic).getAsLong();
    }

    /**
     * Gets the value of the specified {@code statistic} as a {@code BigInteger}.
     *
     * <p>Use this method to access the {@code BigInteger} result for exact integer statistics,
     * for example {@link Statistic#SUM_OF_SQUARES}.
     *
     * <p>Note: This method may throw an {@link ArithmeticException} if the result
     * is not finite.
     *
     * @param statistic Statistic.
     * @return the value
     * @throws IllegalArgumentException if the {@code statistic} is not supported
     * @throws ArithmeticException if the {@code result} is not finite
     * @see #isSupported(Statistic)
     * @see #getResult(Statistic)
     */
    public BigInteger getAsBigInteger(Statistic statistic) {
        return getResult(statistic).getAsBigInteger();
    }

    /**
     * Gets a supplier for the value of the specified {@code statistic}.
     *
     * <p>The returned function will supply the correct result after
     * calls to {@link #accept(int) accept} or
     * {@link #combine(IntStatistics) combine} further values into
     * {@code this} instance.
     *
     * <p>This method can be used to perform a one-time look-up of the statistic
     * function to compute statistics as values are dynamically added.
     *
     * @param statistic Statistic.
     * @return the supplier
     * @throws IllegalArgumentException if the {@code statistic} is not supported
     * @see #isSupported(Statistic)
     * @see #getAsDouble(Statistic)
     */
    public StatisticResult getResult(Statistic statistic) {
        // Locate the implementation.
        // Statistics that wrap an underlying implementation are created in methods.
        // The return argument should be an interface reference and not an instance
        // of IntStatistic. This ensures the statistic implementation cannot
        // be updated with new values by casting the result and calling accept(int).
        StatisticResult stat = null;
        switch (statistic) {
        case GEOMETRIC_MEAN:
            stat = getGeometricMean();
            break;
        case KURTOSIS:
            stat = getKurtosis();
            break;
        case MAX:
            stat = Statistics.getResultAsIntOrNull(max);
            break;
        case MEAN:
            stat = getMean();
            break;
        case MIN:
            stat = Statistics.getResultAsIntOrNull(min);
            break;
        case PRODUCT:
            stat = Statistics.getResultAsDoubleOrNull(product);
            break;
        case SKEWNESS:
            stat = getSkewness();
            break;
        case STANDARD_DEVIATION:
            stat = getStandardDeviation();
            break;
        case SUM:
            stat = Statistics.getResultAsBigIntegerOrNull(sum);
            break;
        case SUM_OF_LOGS:
            stat = Statistics.getResultAsDoubleOrNull(sumOfLogs);
            break;
        case SUM_OF_SQUARES:
            stat = Statistics.getResultAsBigIntegerOrNull(sumOfSquares);
            break;
        case VARIANCE:
            stat = getVariance();
            break;
        default:
            break;
        }
        if (stat != null) {
            return stat;
        }
        throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
    }

    /**
     * Gets the geometric mean.
     *
     * @return a geometric mean supplier (or null if unsupported)
     */
    private StatisticResult getGeometricMean() {
        if (sumOfLogs != null) {
            // Return a function that has access to the count and sumOfLogs
            return () -> GeometricMean.computeGeometricMean(count, sumOfLogs);
        }
        return null;
    }

    /**
     * Gets the kurtosis.
     *
     * @return a kurtosis supplier (or null if unsupported)
     */
    private StatisticResult getKurtosis() {
        if (moment instanceof SumOfFourthDeviations) {
            return new Kurtosis((SumOfFourthDeviations) moment)
                .setBiased(config.isBiased())::getAsDouble;
        }
        return null;
    }

    /**
     * Gets the mean.
     *
     * @return a mean supplier (or null if unsupported)
     */
    private StatisticResult getMean() {
        if (sum != null) {
            // Return a function that has access to the count and sum
            final Int128 s = sum.getSum();
            return () -> IntMean.computeMean(s, count);
        }
        return null;
    }

    /**
     * Gets the skewness.
     *
     * @return a skewness supplier (or null if unsupported)
     */
    private StatisticResult getSkewness() {
        if (moment instanceof SumOfCubedDeviations) {
            return new Skewness((SumOfCubedDeviations) moment)
                .setBiased(config.isBiased())::getAsDouble;
        }
        return null;
    }

    /**
     * Gets the standard deviation.
     *
     * @return a standard deviation supplier (or null if unsupported)
     */
    private StatisticResult getStandardDeviation() {
        return getVarianceOrStd(true);
    }

    /**
     * Gets the variance.
     *
     * @return a variance supplier (or null if unsupported)
     */
    private StatisticResult getVariance() {
        return getVarianceOrStd(false);
    }

    /**
     * Gets the variance or standard deviation.
     *
     * @param std Flag to control if the statistic is the standard deviation.
     * @return a variance/standard deviation supplier (or null if unsupported)
     */
    private StatisticResult getVarianceOrStd(boolean std) {
        if (sum != null && sumOfSquares != null) {
            // Return a function that has access to the count, sum and sum of squares
            final Int128 s = sum.getSum();
            final UInt128 ss = sumOfSquares.getSumOfSquares();
            final boolean biased = config.isBiased();
            return () -> IntVariance.computeVarianceOrStd(ss, s, count, biased, std);
        }
        return null;
    }

    /**
     * Combines the state of the {@code other} statistics into this one.
     * Only {@code this} instance is modified by the {@code combine} operation.
     *
     * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the
     * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for
     * all values of the {@link Statistic} enum which are supported by {@code this}
     * instance.
     *
     * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform
     * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other}
     * instance is not compatible then an exception is raised before any state is modified.
     *
     * @param other Another set of statistics to be combined.
     * @return {@code this} instance after combining {@code other}.
     * @throws IllegalArgumentException if the {@code other} is not compatible
     */
    public IntStatistics combine(IntStatistics other) {
        // Check compatibility
        Statistics.checkCombineCompatible(min, other.min);
        Statistics.checkCombineCompatible(max, other.max);
        Statistics.checkCombineCompatible(sum, other.sum);
        Statistics.checkCombineCompatible(product, other.product);
        Statistics.checkCombineCompatible(sumOfSquares, other.sumOfSquares);
        Statistics.checkCombineCompatible(sumOfLogs, other.sumOfLogs);
        Statistics.checkCombineAssignable(moment, other.moment);
        // Combine
        count += other.count;
        Statistics.combine(min, other.min);
        Statistics.combine(max, other.max);
        Statistics.combine(sum, other.sum);
        Statistics.combine(product, other.product);
        Statistics.combine(sumOfSquares, other.sumOfSquares);
        Statistics.combine(sumOfLogs, other.sumOfLogs);
        Statistics.combineMoment(moment, other.moment);
        return this;
    }

    /**
     * Sets the statistics configuration.
     *
     * <p>These options only control the final computation of statistics. The configuration
     * will not affect compatibility between instances during a
     * {@link #combine(IntStatistics) combine} operation.
     *
     * <p>Note: These options will affect any future computation of statistics. Supplier functions
     * that have been previously created will not be updated with the new configuration.
     *
     * @param v Value.
     * @return {@code this} instance
     * @throws NullPointerException if the value is null
     * @see #getResult(Statistic)
     */
    public IntStatistics setConfiguration(StatisticsConfiguration v) {
        config = Objects.requireNonNull(v);
        return this;
    }
}