001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.descriptive;
018
019import java.math.BigInteger;
020import java.util.Objects;
021import java.util.Set;
022import java.util.function.DoubleConsumer;
023import java.util.function.Function;
024import java.util.function.IntConsumer;
025
026/**
027 * Statistics for {@code int} values.
028 *
029 * <p>This class provides combinations of individual statistic implementations in the
030 * {@code org.apache.commons.statistics.descriptive} package.
031 *
032 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
033 * This implementation does not check for overflow of the count.
034 *
035 * @since 1.1
036 */
037public final class IntStatistics implements IntConsumer {
038    /** Error message for non configured statistics. */
039    private static final String NO_CONFIGURED_STATISTICS = "No configured statistics";
040    /** Error message for an unsupported statistic. */
041    private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: ";
042
043    /** Count of values recorded. */
044    private long count;
045    /** The consumer of values. */
046    private final IntConsumer consumer;
047    /** The {@link IntMin} implementation. */
048    private final IntMin min;
049    /** The {@link IntMax} implementation. */
050    private final IntMax max;
051    /** The moment implementation. May be any instance of {@link FirstMoment}.
052     * This implementation uses only the third and fourth moments. */
053    private final FirstMoment moment;
054    /** The {@link IntSum} implementation. */
055    private final IntSum sum;
056    /** The {@link Product} implementation. */
057    private final Product product;
058    /** The {@link IntSumOfSquares} implementation. */
059    private final IntSumOfSquares sumOfSquares;
060    /** The {@link SumOfLogs} implementation. */
061    private final SumOfLogs sumOfLogs;
062    /** Configuration options for computation of statistics. */
063    private StatisticsConfiguration config;
064
065    /**
066     * A builder for {@link IntStatistics}.
067     */
068    public static final class Builder {
        /** An empty {@code int} array. */
070        private static final int[] NO_VALUES = {};
071
072        /** The {@link IntMin} constructor. */
073        private Function<int[], IntMin> min;
074        /** The {@link IntMax} constructor. */
075        private Function<int[], IntMax> max;
076        /** The moment constructor. May return any instance of {@link FirstMoment}. */
077        private Function<int[], FirstMoment> moment;
078        /** The {@link IntSum} constructor. */
079        private Function<int[], IntSum> sum;
080        /** The {@link Product} constructor. */
081        private Function<int[], Product> product;
082        /** The {@link IntSumOfSquares} constructor. */
083        private Function<int[], IntSumOfSquares> sumOfSquares;
084        /** The {@link SumOfLogs} constructor. */
085        private Function<int[], SumOfLogs> sumOfLogs;
086        /** The order of the moment. It corresponds to the power computed by the {@link FirstMoment}
087         * instance constructed by {@link #moment}. This should only be increased from the default
088         * of zero (corresponding to no moment computation). */
089        private int momentOrder;
090        /** Configuration options for computation of statistics. */
091        private StatisticsConfiguration config = StatisticsConfiguration.withDefaults();
092
093        /**
094         * Create an instance.
095         */
096        Builder() {
097            // Do nothing
098        }
099
100        /**
101         * Add the statistic to the statistics to compute.
102         *
103         * @param statistic Statistic to compute.
104         * @return {@code this} instance
105         */
106        Builder add(Statistic statistic) {
107            switch (statistic) {
108            case GEOMETRIC_MEAN:
109            case SUM_OF_LOGS:
110                sumOfLogs = SumOfLogs::of;
111                break;
112            case KURTOSIS:
113                createMoment(4);
114                break;
115            case MAX:
116                max = IntMax::of;
117                break;
118            case MIN:
119                min = IntMin::of;
120                break;
121            case PRODUCT:
122                product = Product::of;
123                break;
124            case SKEWNESS:
125                createMoment(3);
126                break;
127            case STANDARD_DEVIATION:
128            case VARIANCE:
129                sum = IntSum::of;
130                sumOfSquares = IntSumOfSquares::of;
131                break;
132            case MEAN:
133            case SUM:
134                sum = IntSum::of;
135                break;
136            case SUM_OF_SQUARES:
137                sumOfSquares = IntSumOfSquares::of;
138                break;
139            default:
140                throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
141            }
142            return this;
143        }
144
145        /**
146         * Creates the moment constructor for the specified {@code order},
147         * e.g. order=3 is sum of cubed deviations.
148         *
149         * @param order Order.
150         */
151        private void createMoment(int order) {
152            if (order > momentOrder) {
153                momentOrder = order;
154                if (order == 4) {
155                    moment = SumOfFourthDeviations::of;
156                } else {
157                    // Assume order == 3
158                    moment = SumOfCubedDeviations::of;
159                }
160            }
161        }
162
163        /**
164         * Sets the statistics configuration options for computation of statistics.
165         *
166         * @param v Value.
167         * @return the builder
168         * @throws NullPointerException if the value is null
169         */
170        public Builder setConfiguration(StatisticsConfiguration v) {
171            config = Objects.requireNonNull(v);
172            return this;
173        }
174
175        /**
176         * Builds a {@code IntStatistics} instance.
177         *
178         * @return {@code IntStatistics} instance.
179         */
180        public IntStatistics build() {
181            return build(NO_VALUES);
182        }
183
184        /**
185         * Builds a {@code IntStatistics} instance using the input {@code values}.
186         *
187         * <p>Note: {@code IntStatistics} computed using
188         * {@link IntStatistics#accept(int) accept} may be
189         * different from this instance.
190         *
191         * @param values Values.
192         * @return {@code IntStatistics} instance.
193         */
194        public IntStatistics build(int... values) {
195            Objects.requireNonNull(values, "values");
196            return new IntStatistics(
197                values.length,
198                create(min, values),
199                create(max, values),
200                create(moment, values),
201                create(sum, values),
202                create(product, values),
203                create(sumOfSquares, values),
204                create(sumOfLogs, values),
205                config);
206        }
207
208        /**
209         * Creates the object from the {@code values}.
210         *
211         * @param <T> object type
212         * @param constructor Constructor.
213         * @param values Values
214         * @return the instance
215         */
216        private static <T> T create(Function<int[], T> constructor, int[] values) {
217            if (constructor != null) {
218                return constructor.apply(values);
219            }
220            return null;
221        }
222    }
223
224    /**
225     * Create an instance.
226     *
227     * @param count Count of values.
228     * @param min IntMin implementation.
229     * @param max IntMax implementation.
230     * @param moment Moment implementation.
231     * @param sum IntSum implementation.
232     * @param product Product implementation.
233     * @param sumOfSquares Sum of squares implementation.
234     * @param sumOfLogs Sum of logs implementation.
235     * @param config Statistics configuration.
236     */
237    IntStatistics(long count, IntMin min, IntMax max, FirstMoment moment, IntSum sum,
238                  Product product, IntSumOfSquares sumOfSquares, SumOfLogs sumOfLogs,
239                  StatisticsConfiguration config) {
240        this.count = count;
241        this.min = min;
242        this.max = max;
243        this.moment = moment;
244        this.sum = sum;
245        this.product = product;
246        this.sumOfSquares = sumOfSquares;
247        this.sumOfLogs = sumOfLogs;
248        this.config = config;
249        // The final consumer should never be null as the builder is created
250        // with at least one statistic.
251        consumer = Statistics.compose(min, max, sum, sumOfSquares,
252                                      composeAsInt(moment, product, sumOfLogs));
253    }
254
255    /**
256     * Chain the {@code consumers} into a single composite {@code IntConsumer}.
257     * Ignore any {@code null} consumer.
258     *
259     * @param consumers Consumers.
260     * @return a composed consumer (or null)
261     */
262    private static IntConsumer composeAsInt(DoubleConsumer... consumers) {
263        final DoubleConsumer c = Statistics.compose(consumers);
264        if (c != null) {
265            return c::accept;
266        }
267        return null;
268    }
269
270    /**
271     * Returns a new instance configured to compute the specified {@code statistics}.
272     *
273     * <p>The statistics will be empty and so will return the default values for each
274     * computed statistic.
275     *
276     * @param statistics Statistics to compute.
277     * @return the instance
278     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
279     */
280    public static IntStatistics of(Statistic... statistics) {
281        return builder(statistics).build();
282    }
283
284    /**
285     * Returns a new instance configured to compute the specified {@code statistics}
286     * populated using the input {@code values}.
287     *
288     * <p>Use this method to create an instance populated with a (variable) array of
289     * {@code int[]} data:
290     *
291     * <pre>
292     * IntStatistics stats = IntStatistics.of(
293     *     EnumSet.of(Statistic.MIN, Statistic.MAX),
294     *     1, 1, 2, 3, 5, 8, 13);
295     * </pre>
296     *
297     * @param statistics Statistics to compute.
298     * @param values Values.
299     * @return the instance
300     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
301     */
302    public static IntStatistics of(Set<Statistic> statistics, int... values) {
303        if (statistics.isEmpty()) {
304            throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
305        }
306        final Builder b = new Builder();
307        statistics.forEach(b::add);
308        return b.build(values);
309    }
310
311    /**
312     * Returns a new builder configured to create instances to compute the specified
313     * {@code statistics}.
314     *
315     * <p>Use this method to create an instance populated with an array of {@code int[]}
316     * data using the {@link Builder#build(int...)} method:
317     *
318     * <pre>
319     * int[] data = ...
320     * IntStatistics stats = IntStatistics.builder(
321     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
322     *     .build(data);
323     * </pre>
324     *
325     * <p>The builder can be used to create multiple instances of {@link IntStatistics}
326     * to be used in parallel, or on separate arrays of {@code int[]} data. These may
327     * be {@link #combine(IntStatistics) combined}. For example:
328     *
329     * <pre>
330     * int[][] data = ...
331     * IntStatistics.Builder builder = IntStatistics.builder(
332     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
333     * IntStatistics stats = Arrays.stream(data)
334     *     .parallel()
335     *     .map(builder::build)
336     *     .reduce(IntStatistics::combine)
337     *     .get();
338     * </pre>
339     *
340     * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat
341     * use on multiple data:
342     *
343     * <pre>{@code
344     * IntStatistics.Builder builder = IntStatistics.builder(
345     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
346     * Collector<int[], IntStatistics, IntStatistics> collector =
347     *     Collector.of(builder::build,
348     *                  (s, d) -> s.combine(builder.build(d)),
349     *                  IntStatistics::combine);
350     *
351     * // Repeated
352     * int[][] data = ...
353     * IntStatistics stats = Arrays.stream(data).collect(collector);
354     * }</pre>
355     *
356     * @param statistics Statistics to compute.
357     * @return the builder
358     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
359     */
360    public static Builder builder(Statistic... statistics) {
361        if (statistics.length == 0) {
362            throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
363        }
364        final Builder b = new Builder();
365        for (final Statistic s : statistics) {
366            b.add(s);
367        }
368        return b;
369    }
370
371    /**
372     * Updates the state of the statistics to reflect the addition of {@code value}.
373     *
374     * @param value Value.
375     */
376    @Override
377    public void accept(int value) {
378        count++;
379        consumer.accept(value);
380    }
381
382    /**
383     * Return the count of values recorded.
384     *
385     * @return the count of values
386     */
387    public long getCount() {
388        return count;
389    }
390
391    /**
392     * Check if the specified {@code statistic} is supported.
393     *
394     * <p>Note: This method will not return {@code false} if the argument is {@code null}.
395     *
396     * @param statistic Statistic.
397     * @return {@code true} if supported
398     * @throws NullPointerException if the {@code statistic} is {@code null}
399     * @see #getResult(Statistic)
400     */
401    public boolean isSupported(Statistic statistic) {
402        // Check for the appropriate underlying implementation
403        switch (statistic) {
404        case GEOMETRIC_MEAN:
405        case SUM_OF_LOGS:
406            return sumOfLogs != null;
407        case KURTOSIS:
408            return moment instanceof SumOfFourthDeviations;
409        case MAX:
410            return max != null;
411        case MIN:
412            return min != null;
413        case PRODUCT:
414            return product != null;
415        case SKEWNESS:
416            return moment instanceof SumOfCubedDeviations;
417        case STANDARD_DEVIATION:
418        case VARIANCE:
419            return sum != null && sumOfSquares != null;
420        case MEAN:
421        case SUM:
422            return sum != null;
423        case SUM_OF_SQUARES:
424            return sumOfSquares != null;
425        default:
426            return false;
427        }
428    }
429
430    /**
431     * Gets the value of the specified {@code statistic} as a {@code double}.
432     *
433     * @param statistic Statistic.
434     * @return the value
435     * @throws IllegalArgumentException if the {@code statistic} is not supported
436     * @see #isSupported(Statistic)
437     * @see #getResult(Statistic)
438     */
439    public double getAsDouble(Statistic statistic) {
440        return getResult(statistic).getAsDouble();
441    }
442
443    /**
444     * Gets the value of the specified {@code statistic} as an {@code int}.
445     *
446     * <p>Use this method to access the {@code int} result for exact integer statistics,
447     * for example {@link Statistic#MIN}.
448     *
449     * <p>Note: This method may throw an {@link ArithmeticException} if the result
450     * overflows an {@code int}.
451     *
452     * @param statistic Statistic.
453     * @return the value
454     * @throws IllegalArgumentException if the {@code statistic} is not supported
455     * @throws ArithmeticException if the {@code result} overflows an {@code int} or is not
456     * finite
457     * @see #isSupported(Statistic)
458     * @see #getResult(Statistic)
459     */
460    public int getAsInt(Statistic statistic) {
461        return getResult(statistic).getAsInt();
462    }
463
464    /**
465     * Gets the value of the specified {@code statistic} as a {@code long}.
466     *
467     * <p>Use this method to access the {@code long} result for exact integer statistics,
468     * for example {@link Statistic#SUM} for a {@link #getCount() count} less than or equal to
469     *2<sup>32</sup>.
470     *
471     * <p>Note: This method may throw an {@link ArithmeticException} if the result
472     * overflows an {@code long}.
473     *
474     * @param statistic Statistic.
475     * @return the value
476     * @throws IllegalArgumentException if the {@code statistic} is not supported
477     * @throws ArithmeticException if the {@code result} overflows an {@code long} or is not
478     * finite
479     * @see #isSupported(Statistic)
480     * @see #getResult(Statistic)
481     */
482    public long getAsLong(Statistic statistic) {
483        return getResult(statistic).getAsLong();
484    }
485
486    /**
487     * Gets the value of the specified {@code statistic} as a {@code BigInteger}.
488     *
489     * <p>Use this method to access the {@code BigInteger} result for exact integer statistics,
490     * for example {@link Statistic#SUM_OF_SQUARES}.
491     *
492     * <p>Note: This method may throw an {@link ArithmeticException} if the result
493     * is not finite.
494     *
495     * @param statistic Statistic.
496     * @return the value
497     * @throws IllegalArgumentException if the {@code statistic} is not supported
498     * @throws ArithmeticException if the {@code result} is not finite
499     * @see #isSupported(Statistic)
500     * @see #getResult(Statistic)
501     */
502    public BigInteger getAsBigInteger(Statistic statistic) {
503        return getResult(statistic).getAsBigInteger();
504    }
505
506    /**
507     * Gets a supplier for the value of the specified {@code statistic}.
508     *
509     * <p>The returned function will supply the correct result after
510     * calls to {@link #accept(int) accept} or
511     * {@link #combine(IntStatistics) combine} further values into
512     * {@code this} instance.
513     *
514     * <p>This method can be used to perform a one-time look-up of the statistic
515     * function to compute statistics as values are dynamically added.
516     *
517     * @param statistic Statistic.
518     * @return the supplier
519     * @throws IllegalArgumentException if the {@code statistic} is not supported
520     * @see #isSupported(Statistic)
521     * @see #getAsDouble(Statistic)
522     */
523    public StatisticResult getResult(Statistic statistic) {
524        // Locate the implementation.
525        // Statistics that wrap an underlying implementation are created in methods.
526        // The return argument should be an interface reference and not an instance
527        // of IntStatistic. This ensures the statistic implementation cannot
528        // be updated with new values by casting the result and calling accept(int).
529        StatisticResult stat = null;
530        switch (statistic) {
531        case GEOMETRIC_MEAN:
532            stat = getGeometricMean();
533            break;
534        case KURTOSIS:
535            stat = getKurtosis();
536            break;
537        case MAX:
538            stat = Statistics.getResultAsIntOrNull(max);
539            break;
540        case MEAN:
541            stat = getMean();
542            break;
543        case MIN:
544            stat = Statistics.getResultAsIntOrNull(min);
545            break;
546        case PRODUCT:
547            stat = Statistics.getResultAsDoubleOrNull(product);
548            break;
549        case SKEWNESS:
550            stat = getSkewness();
551            break;
552        case STANDARD_DEVIATION:
553            stat = getStandardDeviation();
554            break;
555        case SUM:
556            stat = Statistics.getResultAsBigIntegerOrNull(sum);
557            break;
558        case SUM_OF_LOGS:
559            stat = Statistics.getResultAsDoubleOrNull(sumOfLogs);
560            break;
561        case SUM_OF_SQUARES:
562            stat = Statistics.getResultAsBigIntegerOrNull(sumOfSquares);
563            break;
564        case VARIANCE:
565            stat = getVariance();
566            break;
567        default:
568            break;
569        }
570        if (stat != null) {
571            return stat;
572        }
573        throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
574    }
575
576    /**
577     * Gets the geometric mean.
578     *
579     * @return a geometric mean supplier (or null if unsupported)
580     */
581    private StatisticResult getGeometricMean() {
582        if (sumOfLogs != null) {
583            // Return a function that has access to the count and sumOfLogs
584            return () -> GeometricMean.computeGeometricMean(count, sumOfLogs);
585        }
586        return null;
587    }
588
589    /**
590     * Gets the kurtosis.
591     *
592     * @return a kurtosis supplier (or null if unsupported)
593     */
594    private StatisticResult getKurtosis() {
595        if (moment instanceof SumOfFourthDeviations) {
596            return new Kurtosis((SumOfFourthDeviations) moment)
597                .setBiased(config.isBiased())::getAsDouble;
598        }
599        return null;
600    }
601
602    /**
603     * Gets the mean.
604     *
605     * @return a mean supplier (or null if unsupported)
606     */
607    private StatisticResult getMean() {
608        if (sum != null) {
609            // Return a function that has access to the count and sum
610            final Int128 s = sum.getSum();
611            return () -> IntMean.computeMean(s, count);
612        }
613        return null;
614    }
615
616    /**
617     * Gets the skewness.
618     *
619     * @return a skewness supplier (or null if unsupported)
620     */
621    private StatisticResult getSkewness() {
622        if (moment instanceof SumOfCubedDeviations) {
623            return new Skewness((SumOfCubedDeviations) moment)
624                .setBiased(config.isBiased())::getAsDouble;
625        }
626        return null;
627    }
628
629    /**
630     * Gets the standard deviation.
631     *
632     * @return a standard deviation supplier (or null if unsupported)
633     */
634    private StatisticResult getStandardDeviation() {
635        return getVarianceOrStd(true);
636    }
637
638    /**
639     * Gets the variance.
640     *
641     * @return a variance supplier (or null if unsupported)
642     */
643    private StatisticResult getVariance() {
644        return getVarianceOrStd(false);
645    }
646
647    /**
648     * Gets the variance or standard deviation.
649     *
650     * @param std Flag to control if the statistic is the standard deviation.
651     * @return a variance/standard deviation supplier (or null if unsupported)
652     */
653    private StatisticResult getVarianceOrStd(boolean std) {
654        if (sum != null && sumOfSquares != null) {
655            // Return a function that has access to the count, sum and sum of squares
656            final Int128 s = sum.getSum();
657            final UInt128 ss = sumOfSquares.getSumOfSquares();
658            final boolean biased = config.isBiased();
659            return () -> IntVariance.computeVarianceOrStd(ss, s, count, biased, std);
660        }
661        return null;
662    }
663
664    /**
665     * Combines the state of the {@code other} statistics into this one.
666     * Only {@code this} instance is modified by the {@code combine} operation.
667     *
668     * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the
669     * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for
670     * all values of the {@link Statistic} enum which are supported by {@code this}
671     * instance.
672     *
673     * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform
674     * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other}
675     * instance is not compatible then an exception is raised before any state is modified.
676     *
677     * @param other Another set of statistics to be combined.
678     * @return {@code this} instance after combining {@code other}.
679     * @throws IllegalArgumentException if the {@code other} is not compatible
680     */
681    public IntStatistics combine(IntStatistics other) {
682        // Check compatibility
683        Statistics.checkCombineCompatible(min, other.min);
684        Statistics.checkCombineCompatible(max, other.max);
685        Statistics.checkCombineCompatible(sum, other.sum);
686        Statistics.checkCombineCompatible(product, other.product);
687        Statistics.checkCombineCompatible(sumOfSquares, other.sumOfSquares);
688        Statistics.checkCombineCompatible(sumOfLogs, other.sumOfLogs);
689        Statistics.checkCombineAssignable(moment, other.moment);
690        // Combine
691        count += other.count;
692        Statistics.combine(min, other.min);
693        Statistics.combine(max, other.max);
694        Statistics.combine(sum, other.sum);
695        Statistics.combine(product, other.product);
696        Statistics.combine(sumOfSquares, other.sumOfSquares);
697        Statistics.combine(sumOfLogs, other.sumOfLogs);
698        Statistics.combineMoment(moment, other.moment);
699        return this;
700    }
701
702    /**
703     * Sets the statistics configuration.
704     *
705     * <p>These options only control the final computation of statistics. The configuration
706     * will not affect compatibility between instances during a
707     * {@link #combine(IntStatistics) combine} operation.
708     *
709     * <p>Note: These options will affect any future computation of statistics. Supplier functions
710     * that have been previously created will not be updated with the new configuration.
711     *
712     * @param v Value.
713     * @return {@code this} instance
714     * @throws NullPointerException if the value is null
715     * @see #getResult(Statistic)
716     */
717    public IntStatistics setConfiguration(StatisticsConfiguration v) {
718        config = Objects.requireNonNull(v);
719        return this;
720    }
721}