View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.descriptive;
18  
19  import java.math.BigInteger;
20  import java.util.Objects;
21  import java.util.Set;
22  import java.util.function.DoubleConsumer;
23  import java.util.function.Function;
24  import java.util.function.IntConsumer;
25  
26  /**
27   * Statistics for {@code int} values.
28   *
29   * <p>This class provides combinations of individual statistic implementations in the
30   * {@code org.apache.commons.statistics.descriptive} package.
31   *
32   * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
33   * This implementation does not check for overflow of the count.
34   *
35   * @since 1.1
36   */
37  public final class IntStatistics implements IntConsumer {
38      /** Error message for non configured statistics. */
39      private static final String NO_CONFIGURED_STATISTICS = "No configured statistics";
40      /** Error message for an unsupported statistic. */
41      private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: ";
42  
43      /** Count of values recorded. */
44      private long count;
45      /** The consumer of values. */
46      private final IntConsumer consumer;
47      /** The {@link IntMin} implementation. */
48      private final IntMin min;
49      /** The {@link IntMax} implementation. */
50      private final IntMax max;
51      /** The moment implementation. May be any instance of {@link FirstMoment}.
52       * This implementation uses only the third and fourth moments. */
53      private final FirstMoment moment;
54      /** The {@link IntSum} implementation. */
55      private final IntSum sum;
56      /** The {@link Product} implementation. */
57      private final Product product;
58      /** The {@link IntSumOfSquares} implementation. */
59      private final IntSumOfSquares sumOfSquares;
60      /** The {@link SumOfLogs} implementation. */
61      private final SumOfLogs sumOfLogs;
62      /** Configuration options for computation of statistics. */
63      private StatisticsConfiguration config;
64  
65      /**
66       * A builder for {@link IntStatistics}.
67       */
68      public static final class Builder {
        /** An empty int array. */
70          private static final int[] NO_VALUES = {};
71  
72          /** The {@link IntMin} constructor. */
73          private Function<int[], IntMin> min;
74          /** The {@link IntMax} constructor. */
75          private Function<int[], IntMax> max;
76          /** The moment constructor. May return any instance of {@link FirstMoment}. */
77          private Function<int[], FirstMoment> moment;
78          /** The {@link IntSum} constructor. */
79          private Function<int[], IntSum> sum;
80          /** The {@link Product} constructor. */
81          private Function<int[], Product> product;
82          /** The {@link IntSumOfSquares} constructor. */
83          private Function<int[], IntSumOfSquares> sumOfSquares;
84          /** The {@link SumOfLogs} constructor. */
85          private Function<int[], SumOfLogs> sumOfLogs;
86          /** The order of the moment. It corresponds to the power computed by the {@link FirstMoment}
87           * instance constructed by {@link #moment}. This should only be increased from the default
88           * of zero (corresponding to no moment computation). */
89          private int momentOrder;
90          /** Configuration options for computation of statistics. */
91          private StatisticsConfiguration config = StatisticsConfiguration.withDefaults();
92  
93          /**
94           * Create an instance.
95           */
96          Builder() {
97              // Do nothing
98          }
99  
100         /**
101          * Add the statistic to the statistics to compute.
102          *
103          * @param statistic Statistic to compute.
104          * @return {@code this} instance
105          */
106         Builder add(Statistic statistic) {
107             switch (statistic) {
108             case GEOMETRIC_MEAN:
109             case SUM_OF_LOGS:
110                 sumOfLogs = SumOfLogs::of;
111                 break;
112             case KURTOSIS:
113                 createMoment(4);
114                 break;
115             case MAX:
116                 max = IntMax::of;
117                 break;
118             case MIN:
119                 min = IntMin::of;
120                 break;
121             case PRODUCT:
122                 product = Product::of;
123                 break;
124             case SKEWNESS:
125                 createMoment(3);
126                 break;
127             case STANDARD_DEVIATION:
128             case VARIANCE:
129                 sum = IntSum::of;
130                 sumOfSquares = IntSumOfSquares::of;
131                 break;
132             case MEAN:
133             case SUM:
134                 sum = IntSum::of;
135                 break;
136             case SUM_OF_SQUARES:
137                 sumOfSquares = IntSumOfSquares::of;
138                 break;
139             default:
140                 throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
141             }
142             return this;
143         }
144 
145         /**
146          * Creates the moment constructor for the specified {@code order},
147          * e.g. order=3 is sum of cubed deviations.
148          *
149          * @param order Order.
150          */
151         private void createMoment(int order) {
152             if (order > momentOrder) {
153                 momentOrder = order;
154                 if (order == 4) {
155                     moment = SumOfFourthDeviations::of;
156                 } else {
157                     // Assume order == 3
158                     moment = SumOfCubedDeviations::of;
159                 }
160             }
161         }
162 
163         /**
164          * Sets the statistics configuration options for computation of statistics.
165          *
166          * @param v Value.
167          * @return the builder
168          * @throws NullPointerException if the value is null
169          */
170         public Builder setConfiguration(StatisticsConfiguration v) {
171             config = Objects.requireNonNull(v);
172             return this;
173         }
174 
175         /**
176          * Builds a {@code IntStatistics} instance.
177          *
178          * @return {@code IntStatistics} instance.
179          */
180         public IntStatistics build() {
181             return build(NO_VALUES);
182         }
183 
184         /**
185          * Builds a {@code IntStatistics} instance using the input {@code values}.
186          *
187          * <p>Note: {@code IntStatistics} computed using
188          * {@link IntStatistics#accept(int) accept} may be
189          * different from this instance.
190          *
191          * @param values Values.
192          * @return {@code IntStatistics} instance.
193          */
194         public IntStatistics build(int... values) {
195             Objects.requireNonNull(values, "values");
196             return new IntStatistics(
197                 values.length,
198                 create(min, values),
199                 create(max, values),
200                 create(moment, values),
201                 create(sum, values),
202                 create(product, values),
203                 create(sumOfSquares, values),
204                 create(sumOfLogs, values),
205                 config);
206         }
207 
208         /**
209          * Creates the object from the {@code values}.
210          *
211          * @param <T> object type
212          * @param constructor Constructor.
213          * @param values Values
214          * @return the instance
215          */
216         private static <T> T create(Function<int[], T> constructor, int[] values) {
217             if (constructor != null) {
218                 return constructor.apply(values);
219             }
220             return null;
221         }
222     }
223 
224     /**
225      * Create an instance.
226      *
227      * @param count Count of values.
228      * @param min IntMin implementation.
229      * @param max IntMax implementation.
230      * @param moment Moment implementation.
231      * @param sum IntSum implementation.
232      * @param product Product implementation.
233      * @param sumOfSquares Sum of squares implementation.
234      * @param sumOfLogs Sum of logs implementation.
235      * @param config Statistics configuration.
236      */
237     IntStatistics(long count, IntMin min, IntMax max, FirstMoment moment, IntSum sum,
238                   Product product, IntSumOfSquares sumOfSquares, SumOfLogs sumOfLogs,
239                   StatisticsConfiguration config) {
240         this.count = count;
241         this.min = min;
242         this.max = max;
243         this.moment = moment;
244         this.sum = sum;
245         this.product = product;
246         this.sumOfSquares = sumOfSquares;
247         this.sumOfLogs = sumOfLogs;
248         this.config = config;
249         // The final consumer should never be null as the builder is created
250         // with at least one statistic.
251         consumer = Statistics.compose(min, max, sum, sumOfSquares,
252                                       composeAsInt(moment, product, sumOfLogs));
253     }
254 
255     /**
256      * Chain the {@code consumers} into a single composite {@code IntConsumer}.
257      * Ignore any {@code null} consumer.
258      *
259      * @param consumers Consumers.
260      * @return a composed consumer (or null)
261      */
262     private static IntConsumer composeAsInt(DoubleConsumer... consumers) {
263         final DoubleConsumer c = Statistics.compose(consumers);
264         if (c != null) {
265             return c::accept;
266         }
267         return null;
268     }
269 
270     /**
271      * Returns a new instance configured to compute the specified {@code statistics}.
272      *
273      * <p>The statistics will be empty and so will return the default values for each
274      * computed statistic.
275      *
276      * @param statistics Statistics to compute.
277      * @return the instance
278      * @throws IllegalArgumentException if there are no {@code statistics} to compute.
279      */
280     public static IntStatistics of(Statistic... statistics) {
281         return builder(statistics).build();
282     }
283 
284     /**
285      * Returns a new instance configured to compute the specified {@code statistics}
286      * populated using the input {@code values}.
287      *
288      * <p>Use this method to create an instance populated with a (variable) array of
289      * {@code int[]} data:
290      *
291      * <pre>
292      * IntStatistics stats = IntStatistics.of(
293      *     EnumSet.of(Statistic.MIN, Statistic.MAX),
294      *     1, 1, 2, 3, 5, 8, 13);
295      * </pre>
296      *
297      * @param statistics Statistics to compute.
298      * @param values Values.
299      * @return the instance
300      * @throws IllegalArgumentException if there are no {@code statistics} to compute.
301      */
302     public static IntStatistics of(Set<Statistic> statistics, int... values) {
303         if (statistics.isEmpty()) {
304             throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
305         }
306         final Builder b = new Builder();
307         statistics.forEach(b::add);
308         return b.build(values);
309     }
310 
311     /**
312      * Returns a new builder configured to create instances to compute the specified
313      * {@code statistics}.
314      *
315      * <p>Use this method to create an instance populated with an array of {@code int[]}
316      * data using the {@link Builder#build(int...)} method:
317      *
318      * <pre>
319      * int[] data = ...
320      * IntStatistics stats = IntStatistics.builder(
321      *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
322      *     .build(data);
323      * </pre>
324      *
325      * <p>The builder can be used to create multiple instances of {@link IntStatistics}
326      * to be used in parallel, or on separate arrays of {@code int[]} data. These may
327      * be {@link #combine(IntStatistics) combined}. For example:
328      *
329      * <pre>
330      * int[][] data = ...
331      * IntStatistics.Builder builder = IntStatistics.builder(
332      *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
333      * IntStatistics stats = Arrays.stream(data)
334      *     .parallel()
335      *     .map(builder::build)
336      *     .reduce(IntStatistics::combine)
337      *     .get();
338      * </pre>
339      *
340      * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat
341      * use on multiple data:
342      *
343      * <pre>{@code
344      * IntStatistics.Builder builder = IntStatistics.builder(
345      *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
346      * Collector<int[], IntStatistics, IntStatistics> collector =
347      *     Collector.of(builder::build,
348      *                  (s, d) -> s.combine(builder.build(d)),
349      *                  IntStatistics::combine);
350      *
351      * // Repeated
352      * int[][] data = ...
353      * IntStatistics stats = Arrays.stream(data).collect(collector);
354      * }</pre>
355      *
356      * @param statistics Statistics to compute.
357      * @return the builder
358      * @throws IllegalArgumentException if there are no {@code statistics} to compute.
359      */
360     public static Builder builder(Statistic... statistics) {
361         if (statistics.length == 0) {
362             throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
363         }
364         final Builder b = new Builder();
365         for (final Statistic s : statistics) {
366             b.add(s);
367         }
368         return b;
369     }
370 
371     /**
372      * Updates the state of the statistics to reflect the addition of {@code value}.
373      *
374      * @param value Value.
375      */
376     @Override
377     public void accept(int value) {
378         count++;
379         consumer.accept(value);
380     }
381 
382     /**
383      * Return the count of values recorded.
384      *
385      * @return the count of values
386      */
387     public long getCount() {
388         return count;
389     }
390 
391     /**
392      * Check if the specified {@code statistic} is supported.
393      *
394      * <p>Note: This method will not return {@code false} if the argument is {@code null}.
395      *
396      * @param statistic Statistic.
397      * @return {@code true} if supported
398      * @throws NullPointerException if the {@code statistic} is {@code null}
399      * @see #getResult(Statistic)
400      */
401     public boolean isSupported(Statistic statistic) {
402         // Check for the appropriate underlying implementation
403         switch (statistic) {
404         case GEOMETRIC_MEAN:
405         case SUM_OF_LOGS:
406             return sumOfLogs != null;
407         case KURTOSIS:
408             return moment instanceof SumOfFourthDeviations;
409         case MAX:
410             return max != null;
411         case MIN:
412             return min != null;
413         case PRODUCT:
414             return product != null;
415         case SKEWNESS:
416             return moment instanceof SumOfCubedDeviations;
417         case STANDARD_DEVIATION:
418         case VARIANCE:
419             return sum != null && sumOfSquares != null;
420         case MEAN:
421         case SUM:
422             return sum != null;
423         case SUM_OF_SQUARES:
424             return sumOfSquares != null;
425         default:
426             return false;
427         }
428     }
429 
430     /**
431      * Gets the value of the specified {@code statistic} as a {@code double}.
432      *
433      * @param statistic Statistic.
434      * @return the value
435      * @throws IllegalArgumentException if the {@code statistic} is not supported
436      * @see #isSupported(Statistic)
437      * @see #getResult(Statistic)
438      */
439     public double getAsDouble(Statistic statistic) {
440         return getResult(statistic).getAsDouble();
441     }
442 
443     /**
444      * Gets the value of the specified {@code statistic} as an {@code int}.
445      *
446      * <p>Use this method to access the {@code int} result for exact integer statistics,
447      * for example {@link Statistic#MIN}.
448      *
449      * <p>Note: This method may throw an {@link ArithmeticException} if the result
450      * overflows an {@code int}.
451      *
452      * @param statistic Statistic.
453      * @return the value
454      * @throws IllegalArgumentException if the {@code statistic} is not supported
455      * @throws ArithmeticException if the {@code result} overflows an {@code int} or is not
456      * finite
457      * @see #isSupported(Statistic)
458      * @see #getResult(Statistic)
459      */
460     public int getAsInt(Statistic statistic) {
461         return getResult(statistic).getAsInt();
462     }
463 
464     /**
465      * Gets the value of the specified {@code statistic} as a {@code long}.
466      *
467      * <p>Use this method to access the {@code long} result for exact integer statistics,
468      * for example {@link Statistic#SUM} for a {@link #getCount() count} less than or equal to
469      *2<sup>32</sup>.
470      *
471      * <p>Note: This method may throw an {@link ArithmeticException} if the result
472      * overflows an {@code long}.
473      *
474      * @param statistic Statistic.
475      * @return the value
476      * @throws IllegalArgumentException if the {@code statistic} is not supported
477      * @throws ArithmeticException if the {@code result} overflows an {@code long} or is not
478      * finite
479      * @see #isSupported(Statistic)
480      * @see #getResult(Statistic)
481      */
482     public long getAsLong(Statistic statistic) {
483         return getResult(statistic).getAsLong();
484     }
485 
486     /**
487      * Gets the value of the specified {@code statistic} as a {@code BigInteger}.
488      *
489      * <p>Use this method to access the {@code BigInteger} result for exact integer statistics,
490      * for example {@link Statistic#SUM_OF_SQUARES}.
491      *
492      * <p>Note: This method may throw an {@link ArithmeticException} if the result
493      * is not finite.
494      *
495      * @param statistic Statistic.
496      * @return the value
497      * @throws IllegalArgumentException if the {@code statistic} is not supported
498      * @throws ArithmeticException if the {@code result} is not finite
499      * @see #isSupported(Statistic)
500      * @see #getResult(Statistic)
501      */
502     public BigInteger getAsBigInteger(Statistic statistic) {
503         return getResult(statistic).getAsBigInteger();
504     }
505 
506     /**
507      * Gets a supplier for the value of the specified {@code statistic}.
508      *
509      * <p>The returned function will supply the correct result after
510      * calls to {@link #accept(int) accept} or
511      * {@link #combine(IntStatistics) combine} further values into
512      * {@code this} instance.
513      *
514      * <p>This method can be used to perform a one-time look-up of the statistic
515      * function to compute statistics as values are dynamically added.
516      *
517      * @param statistic Statistic.
518      * @return the supplier
519      * @throws IllegalArgumentException if the {@code statistic} is not supported
520      * @see #isSupported(Statistic)
521      * @see #getAsDouble(Statistic)
522      */
523     public StatisticResult getResult(Statistic statistic) {
524         // Locate the implementation.
525         // Statistics that wrap an underlying implementation are created in methods.
526         // The return argument should be an interface reference and not an instance
527         // of IntStatistic. This ensures the statistic implementation cannot
528         // be updated with new values by casting the result and calling accept(int).
529         StatisticResult stat = null;
530         switch (statistic) {
531         case GEOMETRIC_MEAN:
532             stat = getGeometricMean();
533             break;
534         case KURTOSIS:
535             stat = getKurtosis();
536             break;
537         case MAX:
538             stat = Statistics.getResultAsIntOrNull(max);
539             break;
540         case MEAN:
541             stat = getMean();
542             break;
543         case MIN:
544             stat = Statistics.getResultAsIntOrNull(min);
545             break;
546         case PRODUCT:
547             stat = Statistics.getResultAsDoubleOrNull(product);
548             break;
549         case SKEWNESS:
550             stat = getSkewness();
551             break;
552         case STANDARD_DEVIATION:
553             stat = getStandardDeviation();
554             break;
555         case SUM:
556             stat = Statistics.getResultAsBigIntegerOrNull(sum);
557             break;
558         case SUM_OF_LOGS:
559             stat = Statistics.getResultAsDoubleOrNull(sumOfLogs);
560             break;
561         case SUM_OF_SQUARES:
562             stat = Statistics.getResultAsBigIntegerOrNull(sumOfSquares);
563             break;
564         case VARIANCE:
565             stat = getVariance();
566             break;
567         default:
568             break;
569         }
570         if (stat != null) {
571             return stat;
572         }
573         throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
574     }
575 
576     /**
577      * Gets the geometric mean.
578      *
579      * @return a geometric mean supplier (or null if unsupported)
580      */
581     private StatisticResult getGeometricMean() {
582         if (sumOfLogs != null) {
583             // Return a function that has access to the count and sumOfLogs
584             return () -> GeometricMean.computeGeometricMean(count, sumOfLogs);
585         }
586         return null;
587     }
588 
589     /**
590      * Gets the kurtosis.
591      *
592      * @return a kurtosis supplier (or null if unsupported)
593      */
594     private StatisticResult getKurtosis() {
595         if (moment instanceof SumOfFourthDeviations) {
596             return new Kurtosis((SumOfFourthDeviations) moment)
597                 .setBiased(config.isBiased())::getAsDouble;
598         }
599         return null;
600     }
601 
602     /**
603      * Gets the mean.
604      *
605      * @return a mean supplier (or null if unsupported)
606      */
607     private StatisticResult getMean() {
608         if (sum != null) {
609             // Return a function that has access to the count and sum
610             final Int128 s = sum.getSum();
611             return () -> IntMean.computeMean(s, count);
612         }
613         return null;
614     }
615 
616     /**
617      * Gets the skewness.
618      *
619      * @return a skewness supplier (or null if unsupported)
620      */
621     private StatisticResult getSkewness() {
622         if (moment instanceof SumOfCubedDeviations) {
623             return new Skewness((SumOfCubedDeviations) moment)
624                 .setBiased(config.isBiased())::getAsDouble;
625         }
626         return null;
627     }
628 
629     /**
630      * Gets the standard deviation.
631      *
632      * @return a standard deviation supplier (or null if unsupported)
633      */
634     private StatisticResult getStandardDeviation() {
635         return getVarianceOrStd(true);
636     }
637 
638     /**
639      * Gets the variance.
640      *
641      * @return a variance supplier (or null if unsupported)
642      */
643     private StatisticResult getVariance() {
644         return getVarianceOrStd(false);
645     }
646 
647     /**
648      * Gets the variance or standard deviation.
649      *
650      * @param std Flag to control if the statistic is the standard deviation.
651      * @return a variance/standard deviation supplier (or null if unsupported)
652      */
653     private StatisticResult getVarianceOrStd(boolean std) {
654         if (sum != null && sumOfSquares != null) {
655             // Return a function that has access to the count, sum and sum of squares
656             final Int128 s = sum.getSum();
657             final UInt128 ss = sumOfSquares.getSumOfSquares();
658             final boolean biased = config.isBiased();
659             return () -> IntVariance.computeVarianceOrStd(ss, s, count, biased, std);
660         }
661         return null;
662     }
663 
664     /**
665      * Combines the state of the {@code other} statistics into this one.
666      * Only {@code this} instance is modified by the {@code combine} operation.
667      *
668      * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the
669      * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for
670      * all values of the {@link Statistic} enum which are supported by {@code this}
671      * instance.
672      *
673      * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform
674      * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other}
675      * instance is not compatible then an exception is raised before any state is modified.
676      *
677      * @param other Another set of statistics to be combined.
678      * @return {@code this} instance after combining {@code other}.
679      * @throws IllegalArgumentException if the {@code other} is not compatible
680      */
681     public IntStatistics combine(IntStatistics other) {
682         // Check compatibility
683         Statistics.checkCombineCompatible(min, other.min);
684         Statistics.checkCombineCompatible(max, other.max);
685         Statistics.checkCombineCompatible(sum, other.sum);
686         Statistics.checkCombineCompatible(product, other.product);
687         Statistics.checkCombineCompatible(sumOfSquares, other.sumOfSquares);
688         Statistics.checkCombineCompatible(sumOfLogs, other.sumOfLogs);
689         Statistics.checkCombineAssignable(moment, other.moment);
690         // Combine
691         count += other.count;
692         Statistics.combine(min, other.min);
693         Statistics.combine(max, other.max);
694         Statistics.combine(sum, other.sum);
695         Statistics.combine(product, other.product);
696         Statistics.combine(sumOfSquares, other.sumOfSquares);
697         Statistics.combine(sumOfLogs, other.sumOfLogs);
698         Statistics.combineMoment(moment, other.moment);
699         return this;
700     }
701 
702     /**
703      * Sets the statistics configuration.
704      *
705      * <p>These options only control the final computation of statistics. The configuration
706      * will not affect compatibility between instances during a
707      * {@link #combine(IntStatistics) combine} operation.
708      *
709      * <p>Note: These options will affect any future computation of statistics. Supplier functions
710      * that have been previously created will not be updated with the new configuration.
711      *
712      * @param v Value.
713      * @return {@code this} instance
714      * @throws NullPointerException if the value is null
715      * @see #getResult(Statistic)
716      */
717     public IntStatistics setConfiguration(StatisticsConfiguration v) {
718         config = Objects.requireNonNull(v);
719         return this;
720     }
721 }