001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.statistics.descriptive;
018
019import java.util.Objects;
020import java.util.Set;
021import java.util.function.BiFunction;
022import java.util.function.DoubleConsumer;
023import java.util.function.Function;
024
025/**
026 * Statistics for {@code double} values.
027 *
028 * <p>This class provides combinations of individual statistic implementations in the
029 * {@code org.apache.commons.statistics.descriptive} package.
030 *
031 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
032 * This implementation does not check for overflow of the count.
033 *
034 * @since 1.1
035 */
036public final class DoubleStatistics implements DoubleConsumer {
037    /** Error message for non configured statistics. */
038    private static final String NO_CONFIGURED_STATISTICS = "No configured statistics";
039    /** Error message for an unsupported statistic. */
040    private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: ";
041
042    /** Count of values recorded. */
043    private long count;
044    /** The consumer of values. */
045    private final DoubleConsumer consumer;
046    /** The {@link Min} implementation. */
047    private final Min min;
048    /** The {@link Max} implementation. */
049    private final Max max;
050    /** The moment implementation. May be any instance of {@link FirstMoment}. */
051    private final FirstMoment moment;
052    /** The {@link Sum} implementation. */
053    private final Sum sum;
054    /** The {@link Product} implementation. */
055    private final Product product;
056    /** The {@link SumOfSquares} implementation. */
057    private final SumOfSquares sumOfSquares;
058    /** The {@link SumOfLogs} implementation. */
059    private final SumOfLogs sumOfLogs;
060    /** Configuration options for computation of statistics. */
061    private StatisticsConfiguration config;
062
063    /**
064     * A builder for {@link DoubleStatistics}.
065     */
066    public static final class Builder {
067        /** An empty double array. */
068        private static final double[] NO_VALUES = {};
069
070        /** The {@link Min} constructor. */
071        private Function<double[], Min> min;
072        /** The {@link Max} constructor. */
073        private Function<double[], Max> max;
074        /** The moment constructor. May return any instance of {@link FirstMoment}. */
075        private BiFunction<org.apache.commons.numbers.core.Sum, double[], FirstMoment> moment;
076        /** The {@link Sum} constructor. */
077        private Function<org.apache.commons.numbers.core.Sum, Sum> sum;
078        /** The {@link Product} constructor. */
079        private Function<double[], Product> product;
080        /** The {@link SumOfSquares} constructor. */
081        private Function<double[], SumOfSquares> sumOfSquares;
082        /** The {@link SumOfLogs} constructor. */
083        private Function<double[], SumOfLogs> sumOfLogs;
084        /** The order of the moment. It corresponds to the power computed by the {@link FirstMoment}
085         * instance constructed by {@link #moment}. This should only be increased from the default
086         * of zero (corresponding to no moment computation). */
087        private int momentOrder;
088        /** Configuration options for computation of statistics. */
089        private StatisticsConfiguration config = StatisticsConfiguration.withDefaults();
090
091        /**
092         * Create an instance.
093         */
094        Builder() {
095            // Do nothing
096        }
097
098        /**
099         * Add the statistic to the statistics to compute.
100         *
101         * @param statistic Statistic to compute.
102         * @return {@code this} instance
103         */
104        Builder add(Statistic statistic) {
105            switch (statistic) {
106            case GEOMETRIC_MEAN:
107            case SUM_OF_LOGS:
108                sumOfLogs = SumOfLogs::of;
109                break;
110            case KURTOSIS:
111                createMoment(4);
112                break;
113            case MAX:
114                max = Max::of;
115                break;
116            case MEAN:
117                createMoment(1);
118                break;
119            case MIN:
120                min = Min::of;
121                break;
122            case PRODUCT:
123                product = Product::of;
124                break;
125            case SKEWNESS:
126                createMoment(3);
127                break;
128            case STANDARD_DEVIATION:
129            case VARIANCE:
130                createMoment(2);
131                break;
132            case SUM:
133                sum = Sum::new;
134                break;
135            case SUM_OF_SQUARES:
136                sumOfSquares = SumOfSquares::of;
137                break;
138            default:
139                throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
140            }
141            return this;
142        }
143
144        /**
145         * Creates the moment constructor for the specified {@code order},
146         * e.g. order=2 is sum of squared deviations.
147         *
148         * @param order Order.
149         */
150        private void createMoment(int order) {
151            if (order > momentOrder) {
152                momentOrder = order;
153                if (order == 4) {
154                    moment = SumOfFourthDeviations::create;
155                } else if (order == 3) {
156                    moment = SumOfCubedDeviations::create;
157                } else if (order == 2) {
158                    moment = SumOfSquaredDeviations::create;
159                } else {
160                    // Assume order == 1
161                    moment = FirstMoment::create;
162                }
163            }
164        }
165
166        /**
167         * Sets the statistics configuration options for computation of statistics.
168         *
169         * @param v Value.
170         * @return the builder
171         * @throws NullPointerException if the value is null
172         */
173        public Builder setConfiguration(StatisticsConfiguration v) {
174            config = Objects.requireNonNull(v);
175            return this;
176        }
177
178        /**
179         * Builds a {@code DoubleStatistics} instance.
180         *
181         * @return {@code DoubleStatistics} instance.
182         */
183        public DoubleStatistics build() {
184            return build(NO_VALUES);
185        }
186
187        /**
188         * Builds a {@code DoubleStatistics} instance using the input {@code values}.
189         *
190         * <p>Note: {@code DoubleStatistics} computed using
191         * {@link DoubleStatistics#accept(double) accept} may be
192         * different from this instance.
193         *
194         * @param values Values.
195         * @return {@code DoubleStatistics} instance.
196         */
197        public DoubleStatistics build(double... values) {
198            Objects.requireNonNull(values, "values");
199            // Create related statistics
200            FirstMoment m = null;
201            Sum sumStat = null;
202            if (moment != null || sum != null) {
203                final org.apache.commons.numbers.core.Sum s =
204                    org.apache.commons.numbers.core.Sum.of(values);
205                m = create(moment, s, values);
206                sumStat = create(sum, s);
207            }
208            return new DoubleStatistics(
209                values.length,
210                create(min, values),
211                create(max, values),
212                m,
213                sumStat,
214                create(product, values),
215                create(sumOfSquares, values),
216                create(sumOfLogs, values),
217                config);
218        }
219
220        /**
221         * Creates the object from the {@code values}.
222         *
223         * @param <S> value type
224         * @param <T> object type
225         * @param constructor Constructor.
226         * @param values Values
227         * @return the instance
228         */
229        private static <S, T> T create(Function<S, T> constructor, S values) {
230            if (constructor != null) {
231                return constructor.apply(values);
232            }
233            return null;
234        }
235
236        /**
237         * Creates the object from the values {@code r} and {@code s}.
238         *
239         * @param <R> value type
240         * @param <S> value type
241         * @param <T> object type
242         * @param constructor Constructor.
243         * @param r Value.
244         * @param s Value.
245         * @return the instance
246         */
247        private static <R, S, T> T create(BiFunction<R, S, T> constructor, R r, S s) {
248            if (constructor != null) {
249                return constructor.apply(r, s);
250            }
251            return null;
252        }
253    }
254
255    /**
256     * Create an instance.
257     *
258     * @param count Count of values.
259     * @param min Min implementation.
260     * @param max Max implementation.
261     * @param moment Moment implementation.
262     * @param sum Sum implementation.
263     * @param product Product implementation.
264     * @param sumOfSquares Sum of squares implementation.
265     * @param sumOfLogs Sum of logs implementation.
266     * @param config Statistics configuration.
267     */
268    DoubleStatistics(long count, Min min, Max max, FirstMoment moment, Sum sum,
269                     Product product, SumOfSquares sumOfSquares, SumOfLogs sumOfLogs,
270                     StatisticsConfiguration config) {
271        this.count = count;
272        this.min = min;
273        this.max = max;
274        this.moment = moment;
275        this.sum = sum;
276        this.product = product;
277        this.sumOfSquares = sumOfSquares;
278        this.sumOfLogs = sumOfLogs;
279        this.config = config;
280        consumer = Statistics.compose(min, max, moment, sum, product, sumOfSquares, sumOfLogs);
281    }
282
283    /**
284     * Returns a new instance configured to compute the specified {@code statistics}.
285     *
286     * <p>The statistics will be empty and so will return the default values for each
287     * computed statistic.
288     *
289     * @param statistics Statistics to compute.
290     * @return the instance
291     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
292     */
293    public static DoubleStatistics of(Statistic... statistics) {
294        return builder(statistics).build();
295    }
296
297    /**
298     * Returns a new instance configured to compute the specified {@code statistics}
299     * populated using the input {@code values}.
300     *
301     * <p>Use this method to create an instance populated with a (variable) array of
302     * {@code double[]} data:
303     *
304     * <pre>
305     * DoubleStatistics stats = DoubleStatistics.of(
306     *     EnumSet.of(Statistic.MIN, Statistic.MAX),
307     *     1, 1, 2, 3, 5, 8, 13);
308     * </pre>
309     *
310     * @param statistics Statistics to compute.
311     * @param values Values.
312     * @return the instance
313     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
314     */
315    public static DoubleStatistics of(Set<Statistic> statistics, double... values) {
316        if (statistics.isEmpty()) {
317            throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
318        }
319        final Builder b = new Builder();
320        statistics.forEach(b::add);
321        return b.build(values);
322    }
323
324    /**
325     * Returns a new builder configured to create instances to compute the specified
326     * {@code statistics}.
327     *
328     * <p>Use this method to create an instance populated with an array of {@code double[]}
329     * data using the {@link Builder#build(double...)} method:
330     *
331     * <pre>
332     * double[] data = ...
333     * DoubleStatistics stats = DoubleStatistics.builder(
334     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
335     *     .build(data);
336     * </pre>
337     *
338     * <p>The builder can be used to create multiple instances of {@link DoubleStatistics}
339     * to be used in parallel, or on separate arrays of {@code double[]} data. These may
340     * be {@link #combine(DoubleStatistics) combined}. For example:
341     *
342     * <pre>
343     * double[][] data = ...
344     * DoubleStatistics.Builder builder = DoubleStatistics.builder(
345     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
346     * DoubleStatistics stats = Arrays.stream(data)
347     *     .parallel()
348     *     .map(builder::build)
349     *     .reduce(DoubleStatistics::combine)
350     *     .get();
351     * </pre>
352     *
353     * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat
354     * use on multiple data:
355     *
356     * <pre>{@code
357     * DoubleStatistics.Builder builder = DoubleStatistics.builder(
358     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
359     * Collector<double[], DoubleStatistics, DoubleStatistics> collector =
360     *     Collector.of(builder::build,
361     *                  (s, d) -> s.combine(builder.build(d)),
362     *                  DoubleStatistics::combine);
363     *
364     * // Repeated
365     * double[][] data = ...
366     * DoubleStatistics stats = Arrays.stream(data).collect(collector);
367     * }</pre>
368     *
369     * @param statistics Statistics to compute.
370     * @return the builder
371     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
372     */
373    public static Builder builder(Statistic... statistics) {
374        if (statistics.length == 0) {
375            throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
376        }
377        final Builder b = new Builder();
378        for (final Statistic s : statistics) {
379            b.add(s);
380        }
381        return b;
382    }
383
384    /**
385     * Updates the state of the statistics to reflect the addition of {@code value}.
386     *
387     * @param value Value.
388     */
389    @Override
390    public void accept(double value) {
391        count++;
392        consumer.accept(value);
393    }
394
395    /**
396     * Return the count of values recorded.
397     *
398     * @return the count of values
399     */
400    public long getCount() {
401        return count;
402    }
403
404    /**
405     * Check if the specified {@code statistic} is supported.
406     *
407     * <p>Note: This method will not return {@code false} if the argument is {@code null}.
408     *
409     * @param statistic Statistic.
410     * @return {@code true} if supported
411     * @throws NullPointerException if the {@code statistic} is {@code null}
412     * @see #getAsDouble(Statistic)
413     */
414    public boolean isSupported(Statistic statistic) {
415        // Check for the appropriate underlying implementation
416        switch (statistic) {
417        case GEOMETRIC_MEAN:
418        case SUM_OF_LOGS:
419            return sumOfLogs != null;
420        case KURTOSIS:
421            return moment instanceof SumOfFourthDeviations;
422        case MAX:
423            return max != null;
424        case MEAN:
425            return moment != null;
426        case MIN:
427            return min != null;
428        case PRODUCT:
429            return product != null;
430        case SKEWNESS:
431            return moment instanceof SumOfCubedDeviations;
432        case STANDARD_DEVIATION:
433        case VARIANCE:
434            return moment instanceof SumOfSquaredDeviations;
435        case SUM:
436            return sum != null;
437        case SUM_OF_SQUARES:
438            return sumOfSquares != null;
439        default:
440            return false;
441        }
442    }
443
444    /**
445     * Gets the value of the specified {@code statistic} as a {@code double}.
446     *
447     * @param statistic Statistic.
448     * @return the value
449     * @throws IllegalArgumentException if the {@code statistic} is not supported
450     * @see #isSupported(Statistic)
451     * @see #getResult(Statistic)
452     */
453    public double getAsDouble(Statistic statistic) {
454        return getResult(statistic).getAsDouble();
455    }
456
457    /**
458     * Gets a supplier for the value of the specified {@code statistic}.
459     *
460     * <p>The returned function will supply the correct result after
461     * calls to {@link #accept(double) accept} or
462     * {@link #combine(DoubleStatistics) combine} further values into
463     * {@code this} instance.
464     *
465     * <p>This method can be used to perform a one-time look-up of the statistic
466     * function to compute statistics as values are dynamically added.
467     *
468     * @param statistic Statistic.
469     * @return the supplier
470     * @throws IllegalArgumentException if the {@code statistic} is not supported
471     * @see #isSupported(Statistic)
472     * @see #getAsDouble(Statistic)
473     */
474    public StatisticResult getResult(Statistic statistic) {
475        // Locate the implementation.
476        // Statistics that wrap an underlying implementation are created in methods.
477        // The return argument should be a method reference and not an instance
478        // of DoubleStatistic. This ensures the statistic implementation cannot
479        // be updated with new values by casting the result and calling accept(double).
480        StatisticResult stat = null;
481        switch (statistic) {
482        case GEOMETRIC_MEAN:
483            stat = getGeometricMean();
484            break;
485        case KURTOSIS:
486            stat = getKurtosis();
487            break;
488        case MAX:
489            stat = max;
490            break;
491        case MEAN:
492            stat = getMean();
493            break;
494        case MIN:
495            stat = min;
496            break;
497        case PRODUCT:
498            stat = product;
499            break;
500        case SKEWNESS:
501            stat = getSkewness();
502            break;
503        case STANDARD_DEVIATION:
504            stat = getStandardDeviation();
505            break;
506        case SUM:
507            stat = sum;
508            break;
509        case SUM_OF_LOGS:
510            stat = sumOfLogs;
511            break;
512        case SUM_OF_SQUARES:
513            stat = sumOfSquares;
514            break;
515        case VARIANCE:
516            stat = getVariance();
517            break;
518        default:
519            break;
520        }
521        if (stat != null) {
522            return stat instanceof DoubleStatistic ?
523                ((DoubleStatistic) stat)::getAsDouble :
524                stat;
525        }
526        throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
527    }
528
529    /**
530     * Gets the geometric mean.
531     *
532     * @return a geometric mean supplier (or null if unsupported)
533     */
534    private StatisticResult getGeometricMean() {
535        if (sumOfLogs != null) {
536            // Return a function that has access to the count and sumOfLogs
537            return () -> GeometricMean.computeGeometricMean(count, sumOfLogs);
538        }
539        return null;
540    }
541
542    /**
543     * Gets the kurtosis.
544     *
545     * @return a kurtosis supplier (or null if unsupported)
546     */
547    private StatisticResult getKurtosis() {
548        if (moment instanceof SumOfFourthDeviations) {
549            return new Kurtosis((SumOfFourthDeviations) moment)
550                .setBiased(config.isBiased())::getAsDouble;
551        }
552        return null;
553    }
554
555    /**
556     * Gets the mean.
557     *
558     * @return a mean supplier (or null if unsupported)
559     */
560    private StatisticResult getMean() {
561        if (moment != null) {
562            // Special case where wrapping with a Mean is not required
563            return moment::getFirstMoment;
564        }
565        return null;
566    }
567
568    /**
569     * Gets the skewness.
570     *
571     * @return a skewness supplier (or null if unsupported)
572     */
573    private StatisticResult getSkewness() {
574        if (moment instanceof SumOfCubedDeviations) {
575            return new Skewness((SumOfCubedDeviations) moment)
576                .setBiased(config.isBiased())::getAsDouble;
577        }
578        return null;
579    }
580
581    /**
582     * Gets the standard deviation.
583     *
584     * @return a standard deviation supplier (or null if unsupported)
585     */
586    private StatisticResult getStandardDeviation() {
587        if (moment instanceof SumOfSquaredDeviations) {
588            return new StandardDeviation((SumOfSquaredDeviations) moment)
589                .setBiased(config.isBiased())::getAsDouble;
590        }
591        return null;
592    }
593
594    /**
595     * Gets the variance.
596     *
597     * @return a variance supplier (or null if unsupported)
598     */
599    private StatisticResult getVariance() {
600        if (moment instanceof SumOfSquaredDeviations) {
601            return new Variance((SumOfSquaredDeviations) moment)
602                .setBiased(config.isBiased())::getAsDouble;
603        }
604        return null;
605    }
606
607    /**
608     * Combines the state of the {@code other} statistics into this one.
609     * Only {@code this} instance is modified by the {@code combine} operation.
610     *
611     * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the
612     * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for
613     * all values of the {@link Statistic} enum which are supported by {@code this}
614     * instance.
615     *
616     * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform
617     * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other}
618     * instance is not compatible then an exception is raised before any state is modified.
619     *
620     * @param other Another set of statistics to be combined.
621     * @return {@code this} instance after combining {@code other}.
622     * @throws IllegalArgumentException if the {@code other} is not compatible
623     */
624    public DoubleStatistics combine(DoubleStatistics other) {
625        // Check compatibility
626        Statistics.checkCombineCompatible(min, other.min);
627        Statistics.checkCombineCompatible(max, other.max);
628        Statistics.checkCombineCompatible(sum, other.sum);
629        Statistics.checkCombineCompatible(product, other.product);
630        Statistics.checkCombineCompatible(sumOfSquares, other.sumOfSquares);
631        Statistics.checkCombineCompatible(sumOfLogs, other.sumOfLogs);
632        Statistics.checkCombineAssignable(moment, other.moment);
633        // Combine
634        count += other.count;
635        Statistics.combine(min, other.min);
636        Statistics.combine(max, other.max);
637        Statistics.combine(sum, other.sum);
638        Statistics.combine(product, other.product);
639        Statistics.combine(sumOfSquares, other.sumOfSquares);
640        Statistics.combine(sumOfLogs, other.sumOfLogs);
641        Statistics.combineMoment(moment, other.moment);
642        return this;
643    }
644
645    /**
646     * Sets the statistics configuration.
647     *
648     * <p>These options only control the final computation of statistics. The configuration
649     * will not affect compatibility between instances during a
650     * {@link #combine(DoubleStatistics) combine} operation.
651     *
652     * <p>Note: These options will affect any future computation of statistics. Supplier functions
653     * that have been previously created will not be updated with the new configuration.
654     *
655     * @param v Value.
656     * @return {@code this} instance
657     * @throws NullPointerException if the value is null
658     * @see #getResult(Statistic)
659     */
660    public DoubleStatistics setConfiguration(StatisticsConfiguration v) {
661        config = Objects.requireNonNull(v);
662        return this;
663    }
664}