/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.statistics.descriptive;

import java.util.Objects;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.DoubleConsumer;
import java.util.function.Function;

/**
 * Statistics for {@code double} values.
 *
 * <p>This class bundles a caller-selected combination of the individual statistic
 * implementations in the {@code org.apache.commons.statistics.descriptive} package and
 * feeds every observed value to each of them.
 *
 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
 * This implementation does not check for overflow of the count.
 *
 * @since 1.1
 */
public final class DoubleStatistics implements DoubleConsumer {
    /** Error message for non configured statistics. */
    private static final String NO_CONFIGURED_STATISTICS = "No configured statistics";
    /** Error message for an unsupported statistic. */
    private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: ";

    /** Number of values observed so far. */
    private long count;
    /** Composite consumer that forwards each value to the configured implementations. */
    private final DoubleConsumer consumer;
    /** The {@link Min} implementation; {@code null} when not configured. */
    private final Min min;
    /** The {@link Max} implementation; {@code null} when not configured. */
    private final Max max;
    /** The moment implementation. May be any instance of {@link FirstMoment}; {@code null} when not configured. */
    private final FirstMoment moment;
    /** The {@link Sum} implementation; {@code null} when not configured. */
    private final Sum sum;
    /** The {@link Product} implementation; {@code null} when not configured. */
    private final Product product;
    /** The {@link SumOfSquares} implementation; {@code null} when not configured. */
    private final SumOfSquares sumOfSquares;
    /** The {@link SumOfLogs} implementation; {@code null} when not configured. */
    private final SumOfLogs sumOfLogs;
    /** Configuration options for computation of statistics. */
    private StatisticsConfiguration config;

    /**
     * A builder for {@link DoubleStatistics}.
     */
    public static final class Builder {
        /** Zero-length input used to build an empty instance. */
        private static final double[] NO_VALUES = {};

        /** The {@link Min} constructor. */
        private Function<double[], Min> min;
        /** The {@link Max} constructor. */
        private Function<double[], Max> max;
        /** The moment constructor. May return any instance of {@link FirstMoment}. */
        private BiFunction<org.apache.commons.numbers.core.Sum, double[], FirstMoment> moment;
        /** The {@link Sum} constructor. */
        private Function<org.apache.commons.numbers.core.Sum, Sum> sum;
        /** The {@link Product} constructor. */
        private Function<double[], Product> product;
        /** The {@link SumOfSquares} constructor. */
        private Function<double[], SumOfSquares> sumOfSquares;
        /** The {@link SumOfLogs} constructor. */
        private Function<double[], SumOfLogs> sumOfLogs;
        /** The order of the moment. It corresponds to the power computed by the {@link FirstMoment}
         * instance constructed by {@link #moment}. This should only be increased from the default
         * of zero (corresponding to no moment computation). */
        private int momentOrder;
        /** Configuration options for computation of statistics. */
        private StatisticsConfiguration config = StatisticsConfiguration.withDefaults();

        /**
         * Create an instance.
         */
        Builder() {
            // Nothing to initialise
        }

        /**
         * Registers the statistic so that built instances compute it.
         *
         * @param statistic Statistic to compute.
         * @return {@code this} instance
         */
        Builder add(Statistic statistic) {
            switch (statistic) {
            // Statistics backed by the (single) moment implementation
            case MEAN:
                createMoment(1);
                break;
            case VARIANCE:
            case STANDARD_DEVIATION:
                createMoment(2);
                break;
            case SKEWNESS:
                createMoment(3);
                break;
            case KURTOSIS:
                createMoment(4);
                break;
            // Statistics backed by a dedicated implementation
            case MIN:
                min = Min::of;
                break;
            case MAX:
                max = Max::of;
                break;
            case SUM:
                sum = Sum::new;
                break;
            case PRODUCT:
                product = Product::of;
                break;
            case SUM_OF_SQUARES:
                sumOfSquares = SumOfSquares::of;
                break;
            case SUM_OF_LOGS:
            case GEOMETRIC_MEAN:
                sumOfLogs = SumOfLogs::of;
                break;
            default:
                throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
            }
            return this;
        }

        /**
         * Selects the moment constructor for the specified {@code order},
         * e.g. order=2 is sum of squared deviations. A request for an order that
         * does not exceed the order already configured is ignored.
         *
         * @param order Order.
         */
        private void createMoment(int order) {
            if (order <= momentOrder) {
                // The configured moment already covers this order
                return;
            }
            momentOrder = order;
            switch (order) {
            case 4:
                moment = SumOfFourthDeviations::create;
                break;
            case 3:
                moment = SumOfCubedDeviations::create;
                break;
            case 2:
                moment = SumOfSquaredDeviations::create;
                break;
            default:
                // Assume order == 1
                moment = FirstMoment::create;
                break;
            }
        }

        /**
         * Sets the statistics configuration options for computation of statistics.
         *
         * @param v Value.
         * @return the builder
         * @throws NullPointerException if the value is null
         */
        public Builder setConfiguration(StatisticsConfiguration v) {
            config = Objects.requireNonNull(v);
            return this;
        }

        /**
         * Builds a {@code DoubleStatistics} instance that has observed no values.
         *
         * @return {@code DoubleStatistics} instance.
         */
        public DoubleStatistics build() {
            return build(NO_VALUES);
        }

        /**
         * Builds a {@code DoubleStatistics} instance using the input {@code values}.
         *
         * <p>Note: {@code DoubleStatistics} computed using
         * {@link DoubleStatistics#accept(double) accept} may be
         * different from this instance.
         *
         * @param values Values.
         * @return {@code DoubleStatistics} instance.
         * @throws NullPointerException if {@code values} is null
         */
        public DoubleStatistics build(double... values) {
            Objects.requireNonNull(values, "values");
            // The moment and the sum are both constructed from one shared sum of the
            // values, which is only computed when at least one of them is configured.
            FirstMoment momentStat = null;
            Sum sumStat = null;
            final boolean requiresSum = moment != null || sum != null;
            if (requiresSum) {
                final org.apache.commons.numbers.core.Sum total =
                    org.apache.commons.numbers.core.Sum.of(values);
                momentStat = create(moment, total, values);
                sumStat = create(sum, total);
            }
            final Min minStat = create(min, values);
            final Max maxStat = create(max, values);
            final Product productStat = create(product, values);
            final SumOfSquares sumOfSquaresStat = create(sumOfSquares, values);
            final SumOfLogs sumOfLogsStat = create(sumOfLogs, values);
            return new DoubleStatistics(
                values.length,
                minStat,
                maxStat,
                momentStat,
                sumStat,
                productStat,
                sumOfSquaresStat,
                sumOfLogsStat,
                config);
        }

        /**
         * Applies the {@code constructor} to the {@code values}, if a constructor is present.
         *
         * @param <S> value type
         * @param <T> object type
         * @param constructor Constructor (may be null).
         * @param values Values
         * @return the instance, or null when there is no constructor
         */
        private static <S, T> T create(Function<S, T> constructor, S values) {
            if (constructor == null) {
                return null;
            }
            return constructor.apply(values);
        }

        /**
         * Applies the {@code constructor} to the values {@code r} and {@code s},
         * if a constructor is present.
         *
         * @param <R> value type
         * @param <S> value type
         * @param <T> object type
         * @param constructor Constructor (may be null).
         * @param r Value.
         * @param s Value.
         * @return the instance, or null when there is no constructor
         */
        private static <R, S, T> T create(BiFunction<R, S, T> constructor, R r, S s) {
            if (constructor == null) {
                return null;
            }
            return constructor.apply(r, s);
        }
    }

    /**
     * Create an instance. Any implementation argument may be {@code null} to indicate
     * that the corresponding statistic is not computed.
     *
     * @param count Count of values.
     * @param min Min implementation.
     * @param max Max implementation.
     * @param moment Moment implementation.
     * @param sum Sum implementation.
     * @param product Product implementation.
     * @param sumOfSquares Sum of squares implementation.
     * @param sumOfLogs Sum of logs implementation.
     * @param config Statistics configuration.
     */
    DoubleStatistics(long count, Min min, Max max, FirstMoment moment, Sum sum,
                     Product product, SumOfSquares sumOfSquares, SumOfLogs sumOfLogs,
                     StatisticsConfiguration config) {
        this.count = count;
        this.min = min;
        this.max = max;
        this.moment = moment;
        this.sum = sum;
        this.product = product;
        this.sumOfSquares = sumOfSquares;
        this.sumOfLogs = sumOfLogs;
        this.config = config;
        // Single entry point used by accept(double) to update all implementations
        consumer = Statistics.compose(min, max, moment, sum, product, sumOfSquares, sumOfLogs);
    }

    /**
     * Returns a new instance configured to compute the specified {@code statistics}.
     *
     * <p>The statistics will be empty and so will return the default values for each
     * computed statistic.
     *
     * @param statistics Statistics to compute.
     * @return the instance
     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
     */
    public static DoubleStatistics of(Statistic... statistics) {
        return builder(statistics).build();
    }

    /**
     * Returns a new instance configured to compute the specified {@code statistics}
     * populated using the input {@code values}.
     *
     * <p>Use this method to create an instance populated with a (variable) array of
     * {@code double[]} data:
     *
     * <pre>
     * DoubleStatistics stats = DoubleStatistics.of(
     *     EnumSet.of(Statistic.MIN, Statistic.MAX),
     *     1, 1, 2, 3, 5, 8, 13);
     * </pre>
     *
     * @param statistics Statistics to compute.
     * @param values Values.
     * @return the instance
     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
     */
    public static DoubleStatistics of(Set<Statistic> statistics, double... values) {
        if (statistics.isEmpty()) {
            throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
        }
        final Builder statsBuilder = new Builder();
        statistics.forEach(statsBuilder::add);
        return statsBuilder.build(values);
    }

    /**
     * Returns a new builder configured to create instances to compute the specified
     * {@code statistics}.
     *
     * <p>Use this method to create an instance populated with an array of {@code double[]}
     * data using the {@link Builder#build(double...)} method:
     *
     * <pre>
     * double[] data = ...
     * DoubleStatistics stats = DoubleStatistics.builder(
     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
     *     .build(data);
     * </pre>
     *
     * <p>The builder can be used to create multiple instances of {@link DoubleStatistics}
     * to be used in parallel, or on separate arrays of {@code double[]} data. These may
     * be {@link #combine(DoubleStatistics) combined}. For example:
     *
     * <pre>
     * double[][] data = ...
     * DoubleStatistics.Builder builder = DoubleStatistics.builder(
     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
     * DoubleStatistics stats = Arrays.stream(data)
     *     .parallel()
     *     .map(builder::build)
     *     .reduce(DoubleStatistics::combine)
     *     .get();
     * </pre>
     *
     * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat
     * use on multiple data:
     *
     * <pre>{@code
     * DoubleStatistics.Builder builder = DoubleStatistics.builder(
     *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
     * Collector<double[], DoubleStatistics, DoubleStatistics> collector =
     *     Collector.of(builder::build,
     *                  (s, d) -> s.combine(builder.build(d)),
     *                  DoubleStatistics::combine);
     *
     * // Repeated
     * double[][] data = ...
     * DoubleStatistics stats = Arrays.stream(data).collect(collector);
     * }</pre>
     *
     * @param statistics Statistics to compute.
     * @return the builder
     * @throws IllegalArgumentException if there are no {@code statistics} to compute.
     */
    public static Builder builder(Statistic... statistics) {
        if (statistics.length == 0) {
            throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
        }
        final Builder statsBuilder = new Builder();
        for (final Statistic statistic : statistics) {
            statsBuilder.add(statistic);
        }
        return statsBuilder;
    }

    /**
     * Updates the state of the statistics to reflect the addition of {@code value}.
     *
     * @param value Value.
     */
    @Override
    public void accept(double value) {
        count++;
        consumer.accept(value);
    }

    /**
     * Return the count of values recorded.
     *
     * @return the count of values
     */
    public long getCount() {
        return count;
    }

    /**
     * Check if the specified {@code statistic} is supported.
     *
     * <p>Note: A {@code null} argument is not reported as unsupported; it raises an
     * exception instead of returning {@code false}.
     *
     * @param statistic Statistic.
     * @return {@code true} if supported
     * @throws NullPointerException if the {@code statistic} is {@code null}
     * @see #getAsDouble(Statistic)
     */
    public boolean isSupported(Statistic statistic) {
        // Support is determined by the presence (and, for moments, the type)
        // of the underlying implementation.
        switch (statistic) {
        // Moment-based statistics
        case MEAN:
            return moment != null;
        case VARIANCE:
        case STANDARD_DEVIATION:
            return moment instanceof SumOfSquaredDeviations;
        case SKEWNESS:
            return moment instanceof SumOfCubedDeviations;
        case KURTOSIS:
            return moment instanceof SumOfFourthDeviations;
        // Statistics with a dedicated implementation
        case MIN:
            return min != null;
        case MAX:
            return max != null;
        case SUM:
            return sum != null;
        case PRODUCT:
            return product != null;
        case SUM_OF_SQUARES:
            return sumOfSquares != null;
        case SUM_OF_LOGS:
        case GEOMETRIC_MEAN:
            return sumOfLogs != null;
        default:
            return false;
        }
    }

    /**
     * Gets the value of the specified {@code statistic} as a {@code double}.
     *
     * @param statistic Statistic.
     * @return the value
     * @throws IllegalArgumentException if the {@code statistic} is not supported
     * @see #isSupported(Statistic)
     * @see #getResult(Statistic)
     */
    public double getAsDouble(Statistic statistic) {
        return getResult(statistic).getAsDouble();
    }

    /**
     * Gets a supplier for the value of the specified {@code statistic}.
     *
     * <p>The returned function will supply the correct result after
     * calls to {@link #accept(double) accept} or
     * {@link #combine(DoubleStatistics) combine} further values into
     * {@code this} instance.
     *
     * <p>This method can be used to perform a one-time look-up of the statistic
     * function to compute statistics as values are dynamically added.
     *
     * @param statistic Statistic.
     * @return the supplier
     * @throws IllegalArgumentException if the {@code statistic} is not supported
     * @see #isSupported(Statistic)
     * @see #getAsDouble(Statistic)
     */
    public StatisticResult getResult(Statistic statistic) {
        final StatisticResult stat = findResult(statistic);
        if (stat == null) {
            throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
        }
        // The return argument should be a method reference and not an instance
        // of DoubleStatistic. This ensures the statistic implementation cannot
        // be updated with new values by casting the result and calling accept(double).
        if (stat instanceof DoubleStatistic) {
            return ((DoubleStatistic) stat)::getAsDouble;
        }
        return stat;
    }

    /**
     * Locates the result implementation for the specified {@code statistic}.
     * Statistics that wrap an underlying implementation are created in methods.
     *
     * @param statistic Statistic.
     * @return the result (or null if unsupported)
     */
    private StatisticResult findResult(Statistic statistic) {
        switch (statistic) {
        // Moment-based statistics
        case MEAN:
            return getMean();
        case VARIANCE:
            return getVariance();
        case STANDARD_DEVIATION:
            return getStandardDeviation();
        case SKEWNESS:
            return getSkewness();
        case KURTOSIS:
            return getKurtosis();
        // Statistics with a dedicated implementation
        case MIN:
            return min;
        case MAX:
            return max;
        case SUM:
            return sum;
        case PRODUCT:
            return product;
        case SUM_OF_SQUARES:
            return sumOfSquares;
        case SUM_OF_LOGS:
            return sumOfLogs;
        case GEOMETRIC_MEAN:
            return getGeometricMean();
        default:
            return null;
        }
    }

    /**
     * Gets the geometric mean.
     *
     * @return a geometric mean supplier (or null if unsupported)
     */
    private StatisticResult getGeometricMean() {
        if (sumOfLogs == null) {
            return null;
        }
        // The function reads the current count and sumOfLogs each time it is invoked
        return () -> GeometricMean.computeGeometricMean(count, sumOfLogs);
    }

    /**
     * Gets the kurtosis.
     *
     * @return a kurtosis supplier (or null if unsupported)
     */
    private StatisticResult getKurtosis() {
        if (!(moment instanceof SumOfFourthDeviations)) {
            return null;
        }
        return new Kurtosis((SumOfFourthDeviations) moment)
            .setBiased(config.isBiased())::getAsDouble;
    }

    /**
     * Gets the mean.
     *
     * @return a mean supplier (or null if unsupported)
     */
    private StatisticResult getMean() {
        if (moment == null) {
            return null;
        }
        // Special case where wrapping with a Mean is not required
        return moment::getFirstMoment;
    }

    /**
     * Gets the skewness.
     *
     * @return a skewness supplier (or null if unsupported)
     */
    private StatisticResult getSkewness() {
        if (!(moment instanceof SumOfCubedDeviations)) {
            return null;
        }
        return new Skewness((SumOfCubedDeviations) moment)
            .setBiased(config.isBiased())::getAsDouble;
    }

    /**
     * Gets the standard deviation.
     *
     * @return a standard deviation supplier (or null if unsupported)
     */
    private StatisticResult getStandardDeviation() {
        if (!(moment instanceof SumOfSquaredDeviations)) {
            return null;
        }
        return new StandardDeviation((SumOfSquaredDeviations) moment)
            .setBiased(config.isBiased())::getAsDouble;
    }

    /**
     * Gets the variance.
     *
     * @return a variance supplier (or null if unsupported)
     */
    private StatisticResult getVariance() {
        if (!(moment instanceof SumOfSquaredDeviations)) {
            return null;
        }
        return new Variance((SumOfSquaredDeviations) moment)
            .setBiased(config.isBiased())::getAsDouble;
    }

    /**
     * Combines the state of the {@code other} statistics into this one.
     * Only {@code this} instance is modified by the {@code combine} operation.
     *
     * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the
     * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for
     * all values of the {@link Statistic} enum which are supported by {@code this}
     * instance.
     *
     * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform
     * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other}
     * instance is not compatible then an exception is raised before any state is modified.
     *
     * @param other Another set of statistics to be combined.
     * @return {@code this} instance after combining {@code other}.
     * @throws IllegalArgumentException if the {@code other} is not compatible
     */
    public DoubleStatistics combine(DoubleStatistics other) {
        // Phase 1: validate every pairing up front so that nothing is
        // modified when the other instance turns out to be incompatible.
        Statistics.checkCombineCompatible(min, other.min);
        Statistics.checkCombineCompatible(max, other.max);
        Statistics.checkCombineCompatible(sum, other.sum);
        Statistics.checkCombineCompatible(product, other.product);
        Statistics.checkCombineCompatible(sumOfSquares, other.sumOfSquares);
        Statistics.checkCombineCompatible(sumOfLogs, other.sumOfLogs);
        Statistics.checkCombineAssignable(moment, other.moment);
        // Phase 2: merge the count and each implementation
        count += other.count;
        Statistics.combine(min, other.min);
        Statistics.combine(max, other.max);
        Statistics.combine(sum, other.sum);
        Statistics.combine(product, other.product);
        Statistics.combine(sumOfSquares, other.sumOfSquares);
        Statistics.combine(sumOfLogs, other.sumOfLogs);
        Statistics.combineMoment(moment, other.moment);
        return this;
    }

    /**
     * Sets the statistics configuration.
     *
     * <p>These options only control the final computation of statistics. The configuration
     * will not affect compatibility between instances during a
     * {@link #combine(DoubleStatistics) combine} operation.
     *
     * <p>Note: These options will affect any future computation of statistics. Supplier functions
     * that have been previously created will not be updated with the new configuration.
     *
     * @param v Value.
     * @return {@code this} instance
     * @throws NullPointerException if the value is null
     * @see #getResult(Statistic)
     */
    public DoubleStatistics setConfiguration(StatisticsConfiguration v) {
        config = Objects.requireNonNull(v);
        return this;
    }
}