1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.statistics.descriptive; 18 19 import java.math.BigInteger; 20 import java.util.Objects; 21 import java.util.Set; 22 import java.util.function.DoubleConsumer; 23 import java.util.function.Function; 24 import java.util.function.IntConsumer; 25 26 /** 27 * Statistics for {@code int} values. 28 * 29 * <p>This class provides combinations of individual statistic implementations in the 30 * {@code org.apache.commons.statistics.descriptive} package. 31 * 32 * <p>Supports up to 2<sup>63</sup> (exclusive) observations. 33 * This implementation does not check for overflow of the count. 34 * 35 * @since 1.1 36 */ 37 public final class IntStatistics implements IntConsumer { 38 /** Error message for non configured statistics. */ 39 private static final String NO_CONFIGURED_STATISTICS = "No configured statistics"; 40 /** Error message for an unsupported statistic. */ 41 private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: "; 42 43 /** Count of values recorded. */ 44 private long count; 45 /** The consumer of values. */ 46 private final IntConsumer consumer; 47 /** The {@link IntMin} implementation. */ 48 private final IntMin min; 49 /** The {@link IntMax} implementation. */ 50 private final IntMax max; 51 /** The moment implementation. May be any instance of {@link FirstMoment}. 52 * This implementation uses only the third and fourth moments. */ 53 private final FirstMoment moment; 54 /** The {@link IntSum} implementation. */ 55 private final IntSum sum; 56 /** The {@link Product} implementation. */ 57 private final Product product; 58 /** The {@link IntSumOfSquares} implementation. */ 59 private final IntSumOfSquares sumOfSquares; 60 /** The {@link SumOfLogs} implementation. */ 61 private final SumOfLogs sumOfLogs; 62 /** Configuration options for computation of statistics. */ 63 private StatisticsConfiguration config; 64 65 /** 66 * A builder for {@link IntStatistics}. 67 */ 68 public static final class Builder { 69 /** An empty double array. */ 70 private static final int[] NO_VALUES = {}; 71 72 /** The {@link IntMin} constructor. */ 73 private Function<int[], IntMin> min; 74 /** The {@link IntMax} constructor. */ 75 private Function<int[], IntMax> max; 76 /** The moment constructor. May return any instance of {@link FirstMoment}. */ 77 private Function<int[], FirstMoment> moment; 78 /** The {@link IntSum} constructor. */ 79 private Function<int[], IntSum> sum; 80 /** The {@link Product} constructor. */ 81 private Function<int[], Product> product; 82 /** The {@link IntSumOfSquares} constructor. */ 83 private Function<int[], IntSumOfSquares> sumOfSquares; 84 /** The {@link SumOfLogs} constructor. */ 85 private Function<int[], SumOfLogs> sumOfLogs; 86 /** The order of the moment. It corresponds to the power computed by the {@link FirstMoment} 87 * instance constructed by {@link #moment}. This should only be increased from the default 88 * of zero (corresponding to no moment computation). */ 89 private int momentOrder; 90 /** Configuration options for computation of statistics. */ 91 private StatisticsConfiguration config = StatisticsConfiguration.withDefaults(); 92 93 /** 94 * Create an instance. 95 */ 96 Builder() { 97 // Do nothing 98 } 99 100 /** 101 * Add the statistic to the statistics to compute. 102 * 103 * @param statistic Statistic to compute. 104 * @return {@code this} instance 105 */ 106 Builder add(Statistic statistic) { 107 switch (statistic) { 108 case GEOMETRIC_MEAN: 109 case SUM_OF_LOGS: 110 sumOfLogs = SumOfLogs::of; 111 break; 112 case KURTOSIS: 113 createMoment(4); 114 break; 115 case MAX: 116 max = IntMax::of; 117 break; 118 case MIN: 119 min = IntMin::of; 120 break; 121 case PRODUCT: 122 product = Product::of; 123 break; 124 case SKEWNESS: 125 createMoment(3); 126 break; 127 case STANDARD_DEVIATION: 128 case VARIANCE: 129 sum = IntSum::of; 130 sumOfSquares = IntSumOfSquares::of; 131 break; 132 case MEAN: 133 case SUM: 134 sum = IntSum::of; 135 break; 136 case SUM_OF_SQUARES: 137 sumOfSquares = IntSumOfSquares::of; 138 break; 139 default: 140 throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic); 141 } 142 return this; 143 } 144 145 /** 146 * Creates the moment constructor for the specified {@code order}, 147 * e.g. order=3 is sum of cubed deviations. 148 * 149 * @param order Order. 150 */ 151 private void createMoment(int order) { 152 if (order > momentOrder) { 153 momentOrder = order; 154 if (order == 4) { 155 moment = SumOfFourthDeviations::of; 156 } else { 157 // Assume order == 3 158 moment = SumOfCubedDeviations::of; 159 } 160 } 161 } 162 163 /** 164 * Sets the statistics configuration options for computation of statistics. 165 * 166 * @param v Value. 167 * @return the builder 168 * @throws NullPointerException if the value is null 169 */ 170 public Builder setConfiguration(StatisticsConfiguration v) { 171 config = Objects.requireNonNull(v); 172 return this; 173 } 174 175 /** 176 * Builds a {@code IntStatistics} instance. 177 * 178 * @return {@code IntStatistics} instance. 179 */ 180 public IntStatistics build() { 181 return build(NO_VALUES); 182 } 183 184 /** 185 * Builds a {@code IntStatistics} instance using the input {@code values}. 186 * 187 * <p>Note: {@code IntStatistics} computed using 188 * {@link IntStatistics#accept(int) accept} may be 189 * different from this instance. 190 * 191 * @param values Values. 192 * @return {@code IntStatistics} instance. 193 */ 194 public IntStatistics build(int... values) { 195 Objects.requireNonNull(values, "values"); 196 return new IntStatistics( 197 values.length, 198 create(min, values), 199 create(max, values), 200 create(moment, values), 201 create(sum, values), 202 create(product, values), 203 create(sumOfSquares, values), 204 create(sumOfLogs, values), 205 config); 206 } 207 208 /** 209 * Creates the object from the {@code values}. 210 * 211 * @param <T> object type 212 * @param constructor Constructor. 213 * @param values Values 214 * @return the instance 215 */ 216 private static <T> T create(Function<int[], T> constructor, int[] values) { 217 if (constructor != null) { 218 return constructor.apply(values); 219 } 220 return null; 221 } 222 } 223 224 /** 225 * Create an instance. 226 * 227 * @param count Count of values. 228 * @param min IntMin implementation. 229 * @param max IntMax implementation. 230 * @param moment Moment implementation. 231 * @param sum IntSum implementation. 232 * @param product Product implementation. 233 * @param sumOfSquares Sum of squares implementation. 234 * @param sumOfLogs Sum of logs implementation. 235 * @param config Statistics configuration. 236 */ 237 IntStatistics(long count, IntMin min, IntMax max, FirstMoment moment, IntSum sum, 238 Product product, IntSumOfSquares sumOfSquares, SumOfLogs sumOfLogs, 239 StatisticsConfiguration config) { 240 this.count = count; 241 this.min = min; 242 this.max = max; 243 this.moment = moment; 244 this.sum = sum; 245 this.product = product; 246 this.sumOfSquares = sumOfSquares; 247 this.sumOfLogs = sumOfLogs; 248 this.config = config; 249 // The final consumer should never be null as the builder is created 250 // with at least one statistic. 251 consumer = Statistics.compose(min, max, sum, sumOfSquares, 252 composeAsInt(moment, product, sumOfLogs)); 253 } 254 255 /** 256 * Chain the {@code consumers} into a single composite {@code IntConsumer}. 257 * Ignore any {@code null} consumer. 258 * 259 * @param consumers Consumers. 260 * @return a composed consumer (or null) 261 */ 262 private static IntConsumer composeAsInt(DoubleConsumer... consumers) { 263 final DoubleConsumer c = Statistics.compose(consumers); 264 if (c != null) { 265 return c::accept; 266 } 267 return null; 268 } 269 270 /** 271 * Returns a new instance configured to compute the specified {@code statistics}. 272 * 273 * <p>The statistics will be empty and so will return the default values for each 274 * computed statistic. 275 * 276 * @param statistics Statistics to compute. 277 * @return the instance 278 * @throws IllegalArgumentException if there are no {@code statistics} to compute. 279 */ 280 public static IntStatistics of(Statistic... statistics) { 281 return builder(statistics).build(); 282 } 283 284 /** 285 * Returns a new instance configured to compute the specified {@code statistics} 286 * populated using the input {@code values}. 287 * 288 * <p>Use this method to create an instance populated with a (variable) array of 289 * {@code int[]} data: 290 * 291 * <pre> 292 * IntStatistics stats = IntStatistics.of( 293 * EnumSet.of(Statistic.MIN, Statistic.MAX), 294 * 1, 1, 2, 3, 5, 8, 13); 295 * </pre> 296 * 297 * @param statistics Statistics to compute. 298 * @param values Values. 299 * @return the instance 300 * @throws IllegalArgumentException if there are no {@code statistics} to compute. 301 */ 302 public static IntStatistics of(Set<Statistic> statistics, int... values) { 303 if (statistics.isEmpty()) { 304 throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS); 305 } 306 final Builder b = new Builder(); 307 statistics.forEach(b::add); 308 return b.build(values); 309 } 310 311 /** 312 * Returns a new builder configured to create instances to compute the specified 313 * {@code statistics}. 314 * 315 * <p>Use this method to create an instance populated with an array of {@code int[]} 316 * data using the {@link Builder#build(int...)} method: 317 * 318 * <pre> 319 * int[] data = ... 320 * IntStatistics stats = IntStatistics.builder( 321 * Statistic.MIN, Statistic.MAX, Statistic.VARIANCE) 322 * .build(data); 323 * </pre> 324 * 325 * <p>The builder can be used to create multiple instances of {@link IntStatistics} 326 * to be used in parallel, or on separate arrays of {@code int[]} data. These may 327 * be {@link #combine(IntStatistics) combined}. For example: 328 * 329 * <pre> 330 * int[][] data = ... 331 * IntStatistics.Builder builder = IntStatistics.builder( 332 * Statistic.MIN, Statistic.MAX, Statistic.VARIANCE); 333 * IntStatistics stats = Arrays.stream(data) 334 * .parallel() 335 * .map(builder::build) 336 * .reduce(IntStatistics::combine) 337 * .get(); 338 * </pre> 339 * 340 * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat 341 * use on multiple data: 342 * 343 * <pre>{@code 344 * IntStatistics.Builder builder = IntStatistics.builder( 345 * Statistic.MIN, Statistic.MAX, Statistic.VARIANCE); 346 * Collector<int[], IntStatistics, IntStatistics> collector = 347 * Collector.of(builder::build, 348 * (s, d) -> s.combine(builder.build(d)), 349 * IntStatistics::combine); 350 * 351 * // Repeated 352 * int[][] data = ... 353 * IntStatistics stats = Arrays.stream(data).collect(collector); 354 * }</pre> 355 * 356 * @param statistics Statistics to compute. 357 * @return the builder 358 * @throws IllegalArgumentException if there are no {@code statistics} to compute. 359 */ 360 public static Builder builder(Statistic... statistics) { 361 if (statistics.length == 0) { 362 throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS); 363 } 364 final Builder b = new Builder(); 365 for (final Statistic s : statistics) { 366 b.add(s); 367 } 368 return b; 369 } 370 371 /** 372 * Updates the state of the statistics to reflect the addition of {@code value}. 373 * 374 * @param value Value. 375 */ 376 @Override 377 public void accept(int value) { 378 count++; 379 consumer.accept(value); 380 } 381 382 /** 383 * Return the count of values recorded. 384 * 385 * @return the count of values 386 */ 387 public long getCount() { 388 return count; 389 } 390 391 /** 392 * Check if the specified {@code statistic} is supported. 393 * 394 * <p>Note: This method will not return {@code false} if the argument is {@code null}. 395 * 396 * @param statistic Statistic. 397 * @return {@code true} if supported 398 * @throws NullPointerException if the {@code statistic} is {@code null} 399 * @see #getResult(Statistic) 400 */ 401 public boolean isSupported(Statistic statistic) { 402 // Check for the appropriate underlying implementation 403 switch (statistic) { 404 case GEOMETRIC_MEAN: 405 case SUM_OF_LOGS: 406 return sumOfLogs != null; 407 case KURTOSIS: 408 return moment instanceof SumOfFourthDeviations; 409 case MAX: 410 return max != null; 411 case MIN: 412 return min != null; 413 case PRODUCT: 414 return product != null; 415 case SKEWNESS: 416 return moment instanceof SumOfCubedDeviations; 417 case STANDARD_DEVIATION: 418 case VARIANCE: 419 return sum != null && sumOfSquares != null; 420 case MEAN: 421 case SUM: 422 return sum != null; 423 case SUM_OF_SQUARES: 424 return sumOfSquares != null; 425 default: 426 return false; 427 } 428 } 429 430 /** 431 * Gets the value of the specified {@code statistic} as a {@code double}. 432 * 433 * @param statistic Statistic. 434 * @return the value 435 * @throws IllegalArgumentException if the {@code statistic} is not supported 436 * @see #isSupported(Statistic) 437 * @see #getResult(Statistic) 438 */ 439 public double getAsDouble(Statistic statistic) { 440 return getResult(statistic).getAsDouble(); 441 } 442 443 /** 444 * Gets the value of the specified {@code statistic} as an {@code int}. 445 * 446 * <p>Use this method to access the {@code int} result for exact integer statistics, 447 * for example {@link Statistic#MIN}. 448 * 449 * <p>Note: This method may throw an {@link ArithmeticException} if the result 450 * overflows an {@code int}. 451 * 452 * @param statistic Statistic. 453 * @return the value 454 * @throws IllegalArgumentException if the {@code statistic} is not supported 455 * @throws ArithmeticException if the {@code result} overflows an {@code int} or is not 456 * finite 457 * @see #isSupported(Statistic) 458 * @see #getResult(Statistic) 459 */ 460 public int getAsInt(Statistic statistic) { 461 return getResult(statistic).getAsInt(); 462 } 463 464 /** 465 * Gets the value of the specified {@code statistic} as a {@code long}. 466 * 467 * <p>Use this method to access the {@code long} result for exact integer statistics, 468 * for example {@link Statistic#SUM} for a {@link #getCount() count} less than or equal to 469 *2<sup>32</sup>. 470 * 471 * <p>Note: This method may throw an {@link ArithmeticException} if the result 472 * overflows an {@code long}. 473 * 474 * @param statistic Statistic. 475 * @return the value 476 * @throws IllegalArgumentException if the {@code statistic} is not supported 477 * @throws ArithmeticException if the {@code result} overflows an {@code long} or is not 478 * finite 479 * @see #isSupported(Statistic) 480 * @see #getResult(Statistic) 481 */ 482 public long getAsLong(Statistic statistic) { 483 return getResult(statistic).getAsLong(); 484 } 485 486 /** 487 * Gets the value of the specified {@code statistic} as a {@code BigInteger}. 488 * 489 * <p>Use this method to access the {@code BigInteger} result for exact integer statistics, 490 * for example {@link Statistic#SUM_OF_SQUARES}. 491 * 492 * <p>Note: This method may throw an {@link ArithmeticException} if the result 493 * is not finite. 494 * 495 * @param statistic Statistic. 496 * @return the value 497 * @throws IllegalArgumentException if the {@code statistic} is not supported 498 * @throws ArithmeticException if the {@code result} is not finite 499 * @see #isSupported(Statistic) 500 * @see #getResult(Statistic) 501 */ 502 public BigInteger getAsBigInteger(Statistic statistic) { 503 return getResult(statistic).getAsBigInteger(); 504 } 505 506 /** 507 * Gets a supplier for the value of the specified {@code statistic}. 508 * 509 * <p>The returned function will supply the correct result after 510 * calls to {@link #accept(int) accept} or 511 * {@link #combine(IntStatistics) combine} further values into 512 * {@code this} instance. 513 * 514 * <p>This method can be used to perform a one-time look-up of the statistic 515 * function to compute statistics as values are dynamically added. 516 * 517 * @param statistic Statistic. 518 * @return the supplier 519 * @throws IllegalArgumentException if the {@code statistic} is not supported 520 * @see #isSupported(Statistic) 521 * @see #getAsDouble(Statistic) 522 */ 523 public StatisticResult getResult(Statistic statistic) { 524 // Locate the implementation. 525 // Statistics that wrap an underlying implementation are created in methods. 526 // The return argument should be an interface reference and not an instance 527 // of IntStatistic. This ensures the statistic implementation cannot 528 // be updated with new values by casting the result and calling accept(int). 529 StatisticResult stat = null; 530 switch (statistic) { 531 case GEOMETRIC_MEAN: 532 stat = getGeometricMean(); 533 break; 534 case KURTOSIS: 535 stat = getKurtosis(); 536 break; 537 case MAX: 538 stat = Statistics.getResultAsIntOrNull(max); 539 break; 540 case MEAN: 541 stat = getMean(); 542 break; 543 case MIN: 544 stat = Statistics.getResultAsIntOrNull(min); 545 break; 546 case PRODUCT: 547 stat = Statistics.getResultAsDoubleOrNull(product); 548 break; 549 case SKEWNESS: 550 stat = getSkewness(); 551 break; 552 case STANDARD_DEVIATION: 553 stat = getStandardDeviation(); 554 break; 555 case SUM: 556 stat = Statistics.getResultAsBigIntegerOrNull(sum); 557 break; 558 case SUM_OF_LOGS: 559 stat = Statistics.getResultAsDoubleOrNull(sumOfLogs); 560 break; 561 case SUM_OF_SQUARES: 562 stat = Statistics.getResultAsBigIntegerOrNull(sumOfSquares); 563 break; 564 case VARIANCE: 565 stat = getVariance(); 566 break; 567 default: 568 break; 569 } 570 if (stat != null) { 571 return stat; 572 } 573 throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic); 574 } 575 576 /** 577 * Gets the geometric mean. 578 * 579 * @return a geometric mean supplier (or null if unsupported) 580 */ 581 private StatisticResult getGeometricMean() { 582 if (sumOfLogs != null) { 583 // Return a function that has access to the count and sumOfLogs 584 return () -> GeometricMean.computeGeometricMean(count, sumOfLogs); 585 } 586 return null; 587 } 588 589 /** 590 * Gets the kurtosis. 591 * 592 * @return a kurtosis supplier (or null if unsupported) 593 */ 594 private StatisticResult getKurtosis() { 595 if (moment instanceof SumOfFourthDeviations) { 596 return new Kurtosis((SumOfFourthDeviations) moment) 597 .setBiased(config.isBiased())::getAsDouble; 598 } 599 return null; 600 } 601 602 /** 603 * Gets the mean. 604 * 605 * @return a mean supplier (or null if unsupported) 606 */ 607 private StatisticResult getMean() { 608 if (sum != null) { 609 // Return a function that has access to the count and sum 610 final Int128 s = sum.getSum(); 611 return () -> IntMean.computeMean(s, count); 612 } 613 return null; 614 } 615 616 /** 617 * Gets the skewness. 618 * 619 * @return a skewness supplier (or null if unsupported) 620 */ 621 private StatisticResult getSkewness() { 622 if (moment instanceof SumOfCubedDeviations) { 623 return new Skewness((SumOfCubedDeviations) moment) 624 .setBiased(config.isBiased())::getAsDouble; 625 } 626 return null; 627 } 628 629 /** 630 * Gets the standard deviation. 631 * 632 * @return a standard deviation supplier (or null if unsupported) 633 */ 634 private StatisticResult getStandardDeviation() { 635 return getVarianceOrStd(true); 636 } 637 638 /** 639 * Gets the variance. 640 * 641 * @return a variance supplier (or null if unsupported) 642 */ 643 private StatisticResult getVariance() { 644 return getVarianceOrStd(false); 645 } 646 647 /** 648 * Gets the variance or standard deviation. 649 * 650 * @param std Flag to control if the statistic is the standard deviation. 651 * @return a variance/standard deviation supplier (or null if unsupported) 652 */ 653 private StatisticResult getVarianceOrStd(boolean std) { 654 if (sum != null && sumOfSquares != null) { 655 // Return a function that has access to the count, sum and sum of squares 656 final Int128 s = sum.getSum(); 657 final UInt128 ss = sumOfSquares.getSumOfSquares(); 658 final boolean biased = config.isBiased(); 659 return () -> IntVariance.computeVarianceOrStd(ss, s, count, biased, std); 660 } 661 return null; 662 } 663 664 /** 665 * Combines the state of the {@code other} statistics into this one. 666 * Only {@code this} instance is modified by the {@code combine} operation. 667 * 668 * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the 669 * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for 670 * all values of the {@link Statistic} enum which are supported by {@code this} 671 * instance. 672 * 673 * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform 674 * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other} 675 * instance is not compatible then an exception is raised before any state is modified. 676 * 677 * @param other Another set of statistics to be combined. 678 * @return {@code this} instance after combining {@code other}. 679 * @throws IllegalArgumentException if the {@code other} is not compatible 680 */ 681 public IntStatistics combine(IntStatistics other) { 682 // Check compatibility 683 Statistics.checkCombineCompatible(min, other.min); 684 Statistics.checkCombineCompatible(max, other.max); 685 Statistics.checkCombineCompatible(sum, other.sum); 686 Statistics.checkCombineCompatible(product, other.product); 687 Statistics.checkCombineCompatible(sumOfSquares, other.sumOfSquares); 688 Statistics.checkCombineCompatible(sumOfLogs, other.sumOfLogs); 689 Statistics.checkCombineAssignable(moment, other.moment); 690 // Combine 691 count += other.count; 692 Statistics.combine(min, other.min); 693 Statistics.combine(max, other.max); 694 Statistics.combine(sum, other.sum); 695 Statistics.combine(product, other.product); 696 Statistics.combine(sumOfSquares, other.sumOfSquares); 697 Statistics.combine(sumOfLogs, other.sumOfLogs); 698 Statistics.combineMoment(moment, other.moment); 699 return this; 700 } 701 702 /** 703 * Sets the statistics configuration. 704 * 705 * <p>These options only control the final computation of statistics. The configuration 706 * will not affect compatibility between instances during a 707 * {@link #combine(IntStatistics) combine} operation. 708 * 709 * <p>Note: These options will affect any future computation of statistics. Supplier functions 710 * that have been previously created will not be updated with the new configuration. 711 * 712 * @param v Value. 713 * @return {@code this} instance 714 * @throws NullPointerException if the value is null 715 * @see #getResult(Statistic) 716 */ 717 public IntStatistics setConfiguration(StatisticsConfiguration v) { 718 config = Objects.requireNonNull(v); 719 return this; 720 } 721 }