1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.statistics.descriptive; 18 19 /** 20 * Computes the sum of squared deviations from the sample mean. This 21 * statistic is related to the second moment. 22 * 23 * <p>The following recursive updating formula is used: 24 * <p>Let 25 * <ul> 26 * <li> dev = (current obs - previous mean) </li> 27 * <li> n = number of observations (including current obs) </li> 28 * </ul> 29 * <p>Then 30 * <p>new value = old value + dev^2 * (n - 1) / n 31 * <p>returns the sum of squared deviations of all values seen so far. 32 * 33 * <p>Supports up to 2<sup>63</sup> (exclusive) observations. 34 * This implementation does not check for overflow of the count. 35 * 36 * <p><strong>Note that this implementation is not synchronized.</strong> If 37 * multiple threads access an instance of this class concurrently, and at least 38 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or 39 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally. 40 * 41 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept} 42 * and {@link StatisticAccumulator#combine(StatisticResult) combine} 43 * as {@code accumulator} and {@code combiner} functions of 44 * {@link java.util.stream.Collector Collector} on a parallel stream, 45 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()} 46 * provides the necessary partitioning, isolation, and merging of results for 47 * safe and efficient parallel execution. 48 * 49 * <p>References: 50 * <ul> 51 * <li>Chan, Golub and Levesque (1983) 52 * Algorithms for Computing the Sample Variance: Analysis and Recommendations. 53 * American Statistician, 37, 242-247. 54 * <a href="https://doi.org/10.2307/2683386">doi: 10.2307/2683386</a> 55 * </ul> 56 * 57 * @since 1.1 58 */ 59 class SumOfSquaredDeviations extends FirstMoment { 60 /** Sum of squared deviations of the values that have been added. */ 61 protected double sumSquaredDev; 62 63 /** 64 * Create an instance. 65 */ 66 SumOfSquaredDeviations() { 67 // No-op 68 } 69 70 /** 71 * Copy constructor. 72 * 73 * @param source Source to copy. 74 */ 75 SumOfSquaredDeviations(SumOfSquaredDeviations source) { 76 super(source); 77 sumSquaredDev = source.sumSquaredDev; 78 } 79 80 /** 81 * Create an instance with the given sum of squared deviations and first moment. 82 * 83 * @param sumSquaredDev Sum of squared deviations. 84 * @param m1 First moment. 85 */ 86 private SumOfSquaredDeviations(double sumSquaredDev, FirstMoment m1) { 87 super(m1); 88 this.sumSquaredDev = sumSquaredDev; 89 } 90 91 /** 92 * Create an instance with the given sum of squared deviations and first moment. 93 * 94 * <p>This constructor is used when creating the moment from integer values. 95 * 96 * @param sumSquaredDev Sum of squared deviations. 97 * @param m1 First moment. 98 * @param n Count of values. 99 */ 100 SumOfSquaredDeviations(double sumSquaredDev, double m1, long n) { 101 super(m1, n); 102 this.sumSquaredDev = sumSquaredDev; 103 } 104 105 /** 106 * Returns an instance populated using the input {@code values}. 107 * 108 * <p>Note: {@code SumOfSquaredDeviations} computed using {@link #accept accept} may be 109 * different from this instance. 110 * 111 * @param values Values. 112 * @return {@code SumOfSquaredDeviations} instance. 113 */ 114 static SumOfSquaredDeviations of(double... values) { 115 if (values.length == 0) { 116 return new SumOfSquaredDeviations(); 117 } 118 return create(FirstMoment.of(values), values); 119 } 120 121 /** 122 * Creates the sum of squared deviations. 123 * 124 * <p>Uses the provided {@code sum} to create the first moment. 125 * This method is used by {@link DoubleStatistics} using a sum that can be reused 126 * for the {@link Sum} statistic. 127 * 128 * @param sum Sum of the values. 129 * @param values Values. 130 * @return {@code SumOfSquaredDeviations} instance. 131 */ 132 static SumOfSquaredDeviations create(org.apache.commons.numbers.core.Sum sum, double[] values) { 133 if (values.length == 0) { 134 return new SumOfSquaredDeviations(); 135 } 136 return create(FirstMoment.create(sum, values), values); 137 } 138 139 /** 140 * Creates the sum of squared deviations. 141 * 142 * @param m1 First moment. 143 * @param values Values. 144 * @return {@code SumOfSquaredDeviations} instance. 145 */ 146 private static SumOfSquaredDeviations create(FirstMoment m1, double[] values) { 147 // "Corrected two-pass algorithm" 148 // See: Chan et al (1983) Equation 1.7 149 150 final double xbar = m1.getFirstMoment(); 151 if (!Double.isFinite(xbar)) { 152 return new SumOfSquaredDeviations(Double.NaN, m1); 153 } 154 double s = 0; 155 double ss = 0; 156 for (final double x : values) { 157 final double dx = x - xbar; 158 s += dx; 159 ss += dx * dx; 160 } 161 // The sum of squared deviations is ss - (s * s / n). 162 // The second term ideally should be zero; in practice it is a good approximation 163 // of the error in the first term. 164 // To prevent sumSquaredDev from spuriously attaining a NaN value 165 // when ss is infinite, assign it an infinite value which is its intended value. 166 final double sumSquaredDev = ss == Double.POSITIVE_INFINITY ? 167 Double.POSITIVE_INFINITY : 168 ss - (s * s / values.length); 169 return new SumOfSquaredDeviations(sumSquaredDev, m1); 170 } 171 172 /** 173 * Updates the state of the statistic to reflect the addition of {@code value}. 174 * 175 * @param value Value. 176 */ 177 @Override 178 public void accept(double value) { 179 // "Updating one-pass algorithm" 180 // See: Chan et al (1983) Equation 1.3b 181 super.accept(value); 182 // Note: account for the half-deviation representation by scaling by 4=2^2 183 sumSquaredDev += (n - 1) * dev * nDev * 4; 184 } 185 186 /** 187 * Gets the sum of squared deviations of all input values. 188 * 189 * @return sum of squared deviations of all values. 190 */ 191 double getSumOfSquaredDeviations() { 192 return Double.isFinite(getFirstMoment()) ? sumSquaredDev : Double.NaN; 193 } 194 195 /** 196 * Combines the state of another {@code SumOfSquaredDeviations} into this one. 197 * 198 * @param other Another {@code SumOfSquaredDeviations} to be combined. 199 * @return {@code this} instance after combining {@code other}. 200 */ 201 SumOfSquaredDeviations combine(SumOfSquaredDeviations other) { 202 final long m = other.n; 203 if (n == 0) { 204 sumSquaredDev = other.sumSquaredDev; 205 } else if (m != 0) { 206 // "Updating one-pass algorithm" 207 // See: Chan et al (1983) Equation 1.5b (modified for the mean) 208 final double diffOfMean = getFirstMomentDifference(other); 209 final double sqDiffOfMean = diffOfMean * diffOfMean; 210 // Enforce symmetry 211 sumSquaredDev = (sumSquaredDev + other.sumSquaredDev) + 212 sqDiffOfMean * (((double) n * m) / ((double) n + m)); 213 } 214 super.combine(other); 215 return this; 216 } 217 }