1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.statistics.descriptive; 18 19 /** 20 * Computes the standard deviation of the available values. The default implementation uses the 21 * following definition of the <em>sample standard deviation</em>: 22 * 23 * <p>\[ \sqrt{ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 } \] 24 * 25 * <p>where \( \overline{x} \) is the sample mean, and \( n \) is the number of samples. 26 * 27 * <ul> 28 * <li>The result is {@code NaN} if no values are added. 29 * <li>The result is zero if there is one value in the data set. 30 * </ul> 31 * 32 * <p>The use of the term \( n − 1 \) is called Bessel's correction. Omitting the square root, 33 * this provides an unbiased estimator of the variance of a hypothetical infinite population. If the 34 * {@link #setBiased(boolean) biased} option is enabled the normalisation factor is 35 * changed to \( \frac{1}{n} \) for a biased estimator of the <em>sample variance</em>. 36 * Note however that square root is a concave function and thus introduces negative bias 37 * (by Jensen's inequality), which depends on the distribution, and thus the corrected sample 38 * standard deviation (using Bessel's correction) is less biased, but still biased. 39 * 40 * <p>The implementation uses an exact integer sum to compute the scaled (by \( n \)) 41 * sum of squared deviations from the mean; this is normalised by the scaled correction factor. 42 * 43 * <p>\[ \frac {n \times \sum_{i=1}^n x_i^2 - (\sum_{i=1}^n x_i)^2}{n \times (n - 1)} \] 44 * 45 * <p>Supports up to 2<sup>63</sup> (exclusive) observations. 46 * This implementation does not check for overflow of the count. 47 * 48 * <p>This class is designed to work with (though does not require) 49 * {@linkplain java.util.stream streams}. 50 * 51 * <p><strong>This implementation is not thread safe.</strong> 52 * If multiple threads access an instance of this class concurrently, 53 * and at least one of the threads invokes the {@link java.util.function.IntConsumer#accept(int) accept} or 54 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally. 55 * 56 * <p>However, it is safe to use {@link java.util.function.IntConsumer#accept(int) accept} 57 * and {@link StatisticAccumulator#combine(StatisticResult) combine} 58 * as {@code accumulator} and {@code combiner} functions of 59 * {@link java.util.stream.Collector Collector} on a parallel stream, 60 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()} 61 * provides the necessary partitioning, isolation, and merging of results for 62 * safe and efficient parallel execution. 63 * 64 * @see <a href="https://en.wikipedia.org/wiki/Standard_deviation">Standard deviation (Wikipedia)</a> 65 * @see <a href="https://en.wikipedia.org/wiki/Bessel%27s_correction">Bessel's correction</a> 66 * @see <a href="https://en.wikipedia.org/wiki/Jensen%27s_inequality">Jensen's inequality</a> 67 * @see IntVariance 68 * @since 1.1 69 */ 70 public final class IntStandardDeviation implements IntStatistic, StatisticAccumulator<IntStandardDeviation> { 71 72 /** Sum of the squared values. */ 73 private final UInt128 sumSq; 74 /** Sum of the values. */ 75 private final Int128 sum; 76 /** Count of values that have been added. */ 77 private long n; 78 79 /** Flag to control if the statistic is biased, or should use a bias correction. */ 80 private boolean biased; 81 82 /** 83 * Create an instance. 84 */ 85 private IntStandardDeviation() { 86 this(UInt128.create(), Int128.create(), 0); 87 } 88 89 /** 90 * Create an instance. 91 * 92 * @param sumSq Sum of the squared values. 93 * @param sum Sum of the values. 94 * @param n Count of values that have been added. 95 */ 96 private IntStandardDeviation(UInt128 sumSq, Int128 sum, int n) { 97 this.sumSq = sumSq; 98 this.sum = sum; 99 this.n = n; 100 } 101 102 /** 103 * Creates an instance. 104 * 105 * <p>The initial result is {@code NaN}. 106 * 107 * @return {@code IntStandardDeviation} instance. 108 */ 109 public static IntStandardDeviation create() { 110 return new IntStandardDeviation(); 111 } 112 113 /** 114 * Returns an instance populated using the input {@code values}. 115 * 116 * @param values Values. 117 * @return {@code IntStandardDeviation} instance. 118 */ 119 public static IntStandardDeviation of(int... values) { 120 // Small arrays can be processed using the object 121 if (values.length < IntVariance.SMALL_SAMPLE) { 122 final IntStandardDeviation stat = new IntStandardDeviation(); 123 for (final int x : values) { 124 stat.accept(x); 125 } 126 return stat; 127 } 128 129 // Arrays can be processed using specialised counts knowing the maximum limit 130 // for an array is 2^31 values. 131 long s = 0; 132 final UInt96 ss = UInt96.create(); 133 // Process pairs as we know two maximum value int^2 will not overflow 134 // an unsigned long. 135 final int end = values.length & ~0x1; 136 for (int i = 0; i < end; i += 2) { 137 final long x = values[i]; 138 final long y = values[i + 1]; 139 s += x + y; 140 ss.addPositive(x * x + y * y); 141 } 142 if (end < values.length) { 143 final long x = values[end]; 144 s += x; 145 ss.addPositive(x * x); 146 } 147 148 // Convert 149 return new IntStandardDeviation(UInt128.of(ss), Int128.of(s), values.length); 150 } 151 152 /** 153 * Updates the state of the statistic to reflect the addition of {@code value}. 154 * 155 * @param value Value. 156 */ 157 @Override 158 public void accept(int value) { 159 sumSq.addPositive((long) value * value); 160 sum.add(value); 161 n++; 162 } 163 164 /** 165 * Gets the standard deviation of all input values. 166 * 167 * <p>When no values have been added, the result is {@code NaN}. 168 * 169 * @return standard deviation of all values. 170 */ 171 @Override 172 public double getAsDouble() { 173 return IntVariance.computeVarianceOrStd(sumSq, sum, n, biased, true); 174 } 175 176 @Override 177 public IntStandardDeviation combine(IntStandardDeviation other) { 178 sumSq.add(other.sumSq); 179 sum.add(other.sum); 180 n += other.n; 181 return this; 182 } 183 184 /** 185 * Sets the value of the biased flag. The default value is {@code false}. The bias 186 * term refers to the computation of the variance; the standard deviation is returned 187 * as the square root of the biased or unbiased <em>sample variance</em>. For further 188 * details see {@link IntVariance#setBiased(boolean) IntVarianceVariance.setBiased}. 189 * 190 * <p>This flag only controls the final computation of the statistic. The value of 191 * this flag will not affect compatibility between instances during a 192 * {@link #combine(IntStandardDeviation) combine} operation. 193 * 194 * @param v Value. 195 * @return {@code this} instance 196 * @see IntVariance#setBiased(boolean) 197 */ 198 public IntStandardDeviation setBiased(boolean v) { 199 biased = v; 200 return this; 201 } 202 }