1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.statistics.descriptive; 18 19 /** 20 * Computes the sum of fourth deviations from the sample mean. This 21 * statistic is related to the fourth moment. 22 * 23 * <p>Uses a recursive updating formula as defined in Manca and Marin (2010), equation 16. 24 * Note that third term in that equation has been corrected by expansion of the same term 25 * from equation 15. Two sum of fourth (quad) deviations (Sq) can be combined using: 26 * 27 * <p>\[ Sq(X) = {Sq}_1 + {Sq}_2 + \frac{4(m_1 - m_2)(g_1 - g_2) N_1 N_2}{N_1 + N_2} 28 * + \frac{6(m_1 - m_2)^2(N_2^2 ss_1 + N_1^2 ss_2)}{(N_1 + N_2)^2} 29 * + \frac{(m_1 - m_2)^4((N_1^2 - N_1 N_2 + N_2^2) N_1 N_2}{(N_1 + N_2)^3} \] 30 * 31 * <p>where \( N \) is the group size, \( m \) is the mean, \( ss \) is 32 * the sum of squared deviations from the mean, and \( g \) 33 * is the asymmetrical index where \( g * N \) is the sum of fourth deviations from the mean. 34 * Note the term \( ({g_1} - {g_2}) N_1 N_2 == (sc_1 * N_2 - sc_2 * N_1 \) 35 * where \( sc \) is the sum of fourth deviations. 36 * 37 * <p>If \( N_1 \) is size 1 this reduces to: 38 * 39 * <p>\[ SC_{N+1} = {SC}_N + \frac{4(x - m) -sc}{N + 1} 40 * + \frac{6(x - m)^2 ss}{(N + 1)^2} 41 * + \frac{(x - m)^4((1 - N + N^2) N}{(N + 1)^3} \] 42 * 43 * <p>where \( ss \) is the sum of squared deviations, and \( sc \) is the sum of 44 * fourth deviations. This updating formula is identical to that used in 45 * {@code org.apache.commons.math3.stat.descriptive.moment.FourthMoment}. The final term 46 * uses a rearrangement \( (1 - N + N^2) = (N+1)^2 - 3N \). 47 * 48 * <p>Supports up to 2<sup>63</sup> (exclusive) observations. 49 * This implementation does not check for overflow of the count. 50 * 51 * <p><strong>Note that this implementation is not synchronized.</strong> If 52 * multiple threads access an instance of this class concurrently, and at least 53 * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or 54 * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally. 55 * 56 * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept} 57 * and {@link StatisticAccumulator#combine(StatisticResult) combine} 58 * as {@code accumulator} and {@code combiner} functions of 59 * {@link java.util.stream.Collector Collector} on a parallel stream, 60 * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()} 61 * provides the necessary partitioning, isolation, and merging of results for 62 * safe and efficient parallel execution. 63 * 64 * <p>References: 65 * <ul> 66 * <li>Manca and Marin (2020) 67 * Decomposition of the Sum of Cubes, the Sum Raised to the 68 * Power of Four and Codeviance. 69 * Applied Mathematics, 11, 1013-1020. 70 * <a href="https://doi.org/10.4236/am.2020.1110067">doi: 10.4236/am.2020.1110067</a> 71 * </ul> 72 * 73 * @since 1.1 74 */ 75 class SumOfFourthDeviations extends SumOfCubedDeviations { 76 /** Sum of forth deviations of the values that have been added. */ 77 private double sumFourthDev; 78 79 /** 80 * Create an instance. 81 */ 82 SumOfFourthDeviations() { 83 // No-op 84 } 85 86 /** 87 * Create an instance with the given sum of fourth and squared deviations. 88 * 89 * @param sq Sum of fourth (quad) deviations. 90 * @param sc Sum of fourth deviations. 91 */ 92 private SumOfFourthDeviations(double sq, SumOfCubedDeviations sc) { 93 super(sc); 94 this.sumFourthDev = sq; 95 } 96 97 /** 98 * Create an instance with the given sum of cubed and squared deviations, 99 * and first moment. 100 * 101 * @param sq Sum of fouth deviations. 102 * @param sc Sum of cubed deviations. 103 * @param ss Sum of squared deviations. 104 * @param m1 First moment. 105 * @param n Count of values. 106 */ 107 private SumOfFourthDeviations(double sq, double sc, double ss, double m1, long n) { 108 super(sc, ss, m1, n); 109 this.sumFourthDev = sq; 110 } 111 112 /** 113 * Returns an instance populated using the input {@code values}. 114 * 115 * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept accept} may be 116 * different from this instance. 117 * 118 * @param values Values. 119 * @return {@code SumOfFourthDeviations} instance. 120 */ 121 static SumOfFourthDeviations of(double... values) { 122 if (values.length == 0) { 123 return new SumOfFourthDeviations(); 124 } 125 return create(SumOfCubedDeviations.of(values), values); 126 } 127 128 /** 129 * Creates the sum of fourth deviations. 130 * 131 * <p>Uses the provided {@code sum} to create the first moment. 132 * This method is used by {@link DoubleStatistics} using a sum that can be reused 133 * for the {@link Sum} statistic. 134 * 135 * @param sum Sum of the values. 136 * @param values Values. 137 * @return {@code SumOfFourthDeviations} instance. 138 */ 139 static SumOfFourthDeviations create(org.apache.commons.numbers.core.Sum sum, double[] values) { 140 if (values.length == 0) { 141 return new SumOfFourthDeviations(); 142 } 143 return create(SumOfCubedDeviations.create(sum, values), values); 144 } 145 146 /** 147 * Creates the sum of fourth deviations. 148 * 149 * @param sc Sum of cubed deviations. 150 * @param values Values. 151 * @return {@code SumOfFourthDeviations} instance. 152 */ 153 private static SumOfFourthDeviations create(SumOfCubedDeviations sc, double[] values) { 154 // Edge cases 155 final double xbar = sc.getFirstMoment(); 156 if (!Double.isFinite(xbar) || 157 !Double.isFinite(sc.sumSquaredDev) || 158 !Double.isFinite(sc.sumCubedDev)) { 159 // Overflow computing lower order deviations will overflow 160 return new SumOfFourthDeviations(Double.NaN, sc); 161 } 162 // Compute the sum of fourth (quad) deviations. 163 // Note: This handles n=1. 164 double s = 0; 165 for (final double x : values) { 166 s += pow4(x - xbar); 167 } 168 return new SumOfFourthDeviations(s, sc); 169 } 170 171 /** 172 * Compute {@code x^4}. 173 * Uses compound multiplication. 174 * 175 * @param x Value. 176 * @return x^4 177 */ 178 private static double pow4(double x) { 179 final double x2 = x * x; 180 return x2 * x2; 181 } 182 183 /** 184 * Returns an instance populated using the input {@code values}. 185 * 186 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be 187 * different from this instance. 188 * 189 * @param values Values. 190 * @return {@code SumOfCubedDeviations} instance. 191 */ 192 static SumOfFourthDeviations of(int... values) { 193 // Logic shared with the double[] version with int[] lower order moments 194 if (values.length == 0) { 195 return new SumOfFourthDeviations(); 196 } 197 final IntVariance variance = IntVariance.of(values); 198 final double xbar = variance.computeMean(); 199 final double ss = variance.computeSumOfSquaredDeviations(); 200 // Unlike the double[] case, overflow/NaN is not possible: 201 // (max value)^4 times max array length ~ (2^31)^4 * 2^31 ~ 2^155. 202 // Compute sum of cubed and fourth deviations together. 203 double sc = 0; 204 double sq = 0; 205 for (final double y : values) { 206 final double x = y - xbar; 207 final double x2 = x * x; 208 sc += x2 * x; 209 sq += x2 * x2; 210 } 211 // Edge case to avoid floating-point error for zero 212 if (values.length <= LENGTH_TWO) { 213 sc = 0; 214 } 215 return new SumOfFourthDeviations(sq, sc, ss, xbar, values.length); 216 } 217 218 /** 219 * Returns an instance populated using the input {@code values}. 220 * 221 * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be 222 * different from this instance. 223 * 224 * @param values Values. 225 * @return {@code SumOfCubedDeviations} instance. 226 */ 227 static SumOfFourthDeviations of(long... values) { 228 // Logic shared with the double[] version with long[] lower order moments 229 if (values.length == 0) { 230 return new SumOfFourthDeviations(); 231 } 232 final LongVariance variance = LongVariance.of(values); 233 final double xbar = variance.computeMean(); 234 final double ss = variance.computeSumOfSquaredDeviations(); 235 // Unlike the double[] case, overflow/NaN is not possible: 236 // (max value)^4 times max array length ~ (2^63)^4 * 2^31 ~ 2^283. 237 // Compute sum of cubed and fourth deviations together. 238 double sc = 0; 239 double sq = 0; 240 for (final double y : values) { 241 final double x = y - xbar; 242 final double x2 = x * x; 243 sc += x2 * x; 244 sq += x2 * x2; 245 } 246 // Edge case to avoid floating-point error for zero 247 if (values.length <= LENGTH_TWO) { 248 sc = 0; 249 } 250 return new SumOfFourthDeviations(sq, sc, ss, xbar, values.length); 251 } 252 253 /** 254 * Updates the state of the statistic to reflect the addition of {@code value}. 255 * 256 * @param value Value. 257 */ 258 @Override 259 public void accept(double value) { 260 // Require current s^2 * N == sum-of-square deviations 261 // Require current g * N == sum-of-fourth deviations 262 final double ss = sumSquaredDev; 263 final double sc = sumCubedDev; 264 final double np = n; 265 super.accept(value); 266 // Terms are arranged so that values that may be zero 267 // (np, ss, sc) are first. This will cancel any overflow in 268 // multiplication of later terms (nDev * 4, nDev^2, nDev^4). 269 // This handles initialisation when np in {0, 1) to zero 270 // for any deviation (e.g. series MAX_VALUE, -MAX_VALUE). 271 // Note: (np1 * np1 - 3 * np) = (np+1)^2 - 3np = np^2 - np + 1 272 // Note: account for the half-deviation representation by scaling by 8=4*2; 24=6*2^2; 16=2^4 273 final double np1 = n; 274 sumFourthDev = sumFourthDev - 275 sc * nDev * 8 + 276 ss * nDev * nDev * 24 + 277 np * (np1 * np1 - 3 * np) * nDev * nDev * nDev * dev * 16; 278 } 279 280 /** 281 * Gets the sum of fourth deviations of all input values. 282 * 283 * <p>Note that the result should be positive. However the updating sum is subject to 284 * cancellation of potentially large positive and negative terms. Overflow of these 285 * terms can result in a sum of opposite signed infinities and a {@code NaN} result 286 * for finite input values where the correct result is positive infinity. 287 * 288 * <p>Note: Any non-finite result should be considered a failed computation. The 289 * result is returned as computed and not consolidated to a single NaN. This is done 290 * for testing purposes to allow the result to be reported. It is possible to track 291 * input values to finite/non-finite (e.g. using bit mask manipulation of the exponent 292 * field). However this statistic in currently used in the kurtosis and in the case 293 * of failed computation distinguishing a non-finite result is not useful. 294 * 295 * @return sum of fourth deviations of all values. 296 */ 297 double getSumOfFourthDeviations() { 298 return Double.isFinite(getFirstMoment()) ? sumFourthDev : Double.NaN; 299 } 300 301 /** 302 * Combines the state of another {@code SumOfFourthDeviations} into this one. 303 * 304 * @param other Another {@code SumOfFourthDeviations} to be combined. 305 * @return {@code this} instance after combining {@code other}. 306 */ 307 SumOfFourthDeviations combine(SumOfFourthDeviations other) { 308 if (n == 0) { 309 sumFourthDev = other.sumFourthDev; 310 } else if (other.n != 0) { 311 // Avoid overflow to compute the difference. 312 final double halfDiffOfMean = getFirstMomentHalfDifference(other); 313 sumFourthDev += other.sumFourthDev; 314 // Add additional terms that do not cancel to zero 315 if (halfDiffOfMean != 0) { 316 final double n1 = n; 317 final double n2 = other.n; 318 if (n1 == n2) { 319 // Optimisation where sizes are equal in double-precision. 320 // This is of use in JDK streams as spliterators use a divide by two 321 // strategy for parallel streams. 322 // Note: (n1 * n2) * ((n1+n2)^2 - 3 * (n1 * n2)) == n^4 323 sumFourthDev += 324 (sumCubedDev - other.sumCubedDev) * halfDiffOfMean * 4 + 325 (sumSquaredDev + other.sumSquaredDev) * (halfDiffOfMean * halfDiffOfMean) * 6 + 326 pow4(halfDiffOfMean) * n1 * 2; 327 } else { 328 final double n1n2 = n1 + n2; 329 final double dm = 2 * (halfDiffOfMean / n1n2); 330 // Use the rearrangement for parity with the accept method 331 // n1*n1 - n1*n2 + n2*n2 == (n1+n2)^2 - 3*n1*n2 332 sumFourthDev += 333 (sumCubedDev * n2 - other.sumCubedDev * n1) * dm * 4 + 334 (n2 * n2 * sumSquaredDev + n1 * n1 * other.sumSquaredDev) * (dm * dm) * 6 + 335 (n1 * n2) * (n1n2 * n1n2 - 3 * (n1 * n2)) * pow4(dm) * n1n2; 336 } 337 } 338 } 339 super.combine(other); 340 return this; 341 } 342 }