1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math4.legacy.stat.descriptive.rank; 18 19 import java.util.Arrays; 20 import java.util.BitSet; 21 22 import org.apache.commons.numbers.core.Precision; 23 import org.apache.commons.numbers.arrays.SortInPlace; 24 import org.apache.commons.math4.legacy.exception.NullArgumentException; 25 import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException; 26 import org.apache.commons.math4.legacy.exception.NotPositiveException; 27 import org.apache.commons.math4.legacy.exception.NumberIsTooLargeException; 28 import org.apache.commons.math4.legacy.exception.OutOfRangeException; 29 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 30 import org.apache.commons.math4.legacy.stat.descriptive.AbstractUnivariateStatistic; 31 import org.apache.commons.math4.legacy.stat.ranking.NaNStrategy; 32 import org.apache.commons.math4.core.jdkmath.JdkMath; 33 import org.apache.commons.math4.legacy.core.MathArrays; 34 35 /** 36 * Provides percentile computation. 37 * <p> 38 * There are several commonly used methods for estimating percentiles (a.k.a. 39 * quantiles) based on sample data. For large samples, the different methods 40 * agree closely, but when sample sizes are small, different methods will give 41 * significantly different results. The algorithm implemented here works as follows: 42 * <ol> 43 * <li>Let <code>n</code> be the length of the (sorted) array and 44 * <code>0 < p <= 100</code> be the desired percentile.</li> 45 * <li>If <code> n = 1 </code> return the unique array element (regardless of 46 * the value of <code>p</code>); otherwise </li> 47 * <li>Compute the estimated percentile position 48 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code> 49 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional 50 * part of <code>pos</code>).</li> 51 * <li> If <code>pos < 1</code> return the smallest element in the array.</li> 52 * <li> Else if <code>pos >= n</code> return the largest element in the array.</li> 53 * <li> Else let <code>lower</code> be the element in position 54 * <code>floor(pos)</code> in the array and let <code>upper</code> be the 55 * next element in the array. Return <code>lower + d * (upper - lower)</code> 56 * </li> 57 * </ol> 58 * <p> 59 * To compute percentiles, the data must be at least partially ordered. Input 60 * arrays are copied and recursively partitioned using an ordering definition. 61 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined 62 * by {@link java.lang.Double#compareTo(Double)}. This ordering makes 63 * <code>Double.NaN</code> larger than any other value (including 64 * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median 65 * (50th percentile) of 66 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p> 67 * <p> 68 * Since percentile estimation usually involves interpolation between array 69 * elements, arrays containing <code>NaN</code> or infinite values will often 70 * result in <code>NaN</code> or infinite values returned.</p> 71 * <p> 72 * Further, to include different estimation types such as R1, R2 as mentioned in 73 * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>, 74 * a type specific NaN handling strategy is used to closely match with the 75 * typically observed results from popular tools like R(R1-R9), Excel(R7).</p> 76 * <p> 77 * Since 2.2, Percentile uses only selection instead of complete sorting 78 * and caches selection algorithm state between calls to the various 79 * {@code evaluate} methods. This greatly improves efficiency, both for a single 80 * percentile and multiple percentile computations. To maximize performance when 81 * multiple percentiles are computed based on the same data, users should set the 82 * data array once using either one of the {@link #evaluate(double[], double)} or 83 * {@link #setData(double[])} methods and thereafter {@link #evaluate(double)} 84 * with just the percentile provided. 85 * </p> 86 * <p> 87 * <strong>Note that this implementation is not synchronized.</strong> If 88 * multiple threads access an instance of this class concurrently, and at least 89 * one of the threads invokes the <code>increment()</code> or 90 * <code>clear()</code> method, it must be synchronized externally.</p> 91 */ 92 public class Percentile extends AbstractUnivariateStatistic { 93 /** Maximum number of partitioning pivots cached (each level double the number of pivots). */ 94 private static final int MAX_CACHED_LEVELS = 10; 95 96 /** Maximum number of cached pivots in the pivots cached array. */ 97 private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1; 98 99 /** Default KthSelector used with default pivoting strategy. */ 100 private final KthSelector kthSelector; 101 102 /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */ 103 private final EstimationType estimationType; 104 105 /** NaN Handling of the input as defined by {@link NaNStrategy}. */ 106 private final NaNStrategy nanStrategy; 107 108 /** 109 * Determines what percentile is computed when evaluate() is activated 110 * with no quantile argument. 111 */ 112 private double quantile; 113 114 /** Cached pivots. */ 115 private int[] cachedPivots; 116 117 /** Weight. */ 118 private double[] weights; 119 /** 120 * Constructs a Percentile with the following defaults. 121 * <ul> 122 * <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li> 123 * <li>default estimation type: {@link EstimationType#LEGACY}, 124 * can be reset with {@link #withEstimationType(EstimationType)}</li> 125 * <li>default NaN strategy: {@link NaNStrategy#REMOVED}, 126 * can be reset with {@link #withNaNStrategy(NaNStrategy)}</li> 127 * <li>a KthSelector that makes use of {@link MedianOf3PivotingStrategy}, 128 * can be reset with {@link #withKthSelector(KthSelector)}</li> 129 * </ul> 130 */ 131 public Percentile() { 132 // No try-catch or advertised exception here - arg is valid 133 this(50.0); 134 } 135 136 /** 137 * Constructs a Percentile with the specific quantile value and the following. 138 * <ul> 139 * <li>default method type: {@link EstimationType#LEGACY}</li> 140 * <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li> 141 * <li>a Kth Selector : {@link KthSelector}</li> 142 * </ul> 143 * @param quantile the quantile 144 * @throws MathIllegalArgumentException if p is not greater than 0 and less 145 * than or equal to 100 146 */ 147 public Percentile(final double quantile) { 148 this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED, 149 new KthSelector(new MedianOf3PivotingStrategy())); 150 } 151 152 /** 153 * Copy constructor, creates a new {@code Percentile} identical. 154 * to the {@code original} 155 * 156 * @param original the {@code Percentile} instance to copy. 157 * Cannot be {@code null}. 158 */ 159 public Percentile(final Percentile original) { 160 NullArgumentException.check(original); 161 estimationType = original.getEstimationType(); 162 nanStrategy = original.getNaNStrategy(); 163 kthSelector = original.getKthSelector(); 164 165 setData(original.getDataRef()); 166 if (original.cachedPivots != null) { 167 System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length); 168 } 169 setQuantile(original.quantile); 170 } 171 172 /** 173 * Constructs a Percentile with the specific quantile value, 174 * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}. 175 * 176 * @param quantile the quantile to be computed 177 * @param estimationType one of the percentile {@link EstimationType estimation types} 178 * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs. 179 * Cannot be {@code null}. 180 * @param kthSelector a {@link KthSelector} to use for pivoting during search 181 * @throws MathIllegalArgumentException if p is not within (0,100] 182 */ 183 protected Percentile(final double quantile, 184 final EstimationType estimationType, 185 final NaNStrategy nanStrategy, 186 final KthSelector kthSelector) { 187 setQuantile(quantile); 188 cachedPivots = null; 189 NullArgumentException.check(estimationType); 190 NullArgumentException.check(nanStrategy); 191 NullArgumentException.check(kthSelector); 192 this.estimationType = estimationType; 193 this.nanStrategy = nanStrategy; 194 this.kthSelector = kthSelector; 195 } 196 197 /** {@inheritDoc} */ 198 @Override 199 public void setData(final double[] values) { 200 if (values == null) { 201 cachedPivots = null; 202 } else { 203 cachedPivots = new int[PIVOTS_HEAP_LENGTH]; 204 Arrays.fill(cachedPivots, -1); 205 } 206 super.setData(values); 207 } 208 /** 209 * Set the data array. 210 * @param values Data array. 211 * Cannot be {@code null}. 212 * @param sampleWeights corresponding positive and non-NaN weights. 213 * Cannot be {@code null}. 214 * @throws MathIllegalArgumentException if lengths of values and weights are not equal. 215 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN 216 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive 217 */ 218 public void setData(final double[] values, 219 final double[] sampleWeights) { 220 setData(values, sampleWeights, 0, values.length); 221 } 222 223 /** {@inheritDoc} */ 224 @Override 225 public void setData(final double[] values, final int begin, final int length) { 226 if (values == null) { 227 cachedPivots = null; 228 } else { 229 cachedPivots = new int[PIVOTS_HEAP_LENGTH]; 230 Arrays.fill(cachedPivots, -1); 231 } 232 super.setData(values, begin, length); 233 } 234 /** 235 * Set the data and weights arrays. The input array is copied, not referenced. 236 * @param values Data array. 237 * Cannot be {@code null}. 238 * @param sampleWeights corresponding positive and non-NaN weights. 239 * Cannot be {@code null}. 240 * @param begin the index of the first element to include 241 * @param length the number of elements to include 242 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null 243 * @throws NotPositiveException if begin or length is not positive 244 * @throws NumberIsTooLargeException if begin + length is greater than values.length 245 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN 246 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive 247 */ 248 public void setData(final double[] values, 249 final double[] sampleWeights, 250 final int begin, 251 final int length) { 252 if (begin < 0) { 253 throw new NotPositiveException(LocalizedFormats.START_POSITION, begin); 254 } 255 256 if (length < 0) { 257 throw new NotPositiveException(LocalizedFormats.LENGTH, length); 258 } 259 260 if (begin + length > values.length) { 261 throw new NumberIsTooLargeException(LocalizedFormats.SUBARRAY_ENDS_AFTER_ARRAY_END, 262 begin + length, values.length, true); 263 } 264 265 if (sampleWeights == null) { 266 throw new MathIllegalArgumentException(LocalizedFormats.NULL_NOT_ALLOWED); 267 } 268 cachedPivots = new int[PIVOTS_HEAP_LENGTH]; 269 Arrays.fill(cachedPivots, -1); 270 271 // Check length 272 if (values.length != sampleWeights.length) { 273 throw new MathIllegalArgumentException(LocalizedFormats.LENGTH, 274 values, sampleWeights); 275 } 276 // Check weights > 0 277 MathArrays.checkPositive(sampleWeights); 278 MathArrays.checkNotNaN(sampleWeights); 279 280 super.setData(values, begin, length); 281 weights = new double[length]; 282 System.arraycopy(sampleWeights, begin, weights, 0, length); 283 } 284 /** 285 * Returns the result of evaluating the statistic over the stored data. 286 * If weights have been set, it will compute weighted percentile. 287 * <p> 288 * The stored array is the one which was set by previous calls to 289 * {@link #setData(double[])} or {@link #setData(double[], double[], int, int)} 290 * </p> 291 * @param p the percentile value to compute 292 * @return the value of the statistic applied to the stored data 293 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null 294 * @throws NotPositiveException if begin, length is negative 295 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive 296 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN 297 * @throws OutOfRangeException if p is invalid 298 * @throws NumberIsTooLargeException if begin + length is greater than values.length 299 * (p must be greater than 0 and less than or equal to 100) 300 */ 301 public double evaluate(final double p) { 302 if (weights == null) { 303 return evaluate(getDataRef(), p); 304 } else { 305 return evaluate(getDataRef(), weights, p); 306 } 307 } 308 309 /** 310 * Returns an estimate of the <code>p</code>th percentile of the values 311 * in the <code>values</code> array. 312 * <p> 313 * Calls to this method do not modify the internal <code>quantile</code> 314 * state of this statistic.</p> 315 * <ul> 316 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 317 * <code>0</code></li> 318 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> 319 * if <code>values</code> has length <code>1</code></li> 320 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> 321 * is null or p is not a valid quantile value (p must be greater than 0 322 * and less than or equal to 100) </li> 323 * </ul> 324 * <p> 325 * See {@link Percentile} for a description of the percentile estimation 326 * algorithm used.</p> 327 * 328 * @param values input array of values 329 * @param p the percentile value to compute 330 * @return the percentile value or Double.NaN if the array is empty 331 * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid 332 */ 333 public double evaluate(final double[] values, final double p) { 334 MathArrays.verifyValues(values, 0, 0); 335 return evaluate(values, 0, values.length, p); 336 } 337 /** 338 * Returns an estimate of the <code>p</code>th percentile of the values 339 * in the <code>values</code> array with their weights. 340 * <p> 341 * See {@link Percentile} for a description of the percentile estimation 342 * algorithm used.</p> 343 * @param values input array of values 344 * @param sampleWeights weights of values 345 * @param p the percentile value to compute 346 * @return the weighted percentile value or Double.NaN if the array is empty 347 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null 348 * @throws NotPositiveException if begin, length is negative 349 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive 350 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN 351 * @throws OutOfRangeException if p is invalid 352 * @throws NumberIsTooLargeException if begin + length is greater than values.length 353 */ 354 public double evaluate(final double[] values, final double[] sampleWeights, final double p) { 355 MathArrays.verifyValues(values, 0, 0); 356 MathArrays.verifyValues(sampleWeights, 0, 0); 357 return evaluate(values, sampleWeights, 0, values.length, p); 358 } 359 360 /** 361 * Returns an estimate of the <code>quantile</code>th percentile of the 362 * designated values in the <code>values</code> array. The quantile 363 * estimated is determined by the <code>quantile</code> property. 364 * <ul> 365 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 366 * <li>Returns (for any value of <code>quantile</code>) 367 * <code>values[begin]</code> if <code>length = 1 </code></li> 368 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> 369 * is null, or <code>start</code> or <code>length</code> is invalid</li> 370 * </ul> 371 * <p> 372 * See {@link Percentile} for a description of the percentile estimation 373 * algorithm used.</p> 374 * 375 * @param values the input array 376 * @param start index of the first array element to include 377 * @param length the number of elements to include 378 * @return the percentile value 379 * @throws MathIllegalArgumentException if the parameters are not valid 380 * 381 */ 382 @Override 383 public double evaluate(final double[] values, final int start, final int length) { 384 return evaluate(values, start, length, quantile); 385 } 386 /** 387 * Returns an estimate of the weighted <code>quantile</code>th percentile of the 388 * designated values in the <code>values</code> array. The quantile 389 * estimated is determined by the <code>quantile</code> property. 390 * <p> 391 * See {@link Percentile} for a description of the percentile estimation 392 * algorithm used.</p> 393 * 394 * @param values the input array 395 * @param sampleWeights the weights of values 396 * @param start index of the first array element to include 397 * @param length the number of elements to include 398 * @return the percentile value 399 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null 400 * @throws NotPositiveException if begin, length is negative 401 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive 402 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN 403 * @throws OutOfRangeException if p is invalid 404 * @throws NumberIsTooLargeException if begin + length is greater than values.length 405 */ 406 public double evaluate(final double[] values, final double[] sampleWeights, 407 final int start, final int length) { 408 return evaluate(values, sampleWeights, start, length, quantile); 409 } 410 411 /** 412 * Returns an estimate of the <code>p</code>th percentile of the values 413 * in the <code>values</code> array, starting with the element in (0-based) 414 * position <code>begin</code> in the array and including <code>length</code> 415 * values. 416 * <p> 417 * Calls to this method do not modify the internal <code>quantile</code> 418 * state of this statistic.</p> 419 * <ul> 420 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 421 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> 422 * if <code>length = 1 </code></li> 423 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> 424 * is null , <code>begin</code> or <code>length</code> is invalid, or 425 * <code>p</code> is not a valid quantile value (p must be greater than 0 426 * and less than or equal to 100)</li> 427 * </ul> 428 * <p> 429 * See {@link Percentile} for a description of the percentile estimation 430 * algorithm used.</p> 431 * 432 * @param values array of input values 433 * @param p the percentile to compute 434 * @param begin the first (0-based) element to include in the computation 435 * @param length the number of array elements to include 436 * @return the percentile value. 437 * @throws MathIllegalArgumentException if the parameters are not valid. 438 */ 439 public double evaluate(final double[] values, final int begin, 440 final int length, final double p) { 441 MathArrays.verifyValues(values, begin, length); 442 if (p > 100 || p <= 0) { 443 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, 444 p, 0, 100); 445 } 446 if (length == 0) { 447 return Double.NaN; 448 } 449 if (length == 1) { 450 return values[begin]; // always return single value for n = 1 451 } 452 453 final double[] work = getWorkArray(values, begin, length); 454 final int[] pivotsHeap = getPivots(values); 455 return work.length == 0 ? 456 Double.NaN : 457 estimationType.evaluate(work, pivotsHeap, p, kthSelector); 458 } 459 /** 460 * Returns an estimate of the <code>p</code>th percentile of the values 461 * in the <code>values</code> array with <code>sampleWeights</code>, starting with the element in (0-based) 462 * position <code>begin</code> in the array and including <code>length</code> 463 * values. 464 * <p> 465 * See {@link Percentile} for a description of the percentile estimation 466 * algorithm used.</p> 467 * 468 * @param values array of input values 469 * @param sampleWeights positive and non-NaN weights of values 470 * @param begin the first (0-based) element to include in the computation 471 * @param length the number of array elements to include 472 * @param p percentile to compute 473 * @return the weighted percentile value 474 * @throws MathIllegalArgumentException if lengths of values and weights are not equal or values or weights is null 475 * @throws NotPositiveException if begin, length is negative 476 * @throws org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException if any weight is not positive 477 * @throws org.apache.commons.math4.legacy.exception.NotANumberException if any weight is NaN 478 * @throws OutOfRangeException if p is invalid 479 * @throws NumberIsTooLargeException if begin + length is greater than values.length 480 */ 481 public double evaluate(final double[] values, final double[] sampleWeights, final int begin, 482 final int length, final double p) { 483 if (values == null || sampleWeights == null) { 484 throw new MathIllegalArgumentException(LocalizedFormats.NULL_NOT_ALLOWED); 485 } 486 // Check length 487 if (values.length != sampleWeights.length) { 488 throw new MathIllegalArgumentException(LocalizedFormats.LENGTH, 489 values, sampleWeights); 490 } 491 MathArrays.verifyValues(values, begin, length); 492 MathArrays.verifyValues(sampleWeights, begin, length); 493 MathArrays.checkPositive(sampleWeights); 494 MathArrays.checkNotNaN(sampleWeights); 495 496 if (p > 100 || p <= 0) { 497 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, 498 p, 0, 100); 499 } 500 if (length == 0) { 501 return Double.NaN; 502 } 503 if (length == 1) { 504 // Always return single value for n = 1 505 return values[begin]; 506 } 507 508 final double[] work = getWorkArray(values, begin, length); 509 final double[] workWeights = getWorkArray(values, sampleWeights, begin, length); 510 return work.length == 0 ? Double.NaN : 511 estimationType.evaluate(work, workWeights, p); 512 } 513 /** 514 * Returns the value of the quantile field (determines what percentile is 515 * computed when evaluate() is called with no quantile argument). 516 * 517 * @return quantile set while construction or {@link #setQuantile(double)} 518 */ 519 public double getQuantile() { 520 return quantile; 521 } 522 523 /** 524 * Sets the value of the quantile field (determines what percentile is 525 * computed when evaluate() is called with no quantile argument). 526 * 527 * @param p a value between 0 < p <= 100 528 * @throws MathIllegalArgumentException if p is not greater than 0 and less 529 * than or equal to 100 530 */ 531 public void setQuantile(final double p) { 532 if (p <= 0 || p > 100) { 533 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, 534 p, 0, 100); 535 } 536 quantile = p; 537 } 538 539 /** 540 * {@inheritDoc} 541 */ 542 @Override 543 public Percentile copy() { 544 return new Percentile(this); 545 } 546 547 /** 548 * Get the work array to operate. Makes use of prior {@code storedData} if 549 * it exists or else do a check on NaNs and copy a subset of the array 550 * defined by begin and length parameters. The set {@link #nanStrategy} will 551 * be used to either retain/remove/replace any NaNs present before returning 552 * the resultant array. 553 * 554 * @param values the array of numbers 555 * @param begin index to start reading the array 556 * @param length the length of array to be read from the begin index 557 * @return work array sliced from values in the range [begin,begin+length) 558 * @throws MathIllegalArgumentException if values or indices are invalid 559 */ 560 private double[] getWorkArray(final double[] values, final int begin, final int length) { 561 final double[] work; 562 if (values == getDataRef()) { 563 work = getDataRef(); 564 } else { 565 switch (nanStrategy) { 566 case MAXIMAL: // Replace NaNs with +INFs 567 work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY); 568 break; 569 case MINIMAL: // Replace NaNs with -INFs 570 work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY); 571 break; 572 case REMOVED: // Drop NaNs from data 573 work = removeAndSlice(values, begin, length, Double.NaN); 574 break; 575 case FAILED: // NaN is not acceptable 576 work = copyOf(values, begin, length); 577 MathArrays.checkNotNaN(work); 578 break; 579 default: // FIXED 580 work = copyOf(values,begin,length); 581 break; 582 } 583 } 584 return work; 585 } 586 /** 587 * Get the work arrays of weights to operate. 588 * 589 * @param values the array of numbers 590 * @param sampleWeights the array of weights 591 * @param begin index to start reading the array 592 * @param length the length of array to be read from the begin index 593 * @return work array sliced from values in the range [begin,begin+length) 594 */ 595 protected double[] getWorkArray(final double[] values, final double[] sampleWeights, 596 final int begin, final int length) { 597 final double[] work; 598 if (values == getDataRef()) { 599 work = this.weights; 600 } else { 601 switch (nanStrategy) { 602 case REMOVED: // Drop weight if the data is NaN 603 work = removeAndSliceByRef(values, sampleWeights, begin, length, Double.NaN); 604 break; 605 default: // FIXED 606 work = copyOf(sampleWeights, begin, length); 607 break; 608 } 609 } 610 return work; 611 } 612 /** 613 * Make a copy of the array for the slice defined by array part from. 614 * [begin, begin+length) 615 * @param values the input array 616 * @param begin start index of the array to include 617 * @param length number of elements to include from begin 618 * @return copy of a slice of the original array 619 */ 620 private static double[] copyOf(final double[] values, final int begin, final int length) { 621 MathArrays.verifyValues(values, begin, length); 622 return Arrays.copyOfRange(values, begin, begin + length); 623 } 624 625 /** 626 * Replace every occurrence of a given value with a replacement value in a 627 * copied slice of array defined by array part from [begin, begin+length). 628 * 629 * @param values the input array 630 * @param begin start index of the array to include 631 * @param length number of elements to include from begin 632 * @param original the value to be replaced with 633 * @param replacement the value to be used for replacement 634 * @return the copy of sliced array with replaced values 635 */ 636 private static double[] replaceAndSlice(final double[] values, 637 final int begin, final int length, 638 final double original, 639 final double replacement) { 640 final double[] temp = copyOf(values, begin, length); 641 for(int i = 0; i < length; i++) { 642 temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ? 643 replacement : 644 temp[i]; 645 } 646 647 return temp; 648 } 649 /** 650 * Remove the occurrence of a given value in a copied slice of array 651 * defined by the array part from [begin, begin+length). 652 * @param values the input array 653 * @param begin start index of the array to include 654 * @param length number of elements to include from begin 655 * @param removedValue the value to be removed from the sliced array 656 * @return the copy of the sliced array after removing the removedValue 657 */ 658 private static double[] removeAndSlice(final double[] values, 659 final int begin, final int length, 660 final double removedValue) { 661 MathArrays.verifyValues(values, begin, length); 662 final double[] temp; 663 // Indicates where the removedValue is located 664 final BitSet bits = new BitSet(length); 665 for (int i = begin; i < begin+length; i++) { 666 if (Precision.equalsIncludingNaN(removedValue, values[i])) { 667 bits.set(i - begin); 668 } 669 } 670 // Check if empty then create a new copy 671 if (bits.isEmpty()) { 672 // Nothing removed, just copy 673 temp = copyOf(values, begin, length); 674 } else if(bits.cardinality() == length) { 675 // All removed, just empty 676 temp = new double[0]; 677 } else { 678 // Some removable, so new 679 temp = new double[length - bits.cardinality()]; 680 // Index from source array (i.e values) 681 int start = begin; 682 // Index in destination array(i.e temp) 683 int dest = 0; 684 // Index of bit set of next one 685 int nextOne = -1; 686 // Start index pointer of bitset 687 int bitSetPtr = 0; 688 while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) { 689 final int lengthToCopy = nextOne - bitSetPtr; 690 System.arraycopy(values, start, temp, dest, lengthToCopy); 691 dest += lengthToCopy; 692 start = begin + (bitSetPtr = bits.nextClearBit(nextOne)); 693 } 694 // Copy any residue past start index till begin+length 695 if (start < begin + length) { 696 System.arraycopy(values,start,temp,dest,begin + length - start); 697 } 698 } 699 return temp; 700 } 701 /** 702 * Remove weights element if the corresponding data is equal to the given value. 703 * in [begin, begin+length) 704 * 705 * @param values the input array 706 * @param sampleWeights weights of the input array 707 * @param begin start index of the array to include 708 * @param length number of elements to include from begin 709 * @param removedValue the value to be removed from the sliced array 710 * @return the copy of the sliced array after removing weights 711 */ 712 private static double[] removeAndSliceByRef(final double[] values, 713 final double[] sampleWeights, 714 final int begin, final int length, 715 final double removedValue) { 716 MathArrays.verifyValues(values, begin, length); 717 final double[] temp; 718 //BitSet(length) to indicate where the removedValue is located 719 final BitSet bits = new BitSet(length); 720 for (int i = begin; i < begin+length; i++) { 721 if (Precision.equalsIncludingNaN(removedValue, values[i])) { 722 bits.set(i - begin); 723 } 724 } 725 //Check if empty then create a new copy 726 if (bits.isEmpty()) { 727 temp = copyOf(sampleWeights, begin, length); // Nothing removed, just copy 728 } else if(bits.cardinality() == length) { 729 temp = new double[0]; // All removed, just empty 730 }else { // Some removable, so new 731 temp = new double[length - bits.cardinality()]; 732 int start = begin; //start index from source array (i.e sampleWeights) 733 int dest = 0; //dest index in destination array(i.e temp) 734 int nextOne = -1; //nextOne is the index of bit set of next one 735 int bitSetPtr = 0; //bitSetPtr is start index pointer of bitset 736 while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) { 737 final int lengthToCopy = nextOne - bitSetPtr; 738 System.arraycopy(sampleWeights, start, temp, dest, lengthToCopy); 739 dest += lengthToCopy; 740 start = begin + (bitSetPtr = bits.nextClearBit(nextOne)); 741 } 742 //Copy any residue past start index till begin+length 743 if (start < begin + length) { 744 System.arraycopy(sampleWeights,start,temp,dest,begin + length - start); 745 } 746 } 747 return temp; 748 } 749 /** 750 * Get pivots which is either cached or a newly created one. 751 * 752 * @param values array containing the input numbers 753 * @return cached pivots or a newly created one 754 */ 755 private int[] getPivots(final double[] values) { 756 final int[] pivotsHeap; 757 if (values == getDataRef()) { 758 pivotsHeap = cachedPivots; 759 } else { 760 pivotsHeap = new int[PIVOTS_HEAP_LENGTH]; 761 Arrays.fill(pivotsHeap, -1); 762 } 763 return pivotsHeap; 764 } 765 766 /** 767 * Get the estimation {@link EstimationType type} used for computation. 768 * 769 * @return the {@code estimationType} set 770 */ 771 public EstimationType getEstimationType() { 772 return estimationType; 773 } 774 775 /** 776 * Build a new instance similar to the current one except for the 777 * {@link EstimationType estimation type}. 778 * <p> 779 * This method is intended to be used as part of a fluent-type builder 780 * pattern. Building finely tune instances should be done as follows: 781 * </p> 782 * <pre> 783 * Percentile customized = new Percentile(quantile). 784 * withEstimationType(estimationType). 785 * withNaNStrategy(nanStrategy). 786 * withKthSelector(kthSelector); 787 * </pre> 788 * <p> 789 * If any of the {@code withXxx} method is omitted, the default value for 790 * the corresponding customization parameter will be used. 791 * </p> 792 * @param newEstimationType estimation type for the new instance. 793 * Cannot be {@code null}. 794 * @return a new instance, with changed estimation type 795 */ 796 public Percentile withEstimationType(final EstimationType newEstimationType) { 797 return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector); 798 } 799 800 /** 801 * Get the {@link NaNStrategy NaN Handling} strategy used for computation. 802 * @return {@code NaN Handling} strategy set during construction 803 */ 804 public NaNStrategy getNaNStrategy() { 805 return nanStrategy; 806 } 807 808 /** 809 * Build a new instance similar to the current one except for the 810 * {@link NaNStrategy NaN handling} strategy. 811 * <p> 812 * This method is intended to be used as part of a fluent-type builder 813 * pattern. Building finely tune instances should be done as follows: 814 * </p> 815 * <pre> 816 * Percentile customized = new Percentile(quantile). 817 * withEstimationType(estimationType). 818 * withNaNStrategy(nanStrategy). 819 * withKthSelector(kthSelector); 820 * </pre> 821 * <p> 822 * If any of the {@code withXxx} method is omitted, the default value for 823 * the corresponding customization parameter will be used. 824 * </p> 825 * @param newNaNStrategy NaN strategy for the new instance. 826 * Cannot be {@code null}. 827 * @return a new instance, with changed NaN handling strategy 828 */ 829 public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) { 830 return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector); 831 } 832 833 /** 834 * Get the {@link KthSelector kthSelector} used for computation. 835 * @return the {@code kthSelector} set 836 */ 837 public KthSelector getKthSelector() { 838 return kthSelector; 839 } 840 841 /** 842 * Get the {@link PivotingStrategy} used in KthSelector for computation. 843 * @return the pivoting strategy set 844 */ 845 public PivotingStrategy getPivotingStrategy() { 846 return kthSelector.getPivotingStrategy(); 847 } 848 849 /** 850 * Build a new instance similar to the current one except for the 851 * {@link KthSelector kthSelector} instance specifically set. 852 * <p> 853 * This method is intended to be used as part of a fluent-type builder 854 * pattern. Building finely tune instances should be done as follows: 855 * </p> 856 * <pre> 857 * Percentile customized = new Percentile(quantile). 858 * withEstimationType(estimationType). 859 * withNaNStrategy(nanStrategy). 860 * withKthSelector(newKthSelector); 861 * </pre> 862 * <p> 863 * If any of the {@code withXxx} method is omitted, the default value for 864 * the corresponding customization parameter will be used. 865 * </p> 866 * @param newKthSelector KthSelector for the new instance. 867 * Cannot be {@code null}. 868 * @return a new instance, with changed KthSelector 869 */ 870 public Percentile withKthSelector(final KthSelector newKthSelector) { 871 return new Percentile(quantile, estimationType, nanStrategy, newKthSelector); 872 } 873 874 /** 875 * An enum for various estimation strategies of a percentile referred in 876 * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a> 877 * with the names of enum matching those of types mentioned in 878 * wikipedia. 879 * <p> 880 * Each enum corresponding to the specific type of estimation in wikipedia 881 * implements the respective formulae that specializes in the below aspects 882 * <ul> 883 * <li>An <b>index method</b> to calculate approximate index of the 884 * estimate</li> 885 * <li>An <b>estimate method</b> to estimate a value found at the earlier 886 * computed index</li> 887 * <li>A <b> minLimit</b> on the quantile for which first element of sorted 888 * input is returned as an estimate </li> 889 * <li>A <b> maxLimit</b> on the quantile for which last element of sorted 890 * input is returned as an estimate </li> 891 * </ul> 892 * <p> 893 * Users can now create {@link Percentile} by explicitly passing this enum; 894 * such as by invoking {@link Percentile#withEstimationType(EstimationType)} 895 * <p> 896 * References: 897 * <ol> 898 * <li> 899 * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a> 900 * </li> 901 * <li> 902 * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf"> 903 * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical 904 * packages, American Statistician 50, 361–365</a> </li> 905 * <li> 906 * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html"> 907 * R-Manual </a></li> 908 * </ol> 909 */ 910 public enum EstimationType { 911 /** 912 * This is the default type used in the {@link Percentile}.This method. 913 * has the following formulae for index and estimates<br> 914 * \( \begin{align} 915 * &index = (N+1)p\ \\ 916 * &estimate = x_{\lceil h\,-\,1/2 \rceil} \\ 917 * &minLimit = 0 \\ 918 * &maxLimit = 1 \\ 919 * \end{align}\) 920 */ 921 LEGACY("Legacy Apache Commons Math") { 922 /** 923 * {@inheritDoc}.This method in particular makes use of existing 924 * Apache Commons Math style of picking up the index. 925 */ 926 @Override 927 protected double index(final double p, final int length) { 928 final double minLimit = 0d; 929 final double maxLimit = 1d; 930 return Double.compare(p, minLimit) == 0 ? 0 : 931 Double.compare(p, maxLimit) == 0 ? 932 length : p * (length + 1); 933 } 934 @Override 935 public double evaluate(final double[] work, final double[] sampleWeights, 936 final double p) { 937 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION); 938 } 939 }, 940 /** 941 * The method R_1 has the following formulae for index and estimates.<br> 942 * \( \begin{align} 943 * &index= Np + 1/2\, \\ 944 * &estimate= x_{\lceil h\,-\,1/2 \rceil} \\ 945 * &minLimit = 0 \\ 946 * \end{align}\) 947 */ 948 R_1("R-1") { 949 950 @Override 951 protected double index(final double p, final int length) { 952 final double minLimit = 0d; 953 return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5; 954 } 955 956 /** 957 * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5) 958 */ 959 @Override 960 protected double estimate(final double[] values, 961 final int[] pivotsHeap, final double pos, 962 final int length, final KthSelector selector) { 963 return super.estimate(values, pivotsHeap, JdkMath.ceil(pos - 0.5), length, selector); 964 } 965 @Override 966 public double evaluate(final double[] work, final double[] sampleWeights, 967 final double p) { 968 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION); 969 } 970 }, 971 /** 972 * The method R_2 has the following formulae for index and estimates.<br> 973 * \( \begin{align} 974 * &index= Np + 1/2\, \\ 975 * &estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} + 976 * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\ 977 * &minLimit = 0 \\ 978 * &maxLimit = 1 \\ 979 * \end{align}\) 980 */ 981 R_2("R-2") { 982 983 @Override 984 protected double index(final double p, final int length) { 985 final double minLimit = 0d; 986 final double maxLimit = 1d; 987 return Double.compare(p, maxLimit) == 0 ? length : 988 Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5; 989 } 990 991 /** 992 * {@inheritDoc}This method in particular for R_2 averages the 993 * values at ceil(p+0.5) and floor(p-0.5). 994 */ 995 @Override 996 protected double estimate(final double[] values, 997 final int[] pivotsHeap, final double pos, 998 final int length, final KthSelector selector) { 999 final double low = 1000 super.estimate(values, pivotsHeap, JdkMath.ceil(pos - 0.5), length, selector); 1001 final double high = 1002 super.estimate(values, pivotsHeap,JdkMath.floor(pos + 0.5), length, selector); 1003 return (low + high) / 2; 1004 } 1005 @Override 1006 public double evaluate(final double[] work, final double[] sampleWeights, 1007 final double p) { 1008 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION); 1009 } 1010 }, 1011 /** 1012 * The method R_3 has the following formulae for index and estimates.<br> 1013 * \( \begin{align} 1014 * &index= Np \\ 1015 * &estimate= x_{\lfloor h \rceil}\, \\ 1016 * &minLimit = 0.5/N \\ 1017 * \end{align}\) 1018 */ 1019 R_3("R-3") { 1020 @Override 1021 protected double index(final double p, final int length) { 1022 final double minLimit = 1d/2 / length; 1023 return Double.compare(p, minLimit) <= 0 ? 1024 0 : JdkMath.rint(length * p); 1025 } 1026 @Override 1027 public double evaluate(final double[] work, final double[] sampleWeights, 1028 final double p) { 1029 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION); 1030 } 1031 }, 1032 /** 1033 * The method R_4 has the following formulae for index and estimates.<br> 1034 * \( \begin{align} 1035 * &index= Np\, \\ 1036 * &estimate= x_{\lfloor h \rfloor} + (h - 1037 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 1038 * \rfloor}) \\ 1039 * &minLimit = 1/N \\ 1040 * &maxLimit = 1 \\ 1041 * \end{align}\) 1042 */ 1043 R_4("R-4") { 1044 @Override 1045 protected double index(final double p, final int length) { 1046 final double minLimit = 1d / length; 1047 final double maxLimit = 1d; 1048 return Double.compare(p, minLimit) < 0 ? 0 : 1049 Double.compare(p, maxLimit) == 0 ? length : length * p; 1050 } 1051 @Override 1052 public double evaluate(final double[] work, final double[] sampleWeights, 1053 final double p) { 1054 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION); 1055 } 1056 }, 1057 /** 1058 * The method R_5 has the following formulae for index and estimates.<br> 1059 * \( \begin{align} 1060 * &index= Np + 1/2\\ 1061 * &estimate= x_{\lfloor h \rfloor} + (h - 1062 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 1063 * \rfloor}) \\ 1064 * &minLimit = 0.5/N \\ 1065 * &maxLimit = (N-0.5)/N 1066 * \end{align}\) 1067 */ 1068 R_5("R-5") { 1069 1070 @Override 1071 protected double index(final double p, final int length) { 1072 final double minLimit = 1d/2 / length; 1073 final double maxLimit = (length - 0.5) / length; 1074 return Double.compare(p, minLimit) < 0 ? 0 : 1075 Double.compare(p, maxLimit) >= 0 ? 1076 length : length * p + 0.5; 1077 } 1078 @Override 1079 public double evaluate(final double[] work, final double[] sampleWeights, 1080 final double p) { 1081 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION); 1082 } 1083 }, 1084 /** 1085 * The method R_6 has the following formulae for index and estimates.<br> 1086 * \( \begin{align} 1087 * &index= (N + 1)p \\ 1088 * &estimate= x_{\lfloor h \rfloor} + (h - 1089 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 1090 * \rfloor}) \\ 1091 * &minLimit = 1/(N+1) \\ 1092 * &maxLimit = N/(N+1) \\ 1093 * \end{align}\) 1094 * <p> 1095 * <b>Note:</b> This method computes the index in a manner very close to 1096 * the default Commons Math Percentile existing implementation. However 1097 * the difference to be noted is in picking up the limits with which 1098 * first element (p<1(N+1)) and last elements (p>N/(N+1))are done. 1099 * While in default case; these are done with p=0 and p=1 respectively. 1100 */ 1101 R_6("R-6") { 1102 1103 @Override 1104 protected double index(final double p, final int length) { 1105 final double minLimit = 1d / (length + 1); 1106 final double maxLimit = 1d * length / (length + 1); 1107 return Double.compare(p, minLimit) < 0 ? 0 : 1108 Double.compare(p, maxLimit) >= 0 ? 1109 length : (length + 1) * p; 1110 } 1111 @Override 1112 public double evaluate(final double[] work, final double[] sampleWeights, 1113 final double p) { 1114 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION); 1115 } 1116 }, 1117 1118 /** 1119 * The method R_7 implements Microsoft Excel style computation has the 1120 * following formulae for index and estimates.<br> 1121 * \( \begin{align} 1122 * &index = (N-1)p + 1 \\ 1123 * &estimate = x_{\lfloor h \rfloor} + (h - 1124 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 1125 * \rfloor}) \\ 1126 * &minLimit = 0 \\ 1127 * &maxLimit = 1 \\ 1128 * \end{align}\) 1129 * The formula to evaluate weighted percentiles is as following.<br> 1130 * \( \begin{align} 1131 * &S_k = (k-1)w_k + (n-1)\sum_{i=1}^{k-1}w_i 1132 * &Then find k s.t. \frac{S_k}{S_n}\leq p \leq \frac{S_{k+1}}{S_n} 1133 * \end{align}\) 1134 */ 1135 R_7("R-7") { 1136 @Override 1137 protected double index(final double p, final int length) { 1138 final double minLimit = 0d; 1139 final double maxLimit = 1d; 1140 return Double.compare(p, minLimit) == 0 ? 0 : 1141 Double.compare(p, maxLimit) == 0 ? 1142 length : 1 + (length - 1) * p; 1143 } 1144 1145 @Override 1146 public double evaluate(final double[] work, final double[] sampleWeights, 1147 final double p) { 1148 SortInPlace.ASCENDING.apply(work, sampleWeights); 1149 double[] sk = new double[work.length]; 1150 for(int k = 0; k < work.length; k++) { 1151 sk[k] = 0; 1152 for (int j = 0; j < k; j++) { 1153 sk[k] += sampleWeights[j]; 1154 } 1155 sk[k] = k * sampleWeights[k] + (work.length - 1) * sk[k]; 1156 } 1157 1158 double qsn = (p / 100) * sk[sk.length-1]; 1159 int k = searchSk(qsn, sk, 0, work.length - 1); 1160 1161 double ret; 1162 if (qsn == sk[k] && k == work.length - 1) { 1163 ret = work[k] - (work[k] - work[k-1]) * (1 - (qsn - sk[k]) / (sk[k] - sk[k-1])); 1164 } else { 1165 ret = work[k] + (work[k+1] - work[k]) * (qsn - sk[k]) / (sk[k+1] - sk[k]); 1166 } 1167 return ret; 1168 } 1169 }, 1170 1171 /** 1172 * The method R_8 has the following formulae for index and estimates.<br> 1173 * \( \begin{align} 1174 * &index = (N + 1/3)p + 1/3 \\ 1175 * &estimate = x_{\lfloor h \rfloor} + (h - 1176 \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 1177 * \rfloor}) \\ 1178 * &minLimit = (2/3)/(N+1/3) \\ 1179 * &maxLimit = (N-1/3)/(N+1/3) \\ 1180 * \end{align}\) 1181 * <p> 1182 * As per Ref [2,3] this approach is most recommended as it provides 1183 * an approximate median-unbiased estimate regardless of distribution. 1184 */ 1185 R_8("R-8") { 1186 @Override 1187 protected double index(final double p, final int length) { 1188 final double minLimit = 2 * (1d / 3) / (length + 1d / 3); 1189 final double maxLimit = 1190 (length - 1d / 3) / (length + 1d / 3); 1191 return Double.compare(p, minLimit) < 0 ? 0 : 1192 Double.compare(p, maxLimit) >= 0 ? length : 1193 (length + 1d / 3) * p + 1d / 3; 1194 } 1195 @Override 1196 public double evaluate(final double[] work, final double[] sampleWeights, 1197 final double p) { 1198 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION); 1199 } 1200 }, 1201 1202 /** 1203 * The method R_9 has the following formulae for index and estimates.<br> 1204 * \( \begin{align} 1205 * &index = (N + 1/4)p + 3/8\\ 1206 * &estimate = x_{\lfloor h \rfloor} + (h - 1207 \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 1208 * \rfloor}) \\ 1209 * &minLimit = (5/8)/(N+1/4) \\ 1210 * &maxLimit = (N-3/8)/(N+1/4) \\ 1211 * \end{align}\) 1212 */ 1213 R_9("R-9") { 1214 @Override 1215 protected double index(final double p, final int length) { 1216 final double minLimit = 5d/8 / (length + 0.25); 1217 final double maxLimit = (length - 3d/8) / (length + 0.25); 1218 return Double.compare(p, minLimit) < 0 ? 0 : 1219 Double.compare(p, maxLimit) >= 0 ? length : 1220 (length + 0.25) * p + 3d/8; 1221 } 1222 @Override 1223 public double evaluate(final double[] work, final double[] sampleWeights, 1224 final double p) { 1225 throw new MathIllegalArgumentException(LocalizedFormats.UNSUPPORTED_OPERATION); 1226 } 1227 }, 1228 ; 1229 1230 /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */ 1231 private final String name; 1232 1233 /** 1234 * Constructor. 1235 * 1236 * @param type name of estimation type as per wikipedia 1237 */ 1238 EstimationType(final String type) { 1239 this.name = type; 1240 } 1241 1242 /** 1243 * Finds the index of array that can be used as starting index to 1244 * {@link #estimate(double[], int[], double, int, KthSelector) estimate} 1245 * percentile. The calculation of index calculation is specific to each 1246 * {@link EstimationType}. 1247 * 1248 * @param p the p<sup>th</sup> quantile 1249 * @param length the total number of array elements in the work array 1250 * @return a computed real valued index as explained in the wikipedia 1251 */ 1252 protected abstract double index(double p, int length); 1253 1254 /** 1255 * Estimation based on K<sup>th</sup> selection. This may be overridden 1256 * in specific enums to compute slightly different estimations. 1257 * 1258 * @param work array of numbers to be used for finding the percentile 1259 * @param pos indicated positional index prior computed from calling 1260 * {@link #index(double, int)} 1261 * @param pivotsHeap an earlier populated cache if exists; will be used 1262 * @param length size of array considered 1263 * @param selector a {@link KthSelector} used for pivoting during search 1264 * @return estimated percentile 1265 */ 1266 protected double estimate(final double[] work, final int[] pivotsHeap, 1267 final double pos, final int length, 1268 final KthSelector selector) { 1269 1270 final double fpos = JdkMath.floor(pos); 1271 final int intPos = (int) fpos; 1272 final double dif = pos - fpos; 1273 1274 if (pos < 1) { 1275 return selector.select(work, pivotsHeap, 0); 1276 } 1277 if (pos >= length) { 1278 return selector.select(work, pivotsHeap, length - 1); 1279 } 1280 1281 final double lower = selector.select(work, pivotsHeap, intPos - 1); 1282 final double upper = selector.select(work, pivotsHeap, intPos); 1283 return lower + dif * (upper - lower); 1284 } 1285 1286 /** 1287 * Evaluate method to compute the percentile for a given bounded array 1288 * using earlier computed pivots heap.<br> 1289 * This basically calls the {@link #index(double, int) index} and then 1290 * {@link #estimate(double[], int[], double, int, KthSelector) estimate} 1291 * functions to return the estimated percentile value. 1292 * 1293 * @param work array of numbers to be used for finding the percentile. 1294 * Cannot be {@code null}. 1295 * @param pivotsHeap a prior cached heap which can speed up estimation 1296 * @param p the p<sup>th</sup> quantile to be computed 1297 * @param selector a {@link KthSelector} used for pivoting during search 1298 * @return estimated percentile 1299 * @throws OutOfRangeException if p is out of range 1300 */ 1301 protected double evaluate(final double[] work, final int[] pivotsHeap, final double p, 1302 final KthSelector selector) { 1303 NullArgumentException.check(work); 1304 if (p > 100 || p <= 0) { 1305 throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, 1306 p, 0, 100); 1307 } 1308 return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector); 1309 } 1310 1311 /** 1312 * Evaluate method to compute the percentile for a given bounded array. 1313 * This basically calls the {@link #index(double, int) index} and then 1314 * {@link #estimate(double[], int[], double, int, KthSelector) estimate} 1315 * functions to return the estimated percentile value. Please 1316 * note that this method does not make use of cached pivots. 1317 * 1318 * @param work array of numbers to be used for finding the percentile. 1319 * Cannot be {@code null}. 1320 * @param p the p<sup>th</sup> quantile to be computed 1321 * @return estimated percentile 1322 * @param selector a {@link KthSelector} used for pivoting during search 1323 * @throws OutOfRangeException if length or p is out of range 1324 */ 1325 public double evaluate(final double[] work, final double p, final KthSelector selector) { 1326 return this.evaluate(work, null, p, selector); 1327 } 1328 /** 1329 * Evaluate weighted percentile by estimation rule specified in {@link EstimationType}. 1330 * @param work array of numbers to be used for finding the percentile 1331 * @param sampleWeights the corresponding weights of data in work 1332 * @param p the p<sup>th</sup> quantile to be computed 1333 * @return estimated weighted percentile 1334 * @throws MathIllegalArgumentException if weighted percentile is not supported by the current estimationType 1335 */ 1336 public abstract double evaluate(double[] work, double[] sampleWeights, 1337 double p); 1338 /** 1339 * Search the interval q*sn locates in. 1340 * @param qsn q*sn, where n refers to the data size 1341 * @param sk the cumulative weights array 1342 * @param lo start position to search qsn 1343 * @param hi end position to search qsn 1344 * @return the index of lower bound qsn locates in 1345 */ 1346 private static int searchSk(double qsn, double[] sk, int lo, int hi) { 1347 if (sk.length == 1) { 1348 return 0; 1349 } 1350 if (hi - lo == 1) { 1351 if (qsn == sk[hi]) { 1352 return hi; 1353 } 1354 return lo; 1355 } else { 1356 int mid = (lo + hi) >>> 1; 1357 if (qsn == sk[mid]) { 1358 return mid; 1359 } else if (qsn > sk[mid]) { 1360 return searchSk(qsn, sk, mid, hi); 1361 } else { 1362 return searchSk(qsn, sk, lo, mid); 1363 } 1364 } 1365 } 1366 /** 1367 * Gets the name of the enum. 1368 * 1369 * @return the name 1370 */ 1371 String getName() { 1372 return name; 1373 } 1374 } 1375 }