001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.statistics.descriptive; 018 019import java.util.Arrays; 020import java.util.Objects; 021import java.util.function.IntToDoubleFunction; 022import org.apache.commons.numbers.arrays.Selection; 023 024/** 025 * Provides quantile computation. 026 * 027 * <p>For values of length {@code n}: 028 * <ul> 029 * <li>The result is {@code NaN} if {@code n = 0}. 030 * <li>The result is {@code values[0]} if {@code n = 1}. 031 * <li>Otherwise the result is computed using the {@link EstimationMethod}. 032 * </ul> 033 * 034 * <p>Computation of multiple quantiles and will handle duplicate and unordered 035 * probabilities. Passing ordered probabilities is recommended if the order is already 036 * known as this can improve efficiency; for example using uniform spacing through the 037 * array data, or to identify extreme values from the data such as {@code [0.001, 0.999]}. 038 * 039 * <p>This implementation respects the ordering imposed by 040 * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs 041 * in the selected positions in the fully sorted values then the result is {@code NaN}. 042 * 043 * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values. 044 * 045 * <p>Instances of this class are immutable and thread-safe. 046 * 047 * @see #with(NaNPolicy) 048 * @see <a href="http://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a> 049 * @since 1.1 050 */ 051public final class Quantile { 052 /** Message when the probability is not in the range {@code [0, 1]}. */ 053 private static final String INVALID_PROBABILITY = "Invalid probability: "; 054 /** Message when no probabilities are provided for the varargs method. */ 055 private static final String NO_PROBABILITIES_SPECIFIED = "No probabilities specified"; 056 /** Message when the size is not valid. */ 057 private static final String INVALID_SIZE = "Invalid size: "; 058 /** Message when the number of probabilities in a range is not valid. */ 059 private static final String INVALID_NUMBER_OF_PROBABILITIES = "Invalid number of probabilities: "; 060 061 /** Default instance. Method 8 is recommended by Hyndman and Fan. */ 062 private static final Quantile DEFAULT = new Quantile(false, NaNPolicy.INCLUDE, EstimationMethod.HF8); 063 064 /** Flag to indicate if the data should be copied. */ 065 private final boolean copy; 066 /** NaN policy for floating point data. */ 067 private final NaNPolicy nanPolicy; 068 /** Transformer for NaN data. */ 069 private final NaNTransformer nanTransformer; 070 /** Estimation type used to determine the value from the quantile. */ 071 private final EstimationMethod estimationType; 072 073 /** 074 * @param copy Flag to indicate if the data should be copied. 075 * @param nanPolicy NaN policy. 076 * @param estimationType Estimation type used to determine the value from the quantile. 077 */ 078 private Quantile(boolean copy, NaNPolicy nanPolicy, EstimationMethod estimationType) { 079 this.copy = copy; 080 this.nanPolicy = nanPolicy; 081 this.estimationType = estimationType; 082 nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy); 083 } 084 085 /** 086 * Return a new instance with the default options. 087 * 088 * <ul> 089 * <li>{@linkplain #withCopy(boolean) Copy = false} 090 * <li>{@linkplain #with(NaNPolicy) NaN policy = include} 091 * <li>{@linkplain #with(EstimationMethod) Estimation method = HF8} 092 * </ul> 093 * 094 * <p>Note: The default options configure for processing in-place and including 095 * {@code NaN} values in the data. This is the most efficient mode and has the 096 * smallest memory consumption. 097 * 098 * @return the quantile implementation 099 * @see #withCopy(boolean) 100 * @see #with(NaNPolicy) 101 * @see #with(EstimationMethod) 102 */ 103 public static Quantile withDefaults() { 104 return DEFAULT; 105 } 106 107 /** 108 * Return an instance with the configured copy behaviour. If {@code false} then 109 * the input array will be modified by the call to evaluate the quantiles; otherwise 110 * the computation uses a copy of the data. 111 * 112 * @param v Value. 113 * @return an instance 114 */ 115 public Quantile withCopy(boolean v) { 116 return new Quantile(v, nanPolicy, estimationType); 117 } 118 119 /** 120 * Return an instance with the configured {@link NaNPolicy}. 121 * 122 * <p>Note: This implementation respects the ordering imposed by 123 * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is 124 * considered greater than all other values, and all {@code NaN} values are equal. The 125 * {@link NaNPolicy} changes the computation of the statistic in the presence of 126 * {@code NaN} values. 127 * 128 * <ul> 129 * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data; 130 * the size of the data <em>includes</em> the {@code NaN} values and the quantile will be 131 * {@code NaN} if any value used for quantile interpolation is {@code NaN}. 132 * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data; 133 * the size of the data <em>excludes</em> the {@code NaN} values and the quantile will 134 * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero 135 * and the result is {@code NaN}. 136 * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN} 137 * values. 138 * </ul> 139 * 140 * <p>Note that the result is identical for all policies if no {@code NaN} values are present. 141 * 142 * @param v Value. 143 * @return an instance 144 */ 145 public Quantile with(NaNPolicy v) { 146 return new Quantile(copy, Objects.requireNonNull(v), estimationType); 147 } 148 149 /** 150 * Return an instance with the configured {@link EstimationMethod}. 151 * 152 * @param v Value. 153 * @return an instance 154 */ 155 public Quantile with(EstimationMethod v) { 156 return new Quantile(copy, nanPolicy, Objects.requireNonNull(v)); 157 } 158 159 /** 160 * Generate {@code n} evenly spaced probabilities in the range {@code [0, 1]}. 161 * 162 * <pre> 163 * 1/(n + 1), 2/(n + 1), ..., n/(n + 1) 164 * </pre> 165 * 166 * @param n Number of probabilities. 167 * @return the probabilities 168 * @throws IllegalArgumentException if {@code n < 1} 169 */ 170 public static double[] probabilities(int n) { 171 checkNumberOfProbabilities(n); 172 final double c1 = n + 1.0; 173 final double[] p = new double[n]; 174 for (int i = 0; i < n; i++) { 175 p[i] = (i + 1.0) / c1; 176 } 177 return p; 178 } 179 180 /** 181 * Generate {@code n} evenly spaced probabilities in the range {@code [p1, p2]}. 182 * 183 * <pre> 184 * w = p2 - p1 185 * p1 + w/(n + 1), p1 + 2w/(n + 1), ..., p1 + nw/(n + 1) 186 * </pre> 187 * 188 * @param n Number of probabilities. 189 * @param p1 Lower probability. 190 * @param p2 Upper probability. 191 * @return the probabilities 192 * @throws IllegalArgumentException if {@code n < 1}; if the probabilities are not in the 193 * range {@code [0, 1]}; or {@code p2 <= p1}. 194 */ 195 public static double[] probabilities(int n, double p1, double p2) { 196 checkProbability(p1); 197 checkProbability(p2); 198 if (p2 <= p1) { 199 throw new IllegalArgumentException("Invalid range: [" + p1 + ", " + p2 + "]"); 200 } 201 final double[] p = probabilities(n); 202 for (int i = 0; i < n; i++) { 203 p[i] = (1 - p[i]) * p1 + p[i] * p2; 204 } 205 return p; 206 } 207 208 /** 209 * Evaluate the {@code p}-th quantile of the values. 210 * 211 * <p>Note: This method may partially sort the input values if not configured to 212 * {@link #withCopy(boolean) copy} the input data. 213 * 214 * <p><strong>Performance</strong> 215 * 216 * <p>It is not recommended to use this method for repeat calls for different quantiles 217 * within the same values. The {@link #evaluate(double[], double...)} method should be used 218 * which provides better performance. 219 * 220 * @param values Values. 221 * @param p Probability for the quantile to compute. 222 * @return the quantile 223 * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]} 224 * @see #evaluate(double[], double...) 225 */ 226 public double evaluate(double[] values, double p) { 227 checkProbability(p); 228 // Floating-point data handling 229 final int[] bounds = new int[1]; 230 final double[] x = nanTransformer.apply(values, bounds); 231 final int n = bounds[0]; 232 // Special cases 233 if (n <= 1) { 234 return n == 0 ? Double.NaN : x[0]; 235 } 236 final double pos = estimationType.index(p, n); 237 final int i = (int) pos; 238 239 // Partition and compute 240 if (pos > i) { 241 Selection.select(x, 0, n, new int[] {i, i + 1}); 242 return Interpolation.interpolate(x[i], x[i + 1], pos - i); 243 } 244 Selection.select(x, 0, n, i); 245 return x[i]; 246 } 247 248 /** 249 * Evaluate the {@code p}-th quantiles of the values. 250 * 251 * <p>Note: This method may partially sort the input values if not configured to 252 * {@link #withCopy(boolean) copy} the input data. 253 * 254 * @param values Values. 255 * @param p Probabilities for the quantiles to compute. 256 * @return the quantiles 257 * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]}; 258 * or no probabilities are specified. 259 */ 260 public double[] evaluate(double[] values, double... p) { 261 checkProbabilities(p); 262 // Floating-point data handling 263 final int[] bounds = new int[1]; 264 final double[] x = nanTransformer.apply(values, bounds); 265 final int n = bounds[0]; 266 // Special cases 267 final double[] q = new double[p.length]; 268 if (n <= 1) { 269 Arrays.fill(q, n == 0 ? Double.NaN : x[0]); 270 return q; 271 } 272 273 // Collect interpolation positions. We use the output q as storage. 274 final int[] indices = computeIndices(n, p, q); 275 276 // Partition 277 Selection.select(x, 0, n, indices); 278 279 // Compute 280 for (int k = 0; k < p.length; k++) { 281 final int i = (int) q[k]; 282 if (q[k] > i) { 283 q[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - i); 284 } else { 285 q[k] = x[i]; 286 } 287 } 288 return q; 289 } 290 291 /** 292 * Evaluate the {@code p}-th quantile of the values. 293 * 294 * <p>Note: This method may partially sort the input values if not configured to 295 * {@link #withCopy(boolean) copy} the input data. 296 * 297 * <p><strong>Performance</strong> 298 * 299 * <p>It is not recommended to use this method for repeat calls for different quantiles 300 * within the same values. The {@link #evaluate(int[], double...)} method should be used 301 * which provides better performance. 302 * 303 * @param values Values. 304 * @param p Probability for the quantile to compute. 305 * @return the quantile 306 * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]} 307 * @see #evaluate(int[], double...) 308 */ 309 public double evaluate(int[] values, double p) { 310 checkProbability(p); 311 final int n = values.length; 312 // Special cases 313 if (n <= 1) { 314 return n == 0 ? Double.NaN : values[0]; 315 } 316 final double pos = estimationType.index(p, n); 317 final int i = (int) pos; 318 319 // Partition and compute 320 final int[] x = copy ? values.clone() : values; 321 if (pos > i) { 322 Selection.select(x, 0, n, new int[] {i, i + 1}); 323 return Interpolation.interpolate(x[i], x[i + 1], pos - i); 324 } 325 Selection.select(x, 0, n, i); 326 return x[i]; 327 } 328 329 /** 330 * Evaluate the {@code p}-th quantiles of the values. 331 * 332 * <p>Note: This method may partially sort the input values if not configured to 333 * {@link #withCopy(boolean) copy} the input data. 334 * 335 * @param values Values. 336 * @param p Probabilities for the quantiles to compute. 337 * @return the quantiles 338 * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]}; 339 * or no probabilities are specified. 340 */ 341 public double[] evaluate(int[] values, double... p) { 342 checkProbabilities(p); 343 final int n = values.length; 344 // Special cases 345 final double[] q = new double[p.length]; 346 if (n <= 1) { 347 Arrays.fill(q, n == 0 ? Double.NaN : values[0]); 348 return q; 349 } 350 351 // Collect interpolation positions. We use the output q as storage. 352 final int[] indices = computeIndices(n, p, q); 353 354 // Partition 355 final int[] x = copy ? values.clone() : values; 356 Selection.select(x, 0, n, indices); 357 358 // Compute 359 for (int k = 0; k < p.length; k++) { 360 final int i = (int) q[k]; 361 if (q[k] > i) { 362 q[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - i); 363 } else { 364 q[k] = x[i]; 365 } 366 } 367 return q; 368 } 369 370 /** 371 * Evaluate the {@code p}-th quantile of the values. 372 * 373 * <p>This method can be used when the values of known size are already sorted. 374 * 375 * <pre>{@code 376 * short[] x = ... 377 * Arrays.sort(x); 378 * double q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.05); 379 * }</pre> 380 * 381 * @param n Size of the values. 382 * @param values Values function. 383 * @param p Probability for the quantile to compute. 384 * @return the quantile 385 * @throws IllegalArgumentException if {@code size < 0}; or if the probability {@code p} is 386 * not in the range {@code [0, 1]}. 387 */ 388 public double evaluate(int n, IntToDoubleFunction values, double p) { 389 checkSize(n); 390 checkProbability(p); 391 // Special case 392 if (n <= 1) { 393 return n == 0 ? Double.NaN : values.applyAsDouble(0); 394 } 395 final double pos = estimationType.index(p, n); 396 final int i = (int) pos; 397 final double v1 = values.applyAsDouble(i); 398 if (pos > i) { 399 final double v2 = values.applyAsDouble(i + 1); 400 return Interpolation.interpolate(v1, v2, pos - i); 401 } 402 return v1; 403 } 404 405 /** 406 * Evaluate the {@code p}-th quantiles of the values. 407 * 408 * <p>This method can be used when the values of known size are already sorted. 409 * 410 * <pre>{@code 411 * short[] x = ... 412 * Arrays.sort(x); 413 * double[] q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.25, 0.5, 0.75); 414 * }</pre> 415 * 416 * @param n Size of the values. 417 * @param values Values function. 418 * @param p Probabilities for the quantiles to compute. 419 * @return the quantiles 420 * @throws IllegalArgumentException if {@code size < 0}; if any probability {@code p} is 421 * not in the range {@code [0, 1]}; or no probabilities are specified. 422 */ 423 public double[] evaluate(int n, IntToDoubleFunction values, double... p) { 424 checkSize(n); 425 checkProbabilities(p); 426 // Special case 427 final double[] q = new double[p.length]; 428 if (n <= 1) { 429 Arrays.fill(q, n == 0 ? Double.NaN : values.applyAsDouble(0)); 430 return q; 431 } 432 for (int k = 0; k < p.length; k++) { 433 final double pos = estimationType.index(p[k], n); 434 final int i = (int) pos; 435 final double v1 = values.applyAsDouble(i); 436 if (pos > i) { 437 final double v2 = values.applyAsDouble(i + 1); 438 q[k] = Interpolation.interpolate(v1, v2, pos - i); 439 } else { 440 q[k] = v1; 441 } 442 } 443 return q; 444 } 445 446 /** 447 * Check the probability {@code p} is in the range {@code [0, 1]}. 448 * 449 * @param p Probability for the quantile to compute. 450 * @throws IllegalArgumentException if the probability is not in the range {@code [0, 1]} 451 */ 452 private static void checkProbability(double p) { 453 // Logic negation will detect NaN 454 if (!(p >= 0 && p <= 1)) { 455 throw new IllegalArgumentException(INVALID_PROBABILITY + p); 456 } 457 } 458 459 /** 460 * Check the probabilities {@code p} are in the range {@code [0, 1]}. 461 * 462 * @param p Probabilities for the quantiles to compute. 463 * @throws IllegalArgumentException if any probabilities {@code p} is not in the range {@code [0, 1]}; 464 * or no probabilities are specified. 465 */ 466 private static void checkProbabilities(double... p) { 467 if (p.length == 0) { 468 throw new IllegalArgumentException(NO_PROBABILITIES_SPECIFIED); 469 } 470 for (final double pp : p) { 471 checkProbability(pp); 472 } 473 } 474 475 /** 476 * Check the {@code size} is positive. 477 * 478 * @param n Size of the values. 479 * @throws IllegalArgumentException if {@code size < 0} 480 */ 481 private static void checkSize(int n) { 482 if (n < 0) { 483 throw new IllegalArgumentException(INVALID_SIZE + n); 484 } 485 } 486 487 /** 488 * Check the number of probabilities {@code n} is strictly positive. 489 * 490 * @param n Number of probabilities. 491 * @throws IllegalArgumentException if {@code c < 1} 492 */ 493 private static void checkNumberOfProbabilities(int n) { 494 if (n < 1) { 495 throw new IllegalArgumentException(INVALID_NUMBER_OF_PROBABILITIES + n); 496 } 497 } 498 499 /** 500 * Compute the indices required for quantile interpolation. 501 * 502 * <p>The zero-based interpolation index in {@code [0, n)} is 503 * saved into the working array {@code q} for each {@code p}. 504 * 505 * @param n Size of the data. 506 * @param p Probabilities for the quantiles to compute. 507 * @param q Working array for quantiles. 508 * @return the indices 509 */ 510 private int[] computeIndices(int n, double[] p, double[] q) { 511 final int[] indices = new int[p.length << 1]; 512 int count = 0; 513 for (int k = 0; k < p.length; k++) { 514 final double pos = estimationType.index(p[k], n); 515 q[k] = pos; 516 final int i = (int) pos; 517 indices[count++] = i; 518 if (pos > i) { 519 // Require the next index for interpolation 520 indices[count++] = i + 1; 521 } 522 } 523 if (count < indices.length) { 524 return Arrays.copyOf(indices, count); 525 } 526 return indices; 527 } 528 529 /** 530 * Estimation methods for a quantile. Provides the nine quantile algorithms 531 * defined in Hyndman and Fan (1996)[1] as {@code HF1 - HF9}. 532 * 533 * <p>Samples quantiles are defined by: 534 * 535 * <p>\[ Q(p) = (1 - \gamma) x_j + \gamma x_{j+1} \] 536 * 537 * <p>where \( \frac{j-m}{n} \leq p \le \frac{j-m+1}{n} \), \( x_j \) is the \( j \)th 538 * order statistic, \( n \) is the sample size, the value of \( \gamma \) is a function 539 * of \( j = \lfloor np+m \rfloor \) and \( g = np + m - j \), and \( m \) is a constant 540 * determined by the sample quantile type. 541 * 542 * <p>Note that the real-valued position \( np + m \) is a 1-based index and 543 * \( j \in [1, n] \). If the real valued position is computed as beyond the lowest or 544 * highest values in the sample, this implementation will return the minimum or maximum 545 * observation respectively. 546 * 547 * <p>Types 1, 2, and 3 are discontinuous functions of \( p \); types 4 to 9 are continuous 548 * functions of \( p \). 549 * 550 * <p>For the continuous functions, the probability \( p_k \) is provided for the \( k \)-th order 551 * statistic in size \( n \). Samples quantiles are equivalently obtained to \( Q(p) \) by 552 * linear interpolation between points \( (p_k, x_k) \) and \( (p_{k+1}, x_{k+1}) \) for 553 * any \( p_k \leq p \leq p_{k+1} \). 554 * 555 * <ol> 556 * <li>Hyndman and Fan (1996) 557 * <i>Sample Quantiles in Statistical Packages.</i> 558 * The American Statistician, 50, 361-365. 559 * <a href="https://www.jstor.org/stable/2684934">doi.org/10.2307/2684934</a> 560 * <li><a href="http://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a> 561 * </ol> 562 */ 563 public enum EstimationMethod { 564 /** 565 * Inverse of the empirical distribution function. 566 * 567 * <p>\( m = 0 \). \( \gamma = 0 \) if \( g = 0 \), and 1 otherwise. 568 */ 569 HF1 { 570 @Override 571 double position0(double p, int n) { 572 // position = np + 0. This is 1-based so adjust to 0-based. 573 return Math.ceil(n * p) - 1; 574 } 575 }, 576 /** 577 * Similar to {@link #HF1} with averaging at discontinuities. 578 * 579 * <p>\( m = 0 \). \( \gamma = 0.5 \) if \( g = 0 \), and 1 otherwise. 580 */ 581 HF2 { 582 @Override 583 double position0(double p, int n) { 584 final double pos = n * p; 585 // Average at discontinuities 586 final int j = (int) pos; 587 final double g = pos - j; 588 if (g == 0) { 589 return j - 0.5; 590 } 591 // As HF1 : ceil(j + g) - 1 592 return j; 593 } 594 }, 595 /** 596 * The observation closest to \( np \). Ties are resolved to the nearest even order statistic. 597 * 598 * <p>\( m = -1/2 \). \( \gamma = 0 \) if \( g = 0 \) and \( j \) is even, and 1 otherwise. 599 */ 600 HF3 { 601 @Override 602 double position0(double p, int n) { 603 // Let rint do the work for ties to even 604 return Math.rint(n * p) - 1; 605 } 606 }, 607 /** 608 * Linear interpolation of the inverse of the empirical CDF. 609 * 610 * <p>\( m = 0 \). \( p_k = \frac{k}{n} \). 611 */ 612 HF4 { 613 @Override 614 double position0(double p, int n) { 615 // np + 0 - 1 616 return n * p - 1; 617 } 618 }, 619 /** 620 * A piecewise linear function where the knots are the values midway through the steps of 621 * the empirical CDF. Proposed by Hazen (1914) and popular amongst hydrologists. 622 * 623 * <p>\( m = 1/2 \). \( p_k = \frac{k - 1/2}{n} \). 624 */ 625 HF5 { 626 @Override 627 double position0(double p, int n) { 628 // np + 0.5 - 1 629 return n * p - 0.5; 630 } 631 }, 632 /** 633 * Linear interpolation of the expectations for the order statistics for the uniform 634 * distribution on [0,1]. Proposed by Weibull (1939). 635 * 636 * <p>\( m = p \). \( p_k = \frac{k}{n + 1} \). 637 * 638 * <p>This method computes the quantile as per the Apache Commons Math Percentile 639 * legacy implementation. 640 */ 641 HF6 { 642 @Override 643 double position0(double p, int n) { 644 // np + p - 1 645 return (n + 1) * p - 1; 646 } 647 }, 648 /** 649 * Linear interpolation of the modes for the order statistics for the uniform 650 * distribution on [0,1]. Proposed by Gumbull (1939). 651 * 652 * <p>\( m = 1 - p \). \( p_k = \frac{k - 1}{n - 1} \). 653 */ 654 HF7 { 655 @Override 656 double position0(double p, int n) { 657 // np + 1-p - 1 658 return (n - 1) * p; 659 } 660 }, 661 /** 662 * Linear interpolation of the approximate medians for order statistics. 663 * 664 * <p>\( m = (p + 1)/3 \). \( p_k = \frac{k - 1/3}{n + 1/3} \). 665 * 666 * <p>As per Hyndman and Fan (1996) this approach is most recommended as it provides 667 * an approximate median-unbiased estimate regardless of distribution. 668 */ 669 HF8 { 670 @Override 671 double position0(double p, int n) { 672 return n * p + (p + 1) / 3 - 1; 673 } 674 }, 675 /** 676 * Quantile estimates are approximately unbiased for the expected order statistics if 677 * \( x \) is normally distributed. 678 * 679 * <p>\( m = p/4 + 3/8 \). \( p_k = \frac{k - 3/8}{n + 1/4} \). 680 */ 681 HF9 { 682 @Override 683 double position0(double p, int n) { 684 // np + p/4 + 3/8 - 1 685 return (n + 0.25) * p - 0.625; 686 } 687 }; 688 689 /** 690 * Finds the real-valued position for calculation of the quantile. 691 * 692 * <p>Return {@code i + g} such that the quantile value from sorted data is: 693 * 694 * <p>value = data[i] + g * (data[i+1] - data[i]) 695 * 696 * <p>Warning: Interpolation should not use {@code data[i+1]} unless {@code g != 0}. 697 * 698 * <p>Note: In contrast to the definition of Hyndman and Fan in the class header 699 * which uses a 1-based position, this is a zero based index. This change is for 700 * convenience when addressing array positions. 701 * 702 * @param p p<sup>th</sup> quantile. 703 * @param n Size. 704 * @return a real-valued position (0-based) into the range {@code [0, n)} 705 */ 706 abstract double position0(double p, int n); 707 708 /** 709 * Finds the index {@code i} and fractional part {@code g} of a real-valued position 710 * to interpolate the quantile. 711 * 712 * <p>Return {@code i + g} such that the quantile value from sorted data is: 713 * 714 * <p>value = data[i] + g * (data[i+1] - data[i]) 715 * 716 * <p>Note: Interpolation should not use {@code data[i+1]} unless {@code g != 0}. 717 * 718 * @param p p<sup>th</sup> quantile. 719 * @param n Size. 720 * @return index (in [0, n-1]) 721 */ 722 final double index(double p, int n) { 723 final double pos = position0(p, n); 724 // Bounds check in [0, n-1] 725 if (pos < 0) { 726 return 0; 727 } 728 if (pos > n - 1.0) { 729 return n - 1.0; 730 } 731 return pos; 732 } 733 } 734}