1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.statistics.descriptive; 19 20 import java.util.Objects; 21 import org.apache.commons.numbers.arrays.Selection; 22 23 /** 24 * Returns the median of the available values. 25 * 26 * <p>For values of length {@code n}, let {@code k = n / 2}: 27 * <ul> 28 * <li>The result is {@code NaN} if {@code n = 0}. 29 * <li>The result is {@code values[k]} if {@code n} is odd. 30 * <li>The result is {@code (values[k - 1] + values[k]) / 2} if {@code n} is even. 31 * </ul> 32 * 33 * <p>This implementation respects the ordering imposed by 34 * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs 35 * in the selected positions in the fully sorted values then the result is {@code NaN}. 36 * 37 * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values. 38 * 39 * <p>Instances of this class are immutable and thread-safe. 40 * 41 * @see #with(NaNPolicy) 42 * @see <a href="https://en.wikipedia.org/wiki/Median">Median (Wikipedia)</a> 43 * @since 1.1 44 */ 45 public final class Median { 46 /** Default instance. */ 47 private static final Median DEFAULT = new Median(false, NaNPolicy.INCLUDE); 48 49 /** Flag to indicate if the data should be copied. */ 50 private final boolean copy; 51 /** NaN policy for floating point data. */ 52 private final NaNPolicy nanPolicy; 53 /** Transformer for NaN data. */ 54 private final NaNTransformer nanTransformer; 55 56 /** 57 * @param copy Flag to indicate if the data should be copied. 58 * @param nanPolicy NaN policy. 59 */ 60 private Median(boolean copy, NaNPolicy nanPolicy) { 61 this.copy = copy; 62 this.nanPolicy = nanPolicy; 63 nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy); 64 } 65 66 /** 67 * Return a new instance with the default options. 68 * 69 * <ul> 70 * <li>{@linkplain #withCopy(boolean) Copy = false} 71 * <li>{@linkplain #with(NaNPolicy) NaN policy = include} 72 * </ul> 73 * 74 * <p>Note: The default options configure for processing in-place and including 75 * {@code NaN} values in the data. This is the most efficient mode and has the 76 * smallest memory consumption. 77 * 78 * @return the median implementation 79 * @see #withCopy(boolean) 80 * @see #with(NaNPolicy) 81 */ 82 public static Median withDefaults() { 83 return DEFAULT; 84 } 85 86 /** 87 * Return an instance with the configured copy behaviour. If {@code false} then 88 * the input array will be modified by the call to evaluate the median; otherwise 89 * the computation uses a copy of the data. 90 * 91 * @param v Value. 92 * @return an instance 93 */ 94 public Median withCopy(boolean v) { 95 return new Median(v, nanPolicy); 96 } 97 98 /** 99 * Return an instance with the configured {@link NaNPolicy}. 100 * 101 * <p>Note: This implementation respects the ordering imposed by 102 * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is 103 * considered greater than all other values, and all {@code NaN} values are equal. The 104 * {@link NaNPolicy} changes the computation of the statistic in the presence of 105 * {@code NaN} values. 106 * 107 * <ul> 108 * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data; 109 * the size of the data <em>includes</em> the {@code NaN} values and the median will be 110 * {@code NaN} if any value used for median interpolation is {@code NaN}. 111 * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data; 112 * the size of the data <em>excludes</em> the {@code NaN} values and the median will 113 * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero 114 * and the result is {@code NaN}. 115 * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN} 116 * values. 117 * </ul> 118 * 119 * <p>Note that the result is identical for all policies if no {@code NaN} values are present. 120 * 121 * @param v Value. 122 * @return an instance 123 */ 124 public Median with(NaNPolicy v) { 125 return new Median(copy, Objects.requireNonNull(v)); 126 } 127 128 /** 129 * Evaluate the median. 130 * 131 * <p>Note: This method may partially sort the input values if not configured to 132 * {@link #withCopy(boolean) copy} the input data. 133 * 134 * @param values Values. 135 * @return the median 136 */ 137 public double evaluate(double[] values) { 138 // Floating-point data handling 139 final int[] bounds = new int[1]; 140 final double[] x = nanTransformer.apply(values, bounds); 141 final int n = bounds[0]; 142 // Special cases 143 if (n <= 2) { 144 switch (n) { 145 case 2: 146 // Sorting the array matches the behaviour of Quantile for n==2 147 // Handle NaN and signed zeros 148 if (Double.compare(x[1], x[0]) < 0) { 149 final double t = x[0]; 150 x[0] = x[1]; 151 x[1] = t; 152 } 153 return Interpolation.mean(x[0], x[1]); 154 case 1: 155 return x[0]; 156 default: 157 return Double.NaN; 158 } 159 } 160 // Median index 161 final int m = n >>> 1; 162 // Odd 163 if ((n & 0x1) == 1) { 164 Selection.select(x, 0, n, m); 165 return x[m]; 166 } 167 // Even: require (m-1, m) 168 Selection.select(x, 0, n, new int[] {m - 1, m}); 169 return Interpolation.mean(x[m - 1], x[m]); 170 } 171 172 /** 173 * Evaluate the median. 174 * 175 * <p>Note: This method may partially sort the input values if not configured to 176 * {@link #withCopy(boolean) copy} the input data. 177 * 178 * @param values Values. 179 * @return the median 180 */ 181 public double evaluate(int[] values) { 182 final int[] x = copy ? values.clone() : values; 183 final int n = values.length; 184 // Special cases 185 if (n <= 2) { 186 switch (n) { 187 case 2: 188 // Sorting the array matches the behaviour of Quantile for n==2 189 if (x[1] < x[0]) { 190 final int t = x[0]; 191 x[0] = x[1]; 192 x[1] = t; 193 } 194 return Interpolation.mean(x[0], x[1]); 195 case 1: 196 return x[0]; 197 default: 198 return Double.NaN; 199 } 200 } 201 // Median index 202 final int m = n >>> 1; 203 // Odd 204 if ((n & 0x1) == 1) { 205 Selection.select(x, 0, n, m); 206 return x[m]; 207 } 208 // Even: require (m-1, m) 209 Selection.select(x, 0, n, new int[] {m - 1, m}); 210 return Interpolation.mean(x[m - 1], x[m]); 211 } 212 }