001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.statistics.descriptive; 019 020import java.util.Objects; 021import org.apache.commons.numbers.arrays.Selection; 022 023/** 024 * Returns the median of the available values. 025 * 026 * <p>For values of length {@code n}, let {@code k = n / 2}: 027 * <ul> 028 * <li>The result is {@code NaN} if {@code n = 0}. 029 * <li>The result is {@code values[k]} if {@code n} is odd. 030 * <li>The result is {@code (values[k - 1] + values[k]) / 2} if {@code n} is even. 031 * </ul> 032 * 033 * <p>This implementation respects the ordering imposed by 034 * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs 035 * in the selected positions in the fully sorted values then the result is {@code NaN}. 036 * 037 * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values. 038 * 039 * <p>Instances of this class are immutable and thread-safe. 040 * 041 * @see #with(NaNPolicy) 042 * @see <a href="https://en.wikipedia.org/wiki/Median">Median (Wikipedia)</a> 043 * @since 1.1 044 */ 045public final class Median { 046 /** Default instance. */ 047 private static final Median DEFAULT = new Median(false, NaNPolicy.INCLUDE); 048 049 /** Flag to indicate if the data should be copied. */ 050 private final boolean copy; 051 /** NaN policy for floating point data. */ 052 private final NaNPolicy nanPolicy; 053 /** Transformer for NaN data. */ 054 private final NaNTransformer nanTransformer; 055 056 /** 057 * @param copy Flag to indicate if the data should be copied. 058 * @param nanPolicy NaN policy. 059 */ 060 private Median(boolean copy, NaNPolicy nanPolicy) { 061 this.copy = copy; 062 this.nanPolicy = nanPolicy; 063 nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy); 064 } 065 066 /** 067 * Return a new instance with the default options. 068 * 069 * <ul> 070 * <li>{@linkplain #withCopy(boolean) Copy = false} 071 * <li>{@linkplain #with(NaNPolicy) NaN policy = include} 072 * </ul> 073 * 074 * <p>Note: The default options configure for processing in-place and including 075 * {@code NaN} values in the data. This is the most efficient mode and has the 076 * smallest memory consumption. 077 * 078 * @return the median implementation 079 * @see #withCopy(boolean) 080 * @see #with(NaNPolicy) 081 */ 082 public static Median withDefaults() { 083 return DEFAULT; 084 } 085 086 /** 087 * Return an instance with the configured copy behaviour. If {@code false} then 088 * the input array will be modified by the call to evaluate the median; otherwise 089 * the computation uses a copy of the data. 090 * 091 * @param v Value. 092 * @return an instance 093 */ 094 public Median withCopy(boolean v) { 095 return new Median(v, nanPolicy); 096 } 097 098 /** 099 * Return an instance with the configured {@link NaNPolicy}. 100 * 101 * <p>Note: This implementation respects the ordering imposed by 102 * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is 103 * considered greater than all other values, and all {@code NaN} values are equal. The 104 * {@link NaNPolicy} changes the computation of the statistic in the presence of 105 * {@code NaN} values. 106 * 107 * <ul> 108 * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data; 109 * the size of the data <em>includes</em> the {@code NaN} values and the median will be 110 * {@code NaN} if any value used for median interpolation is {@code NaN}. 111 * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data; 112 * the size of the data <em>excludes</em> the {@code NaN} values and the median will 113 * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero 114 * and the result is {@code NaN}. 115 * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN} 116 * values. 117 * </ul> 118 * 119 * <p>Note that the result is identical for all policies if no {@code NaN} values are present. 120 * 121 * @param v Value. 122 * @return an instance 123 */ 124 public Median with(NaNPolicy v) { 125 return new Median(copy, Objects.requireNonNull(v)); 126 } 127 128 /** 129 * Evaluate the median. 130 * 131 * <p>Note: This method may partially sort the input values if not configured to 132 * {@link #withCopy(boolean) copy} the input data. 133 * 134 * @param values Values. 135 * @return the median 136 */ 137 public double evaluate(double[] values) { 138 // Floating-point data handling 139 final int[] bounds = new int[1]; 140 final double[] x = nanTransformer.apply(values, bounds); 141 final int n = bounds[0]; 142 // Special cases 143 if (n <= 2) { 144 switch (n) { 145 case 2: 146 // Sorting the array matches the behaviour of Quantile for n==2 147 // Handle NaN and signed zeros 148 if (Double.compare(x[1], x[0]) < 0) { 149 final double t = x[0]; 150 x[0] = x[1]; 151 x[1] = t; 152 } 153 return Interpolation.mean(x[0], x[1]); 154 case 1: 155 return x[0]; 156 default: 157 return Double.NaN; 158 } 159 } 160 // Median index 161 final int m = n >>> 1; 162 // Odd 163 if ((n & 0x1) == 1) { 164 Selection.select(x, 0, n, m); 165 return x[m]; 166 } 167 // Even: require (m-1, m) 168 Selection.select(x, 0, n, new int[] {m - 1, m}); 169 return Interpolation.mean(x[m - 1], x[m]); 170 } 171 172 /** 173 * Evaluate the median. 174 * 175 * <p>Note: This method may partially sort the input values if not configured to 176 * {@link #withCopy(boolean) copy} the input data. 177 * 178 * @param values Values. 179 * @return the median 180 */ 181 public double evaluate(int[] values) { 182 final int[] x = copy ? values.clone() : values; 183 final int n = values.length; 184 // Special cases 185 if (n <= 2) { 186 switch (n) { 187 case 2: 188 // Sorting the array matches the behaviour of Quantile for n==2 189 if (x[1] < x[0]) { 190 final int t = x[0]; 191 x[0] = x[1]; 192 x[1] = t; 193 } 194 return Interpolation.mean(x[0], x[1]); 195 case 1: 196 return x[0]; 197 default: 198 return Double.NaN; 199 } 200 } 201 // Median index 202 final int m = n >>> 1; 203 // Odd 204 if ((n & 0x1) == 1) { 205 Selection.select(x, 0, n, m); 206 return x[m]; 207 } 208 // Even: require (m-1, m) 209 Selection.select(x, 0, n, new int[] {m - 1, m}); 210 return Interpolation.mean(x[m - 1], x[m]); 211 } 212}