// StorelessBivariateCovariance.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math4.legacy.stat.correlation;
import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
/**
* Bivariate Covariance implementation that does not require input data to be
* stored in memory.
*
* <p>This class is based on a paper written by Philippe Pébay:
* <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
* Formulas for Robust, One-Pass Parallel Computation of Covariances and
* Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
* Sandia National Laboratories. It computes the covariance for a pair of variables.
* Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p>
*
* <p>Note: This class is package private as it is only used internally in
* the {@link StorelessCovariance} class.</p>
*
* @since 3.0
*/
class StorelessBivariateCovariance {

    /** Running mean of the x values accumulated so far. */
    private double meanX;

    /** Running mean of the y values accumulated so far. */
    private double meanY;

    /** Number of observations accumulated so far (stored as a double). */
    private double n;

    /** Running sum of co-deviation products, i.e. the numerator of the covariance estimate. */
    private double covarianceNumerator;

    /** Flag for bias correction: {@code true} divides by n - 1, {@code false} divides by n. */
    private final boolean biasCorrected;

    /**
     * Create an empty {@link StorelessBivariateCovariance} instance with
     * bias correction.
     */
    StorelessBivariateCovariance() {
        this(true);
    }

    /**
     * Create an empty {@link StorelessBivariateCovariance} instance.
     *
     * @param biasCorrection if <code>true</code> the covariance estimate is corrected
     * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
     * i.e. n in the denominator.
     */
    StorelessBivariateCovariance(final boolean biasCorrection) {
        meanX = meanY = 0.0;
        n = 0;
        covarianceNumerator = 0.0;
        biasCorrected = biasCorrection;
    }

    /**
     * Update the covariance estimation with a pair of variables (x, y).
     *
     * <p>The deviations of the new pair are taken from the means <em>before</em>
     * this observation is folded in; both means and the covariance numerator are
     * then updated in place.</p>
     *
     * @param x the x value
     * @param y the y value
     */
    public void increment(final double x, final double y) {
        n++;
        // Deviations relative to the means of the previous n - 1 observations.
        final double deltaX = x - meanX;
        final double deltaY = y - meanY;
        meanX += deltaX / n;
        meanY += deltaY / n;
        // For the very first observation (n == 1) the factor is 0, so nothing is added.
        covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY;
    }

    /**
     * Appends another bivariate covariance calculation to this.
     * After this operation, statistics returned should be close to what would
     * have been obtained by performing all of the {@link #increment(double, double)}
     * operations in {@code cov} directly on this.
     *
     * <p>Appending an instance that holds no observations leaves this instance
     * unchanged.</p>
     *
     * @param cov StorelessBivariateCovariance instance to append.
     */
    public void append(StorelessBivariateCovariance cov) {
        // Read the other accumulator's state up front: the fields of this instance
        // are mutated below and cov may be the same object as this.
        final double otherN = cov.n;
        if (otherN == 0) {
            // Nothing to merge. Without this guard, merging an empty accumulator into
            // an empty one evaluates 0.0 / 0.0 and stores NaN in both means and in the
            // numerator, which would corrupt every later increment and result.
            return;
        }
        final double otherMeanX = cov.meanX;
        final double otherMeanY = cov.meanY;
        final double otherNumerator = cov.covarianceNumerator;

        final double oldN = n;
        n += otherN;
        // Difference between the two partial means, taken before this instance's means move.
        final double deltaX = otherMeanX - meanX;
        final double deltaY = otherMeanY - meanY;
        meanX += deltaX * otherN / n;
        meanY += deltaY * otherN / n;
        // Sum of both partial numerators plus a cross term weighted by the two sample sizes.
        covarianceNumerator += otherNumerator + oldN * otherN / n * deltaX * deltaY;
    }

    /**
     * Returns the number of observations.
     *
     * @return number of observations
     */
    public double getN() {
        return n;
    }

    /**
     * Return the current covariance estimate.
     *
     * @return the current covariance
     * @throws NumberIsTooSmallException if the number of observations
     * is &lt; 2
     */
    public double getResult() throws NumberIsTooSmallException {
        if (n < 2) {
            throw new NumberIsTooSmallException(LocalizedFormats.INSUFFICIENT_DIMENSION,
                                                n, 2, true);
        }
        if (biasCorrected) {
            return covarianceNumerator / (n - 1d);
        } else {
            return covarianceNumerator / n;
        }
    }
}