NumberStatistics.java
package org.xandercat.pmdb.util;
import java.util.List;
/**
* Statistics for numbers. This class expects the list of numbers to be non-null, contain no null values, and have a size of at least 1.
*
* Standard deviation and interquartile range are lazy calculated when needed.
*
* @author Scott Arnold
*/
public abstract class NumberStatistics<T extends Number> {
public static final double IQR_MULTIPLIER = 1.5; // standard multiplier for finding outliers using IQR
private List<T> values;
private Double standardDeviation;
private Double interquartileRange;
private Double iqrQ1;
private Double iqrQ3;
/**
* Construct statistics for the provided list of values.
*
* @param values double values
*/
public NumberStatistics(List<T> values) {
if (values == null || values.size() < 1) {
throw new IllegalArgumentException("List of values must be non null and contain at least 1 value.");
}
this.values = values;
sort(values);
}
/**
* Sorts the list of values in natural order.
*
* @param values values to sort
*/
protected abstract void sort(List<T> values);
/**
* Return number parsed from given string. If value cannot be parsed, null should be returned.
*
* @param value value to parse
*
* @return parsed value
*/
protected abstract T parse(String value);
/**
* Returns the minimum number from the list of numbers.
*
* @return minimum
*/
public abstract T getMin();
/**
* Returns the maximum number from the list of numbers.
*
* @return maximum
*/
public abstract T getMax();
/**
* Returns the average from the list of numbers.
*
* @return average
*/
public abstract double getAverage();
/**
* Returns median value from list of numbers. Returned number is a double due to
* rule of taking average of two middle values for a list with an even quantity of numbers.
*
* @return median value
*/
public double getMedian() {
return calculateMedian(values);
}
/**
* Returns standard deviation from list of numbers.
*
* @return standard deviation
*/
public double getStandardDeviation() {
if (standardDeviation == null) {
calculateStandardDeviation();
}
return standardDeviation;
}
/**
* Returns interquartile range from list of numbers.
*
* @return interquartile range
*/
public double getInterquartileRange() {
if (interquartileRange == null) {
calculateInterquartileRange();
}
return interquartileRange;
}
/**
* Returns whether or not the provided value is an outlier from the list of doubles using the 1.5xIQR rule.
* If the value cannot be parsed, it is considered an outlier.
*
* @param value value to test
*
* @return whether or not value is an outlier as compared to the internal list of numbers.
*/
public boolean isOutlier(String value) {
T parsedValue = parse(value);
return (parsedValue == null || isOutlier(parsedValue));
}
/**
* Returns whether or not the provided value is an outlier from the list of numbers using the 1.5xIQR rule.
*
* @param value value to test
*
* @return whether or not value is an outlier as compared to the internal list of numbers.
*/
public boolean isOutlier(T value) {
return isHighOutlier(value) || isLowOutlier(value);
}
/**
* Returns whether or not the provided value is an outlier on the high end from the list of numbers using the 1.5xIQR rule.
* If the value cannot be parsed, it is considered an outlier.
*
* @param value value to test
*
* @return whether or not value is an outlier on the high end as compared to the internal list of numbers.
*/
public boolean isHighOutlier(String value) {
T parsedValue = parse(value);
return (parsedValue == null || isHighOutlier(parsedValue));
}
/**
* Returns whether or not the provided value is an outlier on the high end from the list of numbers using the 1.5xIQR rule.
* If the value is null, it is considered an outlier.
*
* @param value value to test
*
* @return whether or not value is an outlier on the high end as compared to the internal list of numbers.
*/
public boolean isHighOutlier(T value) {
if (value == null) {
return true;
}
double iqr = getInterquartileRange();
return (value.doubleValue() > (iqrQ3 + IQR_MULTIPLIER * iqr));
}
/**
* Returns whether or not the provided value is an outlier on the low end from the list of numbers using the 1.5xIQR rule.
* If the value cannot be parsed, it is considered an outlier.
*
* @param value value to test
*
* @return whether or not value is an outlier on the low end as compared to the internal list of numbers.
*/
public boolean isLowOutlier(String value) {
T parsedValue = parse(value);
return (parsedValue == null || isLowOutlier(parsedValue));
}
/**
* Returns whether or not the provided value is an outlier on the low end from the list of numbers using the 1.5xIQR rule.
* If the value is null, it is considered an outlier.
*
* @param value value to test
*
* @return whether or not value is an outlier on the low end as compared to the internal list of numbers.
*/
public boolean isLowOutlier(T value) {
if (value == null) {
return true;
}
double iqr = getInterquartileRange();
return (value.doubleValue() < (iqrQ1 - IQR_MULTIPLIER * iqr));
}
private void calculateStandardDeviation() {
double intermediateSum = 0;
double average = getAverage();
for (Number value : values) {
intermediateSum += Math.pow(value.doubleValue() - average, 2);
}
this.standardDeviation = Math.sqrt(intermediateSum / (double) values.size());
}
private void calculateInterquartileRange() {
if (values.size() == 1) {
this.iqrQ1 = values.get(0).doubleValue();
this.iqrQ3 = values.get(0).doubleValue();
} else {
int midpoint = values.size() / 2;
List<T> lowerHalf = values.subList(0, midpoint);
List<T> upperHalf = values.subList(midpoint+(values.size() % 2), values.size());
this.iqrQ1 = calculateMedian(lowerHalf);
this.iqrQ3 = calculateMedian(upperHalf);
}
this.interquartileRange = iqrQ3 - iqrQ1;
}
private double calculateMedian(List<T> values) {
int midpointIndex = values.size() / 2;
if (values.size() % 2 == 0) {
return (values.get(midpointIndex -1).doubleValue() + values.get(midpointIndex).doubleValue()) / 2d;
} else {
return values.get(midpointIndex).doubleValue();
}
}
}