DataFormatterSelector.java

package org.xandercat.pmdb.util.format;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;

/**
 * Data formatter selector to be used to select the best data formatter for a group of tested values.
 * <p>
 * Usage: construct the selector with the candidate formatters, feed it sample values through
 * {@link #test(String)}, then call {@link #getDataFormatter()} to obtain the formatter that accepted
 * the most samples.  Only a limited number of non-blank values are sampled (20 by default; see
 * {@link #setSampleSize(int)}).
 * <p>
 * Instances hold mutable counters and perform no synchronization.
 * 
 * @author Scott Arnold
 */
public class DataFormatterSelector {

	/**
	 * Pairs a candidate data formatter with the number of tested values it has accepted so far.
	 * Declared static because it needs no reference to the enclosing selector.
	 */
	private static class FormatterCount {
		private final DataFormatter dataFormatter;
		private int count;
		public FormatterCount(DataFormatter dataFormatter) {
			this.dataFormatter = dataFormatter;
		}
		public int getCount() {
			return count;
		}
	}
	
	/** Maximum number of non-blank values that will be tested. */
	private int maxSampleSize = 20;
	/** Number of non-blank values tested so far. */
	private int sampleSize;
	private final String attributeName;
	private final List<FormatterCount> formatterCounts = new ArrayList<>();
	
	/**
	 * Constructs a selector for the given attribute and candidate data formatters.
	 * 
	 * @param attributeName   name of the attribute whose values will be tested
	 * @param dataFormatters  candidate data formatters; each starts with an acceptance count of zero
	 */
	public DataFormatterSelector(String attributeName, List<DataFormatter> dataFormatters) {
		this.attributeName = attributeName;
		for (DataFormatter dataFormatter : dataFormatters) {
			formatterCounts.add(new FormatterCount(dataFormatter));
		}
	}
	
	/**
	 * Sets how many values should be tested in order to make selection on best formatter to use.
	 * Once this many non-blank values have been tested, further calls to {@link #test(String)}
	 * are ignored.  A value of zero or less disables testing of any further values.
	 * 
	 * @param sampleSize sample size
	 */
	public void setSampleSize(int sampleSize) {
		// This configures the sampling limit; it must not overwrite the running count of
		// values already tested, which is tracked separately in this.sampleSize.
		this.maxSampleSize = sampleSize;
	}
	
	/**
	 * Returns the name of the attribute this selector is for.
	 * 
	 * @return name of the attribute
	 */
	public String getAttributeName() {
		return attributeName;
	}
	
	/**
	 * Tests a value against the collection of data formatters.  Value should be a value for the attribute this selector represents.
	 * Blank values are skipped and do not count toward the sample size; once the maximum sample size
	 * has been reached, the call has no effect.
	 * 
	 * @param value value to be tested
	 */
	public void test(String value) {
		if (sampleSize >= maxSampleSize || FormatUtil.isBlank(value)) {
			return; // only test non-blank values, and stop testing after reaching max sample size
		}
		for (FormatterCount formatterCount : formatterCounts) {
			if (formatterCount.dataFormatter.isAcceptable(value)) {
				formatterCount.count++;
			}
		}
		sampleSize++;
	}
	
	/**
	 * Returns the data formatter selected for the attribute.  For a data formatter to be selected, it must
	 * be able to accept more than half the tested values.  If two or more data formatters are tied for best,
	 * the data formatter selected from that group is indeterminate.  If no values have been tested,
	 * no data formatter is selected.
	 * 
	 * @return data formatter (empty if no data formatter is appropriate)
	 */
	public Optional<DataFormatter> getDataFormatter() {
		return formatterCounts.stream()
				.max(Comparator.comparingInt(FormatterCount::getCount))
				// only use data formatter if it could accept more than half the tested values
				.filter(best -> best.count * 2 > sampleSize)
				.map(best -> best.dataFormatter);
	}
}