ExcelPorter.java

package org.xandercat.pmdb.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.xandercat.pmdb.dto.Movie;
import org.xandercat.pmdb.dto.MovieCollection;
import org.xandercat.pmdb.util.format.FormatUtil;

/**
 * Movie exporter and importer for Microsoft Excel workbooks.
 * 
 * For importing, any movie list in a worksheet must have a header row that can be found somewhere
 * near the top of the sheet, and at least one column header must have the text "title" in it
 * (case insensitive) to indicate a movie title column.
 * 
 * @author Scott Arnold
 */
public class ExcelPorter {

	private static final int MAX_SCAN_ROW_IDX = 10;
	private static final int MAX_SCAN_COL_IDX = 10;
	private static final DataFormatter DATA_FORMATTER = new DataFormatter();
	
	public static enum Format {
		XLS("application/vnd.ms-excel", ".xls"), 
		XLSX("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx");
		
		private String contentType;
		private String extension;
		
		private Format(String contentType, String extension) {
			this.contentType = contentType;
			this.extension = extension;
		}
		
		public String getContentType() {
			return contentType;
		}
		
		public String getExtension() {
			return extension;
		}
	}
	
	private class HeaderRow {
		private int rowIdx;
		private int colStartIdx;
		private int titleIdx;
		private List<String> headers; 
		public HeaderRow(int rowIdx, int colStartIdx, int titleIdx, List<String> headers) {
			this.rowIdx = rowIdx;
			this.colStartIdx = colStartIdx;
			this.titleIdx = titleIdx;
			this.headers = headers;
		}
		public int getIndex(String header) {
			int idx = headers.indexOf(header);
			return (idx < 0)? -1 : colStartIdx + idx;
		}
	}
	
	private Workbook workbook;
	private Format format;
	private List<String> sheetNames = new ArrayList<String>();
	private List<String> allColumnNames = new ArrayList<String>();
	private Map<String, HeaderRow> headerRows;
	
	/**
	 * Constructor for export mode using the provided format.
	 * 
	 * @param format document format to use
	 */
	public ExcelPorter(Format format) {
		this.format = format;
		if (format == Format.XLS) {
			this.workbook = new HSSFWorkbook();
		} else {
			this.workbook = new XSSFWorkbook();
		}
	}
	
	/**
	 * Constructor for import mode using provided input stream and filename. 
	 * Filename is used to determine the document format.
	 * 
	 * @param inputStream input stream
	 * @param fileName    name of file for given input stream
	 * @throws IOException if any io errors occur
	 */
	public ExcelPorter(InputStream inputStream, String fileName) throws IOException {
		if (fileName.toLowerCase().endsWith(Format.XLSX.getExtension())) {
			this.format = Format.XLSX;
			this.workbook = new XSSFWorkbook(inputStream);
		} else if (fileName.toLowerCase().endsWith(Format.XLS.getExtension())) {
			this.format = Format.XLS;
			this.workbook = new HSSFWorkbook(inputStream);
		} else {
			throw new IOException("File name should end with " + Format.XLSX.getExtension() + " or " + Format.XLS.getExtension());
		}
		for (int i=0; i<workbook.getNumberOfSheets(); i++) {
			sheetNames.add(workbook.getSheetAt(i).getSheetName());
		}
		this.headerRows = scanForHeaderRows();
		
		// remove sheets we couldn't identify a header row for
		this.sheetNames.retainAll(headerRows.keySet());
		
		// combine headers for all sheets to create master list of column names
		this.allColumnNames = headerRows.values().stream()
				.flatMap(headerRow -> headerRow.headers.stream())
				.distinct().sorted().collect(Collectors.toList());
	}
	
	/**
	 * Returns the content-type header value for the workbook.
	 * 
	 * @return content-type header value for the workbook
	 */
	public String getContentType() {
		return format.getContentType();
	}
	
	/**
	 * Returns a filename with extension provided a base filename without extension.
	 * 
	 * @param baseFilename base filename without extension
	 * @return filename with extension
	 */
	public String getFilename(String baseFilename) {
		return baseFilename + format.getExtension();
	}
	
	/**
	 * Returns list of all sheet names in the document.
	 * 
	 * @return sheet names
	 */
	public List<String> getSheetNames() {
		return sheetNames;
	}
	
	/**
	 * Returns list of all column names found from all sheets.  Only populated when importing.
	 * 
	 * Duplicates are not included across sheets.  However, duplicates are included is found within
	 * the same sheet, but duplicate row names are appended with an identifier to keep the name unique.
	 * 
	 * @return list of column names found from all sheets
	 */
	public List<String> getAllColumnNames() {
		return allColumnNames;
	}
	
	/**
	 * Add a sheet to the document with the given movie collection and movies with columns for the given column names.
	 * It is the responsibility of the caller to ensure that the movies provided are associated with the given 
	 * movie collection, and that the column names are valid attribute names for the movies.
	 * 
	 * @param movieCollection  movie collection to add sheet for
	 * @param movies           movies to include on the sheet
	 * @param columns          columns or movie attributes to include from the movies
	 */
	public void addSheet(MovieCollection movieCollection, Collection<Movie> movies, List<String> columns) {
		Sheet sheet = workbook.createSheet(movieCollection.getName());
		sheetNames.add(sheet.getSheetName());
		Row row = sheet.createRow(0);
		row.createCell(0).setCellValue("Title");
		int colIdx = 1;
		for (String column : columns) {
			Cell cell = row.createCell(colIdx++);
			cell.setCellValue(FormatUtil.titleCase(column));
		}
		int rowIdx = 1;
		for (Movie movie : movies) {
			row = sheet.createRow(rowIdx++);
			row.createCell(0).setCellValue(movie.getTitle());
			colIdx = 1;
			for (String column : columns) {
				Cell cell = row.createCell(colIdx++);
				cell.setCellValue(movie.getAttribute(column));
			}
		}
	}

	private Map<String, HeaderRow> scanForHeaderRows() {
		return sheetNames.stream()
				.map(workbook::getSheet)
				.collect(Collectors.toMap(Sheet::getSheetName, this::scanSheetForHeaderRow))
				.entrySet().stream()
				.filter(entry -> entry.getValue().isPresent())
				.collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().get()));
	}
	
	private Optional<HeaderRow> scanSheetForHeaderRow(Sheet sheet) {
		for (int r=0; r<=MAX_SCAN_ROW_IDX; r++) {
			Row row = sheet.getRow(r);
			if (row != null) {
				for (int c=0; c<=MAX_SCAN_COL_IDX; c++) {
					Cell cell = row.getCell(c);
					boolean titleRowLikelyFound = false;
					int startIdx = c;
					int titleIdx = 0;
					List<String> headers = new ArrayList<String>();
					while (cell != null && FormatUtil.isNotBlank(DATA_FORMATTER.formatCellValue(cell))) {
						String heading = FormatUtil.titleCase(FormatUtil.formatAlphaNumeric(DATA_FORMATTER.formatCellValue(cell).trim()));
						String origHeading = heading;
						int dupIdx = 2;
						while (headers.contains(heading)) {
							heading = origHeading + " " + dupIdx;
							dupIdx++;
						}
						headers.add(heading);
						if (!titleRowLikelyFound && heading.toLowerCase().indexOf("title") >= 0) {
							titleRowLikelyFound = true;
							titleIdx = c;
						}
						cell = row.getCell(++c);
					}
					if (titleRowLikelyFound) {
						return Optional.of(new HeaderRow(r, startIdx, titleIdx, headers));
					}
				}
			}
		}
		return Optional.empty();
	}
	
	/**
	 * Extract movies from the given sheet, setting attributes for the given columns. Movie title will always be
	 * pulled regardless of whether or not it is in the included columns list.
	 * 
	 * @param sheetName          sheet name
	 * @param includedColumns    columns to include
	 * @return movies for sheet
	 */
	public List<Movie> getMoviesForSheet(String sheetName, List<String> includedColumns) {
		List<Movie> movies = new ArrayList<Movie>();
		Sheet sheet = workbook.getSheet(sheetName);
		if (headerRows == null) {
			this.headerRows = scanForHeaderRows();
		}
		HeaderRow headerRow = headerRows.get(sheetName);
		if (headerRow == null) {
			return movies;
		}
		int r = headerRow.rowIdx;
		while (true) {
			Row row = sheet.getRow(++r);
			Movie movie = new Movie();
			if (row != null) {
				for (String includedColumn : includedColumns) {
					int idx = headerRow.getIndex(includedColumn);
					Cell cell = null;
					if (idx >= 0) {
						cell = row.getCell(idx);
					}
					if (cell != null && FormatUtil.isNotBlank(DATA_FORMATTER.formatCellValue(cell))) {
						String value = DATA_FORMATTER.formatCellValue(cell).trim();
						if (idx != headerRow.titleIdx) {
							movie.addAttribute(includedColumn, value);
						}
					}
				}
				Cell cell = row.getCell(headerRow.titleIdx);
				if (cell != null && FormatUtil.isNotBlank(DATA_FORMATTER.formatCellValue(cell))) {
					movie.setTitle(DATA_FORMATTER.formatCellValue(cell).trim());
				}
			}
			if (FormatUtil.isBlank(movie.getTitle())) {
				return movies;
			} else {
				movies.add(movie);
			}
		}
	}
	
	/**
	 * Write out and close the workbook.  This only need be called when exporting.
	 * 
	 * @param outputStream the output stream to write to
	 * @throws IOException if any io errors occur
	 */
	public void writeWorkbook(OutputStream outputStream) throws IOException {
		workbook.write(outputStream);
		workbook.close();
	}
}