/*
 * Copyright: Scheer E2E AG
 */
package ch.e2e.bridge.server.office;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.format.CellFormat;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

/**
 * Converts the sheets of an Excel workbook into CSV text.
 * <p>
 * Every sheet is rendered as one CSV document. Rows are padded with trailing
 * separators so that all lines of a sheet have the same number of fields.
 * <p>
 * Each {@code convertToCVS} method closes the workbook when it returns, so an
 * instance is meant to be used for a single conversion only.
 */
class CSVConverter {
    public static final String LINE_SEPARATOR = System.getProperty("line.separator", "\n");
    public static final String DEFAULT_SEPARATOR = ",";

    private final Workbook workbook;
    private final FormulaEvaluator evaluator;
    private final DataFormatter formatter;
    private final String separator;
    private final Escaping escaping = Escaping.EXCEL_STYLE;

    /**
     * Opens the given workbook file using {@link #DEFAULT_SEPARATOR} as field separator.
     *
     * @param file the Excel workbook to convert
     */
    CSVConverter(File file) throws IOException, InvalidFormatException {
        this(file, DEFAULT_SEPARATOR);
    }

    /**
     * Opens the given workbook file.
     *
     * @param file      the Excel workbook to convert
     * @param separator the field separator; {@code null} selects {@link #DEFAULT_SEPARATOR}
     */
    CSVConverter(File file, String separator) throws IOException, InvalidFormatException {
        this.separator = (separator == null ? DEFAULT_SEPARATOR : separator);
        workbook = WorkbookFactory.create(file);
        evaluator = workbook.getCreationHelper().createFormulaEvaluator();
        formatter = new DataFormatter();
    }

    /**
     * Reads a workbook from the given stream using {@link #DEFAULT_SEPARATOR} as
     * field separator. The stream is closed by this constructor.
     *
     * @param stream the stream providing the Excel workbook
     */
    CSVConverter(InputStream stream) throws IOException, InvalidFormatException {
        this(stream, DEFAULT_SEPARATOR);
    }

    /**
     * Reads a workbook from the given stream. The stream is always closed by this
     * constructor, whether or not the workbook could be read.
     *
     * @param stream    the stream providing the Excel workbook
     * @param separator the field separator; {@code null} selects {@link #DEFAULT_SEPARATOR}
     */
    CSVConverter(InputStream stream, String separator) throws IOException, InvalidFormatException {
        this.separator = (separator == null ? DEFAULT_SEPARATOR : separator);
        try {
            workbook = WorkbookFactory.create(stream);
            evaluator = workbook.getCreationHelper().createFormulaEvaluator();
            formatter = new DataFormatter();
        } finally {
            stream.close();
        }
    }

    /**
     * Converts every sheet of the workbook and closes the workbook afterwards.
     *
     * @return one CSV document per sheet, in workbook order
     */
    String[] convertToCVS() throws IOException {
        try {
            final int numberOfSheets = workbook.getNumberOfSheets();
            final String[] result = new String[numberOfSheets];
            for (int i = 0; i < numberOfSheets; i++) {
                result[i] = convertSheet(process(workbook.getSheetAt(i)));
            }
            return result;
        } finally {
            workbook.close();
        }
    }

    /**
     * Converts the sheet with the given name and closes the workbook afterwards.
     *
     * @param sheetName name of the sheet to convert
     * @return the CSV document for that sheet
     * @throws IllegalArgumentException if the workbook has no sheet with that name
     */
    String convertToCVS(String sheetName) throws IOException {
        try {
            final Sheet sheet = workbook.getSheet(sheetName);
            if (sheet == null) {
                // Fail with a descriptive error instead of a NullPointerException further down.
                throw new IllegalArgumentException("No sheet named '" + sheetName + "' in workbook");
            }
            return convertSheet(process(sheet));
        } finally {
            workbook.close();
        }
    }

    /**
     * Converts the sheet at the given index and closes the workbook afterwards.
     *
     * @param sheetNumber index of the sheet to convert, as understood by
     *                    {@code Workbook.getSheetAt(int)}
     * @return the CSV document for that sheet
     */
    String convertToCVS(int sheetNumber) throws IOException {
        try {
            return convertSheet(process(workbook.getSheetAt(sheetNumber)));
        } finally {
            workbook.close();
        }
    }

    /**
     * Extracts the formatted cell contents of a sheet.
     *
     * @param sheet the sheet to read
     * @return one array of cell texts per row index from 0 up to the last row of
     * the sheet; missing rows yield an empty array, missing cells yield {@code null}
     */
    private String[][] process(Sheet sheet) {
        if (sheet.getPhysicalNumberOfRows() == 0) {
            return new String[0][];
        }
        // Iterate over every row index up to the bottom-most row, including
        // indexes for which no row exists, so that blank lines between
        // populated rows are preserved in the CSV output.
        final int lastRowNum = sheet.getLastRowNum();
        final String[][] rows = new String[lastRowNum + 1][];
        for (int i = 0; i <= lastRowNum; i++) {
            rows[i] = process(sheet.getRow(i));
        }
        return rows;
    }

    /**
     * Converts a row of cells into an array of formatted cell texts.
     *
     * @param row the row to read; may be {@code null} when the sheet has no row
     *            at the requested index
     * @return the formatted contents of every cell position up to the right-most
     * cell; positions without a cell hold {@code null}
     */
    private String[] process(Row row) {
        if (row == null) {
            return new String[0];
        }
        // getLastCellNum() reports -1 for a row that exists but holds no cells;
        // clamp it so that such a row is treated like a missing one.
        final int lastCellNum = Math.max(row.getLastCellNum(), 0);
        final String[] cells = new String[lastCellNum];
        for (int i = 0; i < lastCellNum; i++) {
            final Cell cell = row.getCell(i);
            cells[i] = (cell == null ? null : process(cell));
        }
        return cells;
    }

    /**
     * Formats the content of a single cell as text. Formula cells are
     * evaluated first; non-date numeric cells with a custom data format are
     * rendered through {@link CellFormat}, everything else through the
     * {@link DataFormatter}.
     *
     * @param cell the cell to format, never {@code null}
     * @return the formatted cell content
     */
    private String process(Cell cell) {
        final CellType cellType = cell.getCellType();
        if (cellType == CellType.FORMULA) {
            return formatter.formatCellValue(cell, evaluator);
        }
        if (cellType == CellType.NUMERIC && !DateUtil.isCellDateFormatted(cell)) {
            final short formatIndex = cell.getCellStyle().getDataFormat();
            if (formatIndex > 163) { // custom format
                final CellFormat cellFormat = CellFormat.getInstance(cell.getCellStyle().getDataFormatString());
                return cellFormat.apply(cell).text;
            }
        }
        return formatter.formatCellValue(cell);
    }

    /**
     * Renders the rows of one sheet as CSV. Lines are joined with
     * {@link #LINE_SEPARATOR} and every line is padded to the width of the
     * widest row of this sheet.
     *
     * @param rows the cell texts of the sheet as produced by {@link #process(Sheet)}
     * @return the CSV document; empty if the sheet has no rows
     */
    private String convertSheet(String[][] rows) {
        // The padding width is determined per sheet so that a wide sheet does
        // not influence the output of the other sheets of the workbook.
        int width = 0;
        for (String[] row : rows) {
            width = Math.max(width, row.length);
        }
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < rows.length; i++) {
            if (i > 0) {
                sb.append(LINE_SEPARATOR);
            }
            convertRow(rows[i], width, sb);
        }
        return sb.toString();
    }

    /**
     * Appends one CSV line to the buffer. The line always consists of
     * {@code max(cells.length, width)} fields, i.e. one separator less than
     * fields; fields without a value are left empty.
     *
     * @param cells the cell texts of the row, {@code null} entries denote missing cells
     * @param width the number of fields every line of the sheet must have at least
     * @param sb    the buffer the line is appended to
     */
    private void convertRow(String[] cells, int width, StringBuilder sb) {
        final int columns = Math.max(cells.length, width);
        for (int i = 0; i < columns; i++) {
            if (i > 0) {
                sb.append(separator);
            }
            if (i < cells.length && cells[i] != null) {
                sb.append(escapeEmbeddedCharacters(cells[i]));
            }
        }
    }

    /**
     * Escapes a field using the separator and escaping style of this converter.
     */
    private String escapeEmbeddedCharacters(String field) {
        return escapeEmbeddedCharacters(field, separator, escaping);
    }

    /**
     * Checks to see whether the field - which consists of the formatted
     * contents of an Excel worksheet cell encapsulated within a String - contains
     * any embedded characters that must be escaped. The method is able to
     * comply with either Excel's or UNIX formatting conventions in the
     * following manner:
     * <p>
     * With regard to UNIX conventions, if the field contains any embedded
     * field separator or EOL characters they will each be escaped by prefixing
     * a leading backslash character.
     * <p>
     * Excel has other embedded character escaping requirements. Firstly, with
     * regard to any embedded speech marks ("), each occurrence is escaped with
     * another speech mark and the whole field is then surrounded with speech marks.
     * Thus if a field holds <em>"Hello" he said</em> then it is modified
     * to appear as <em>"""Hello"" he said"</em>. Furthermore, if the field
     * contains either embedded separator or EOL characters (line feed or
     * carriage return), it is also surrounded with speech marks. As a result
     * <em>1,400</em> becomes <em>"1,400"</em> assuming that the comma is the
     * field separator. If a field contains both embedded speech marks and
     * embedded separators, the speech marks placed around the field because of
     * the embedded speech marks also account for the embedded separator.
     * <p>
     * The separator is always treated as literal text, never as a regular
     * expression.
     *
     * @param field     the formatted contents of a cell on an Excel worksheet
     * @param separator the field separator in use
     * @param escaping  the escaping convention to apply
     * @return the field with any embedded separator, EOL or speech mark
     * characters escaped according to the chosen convention
     * @throws IllegalStateException if the escaping convention is not supported
     */
    static String escapeEmbeddedCharacters(String field, String separator, Escaping escaping) {
        switch (escaping) {
            case EXCEL_STYLE:
                if (field.contains("\"")) {
                    // Double every speech mark and enclose the whole field.
                    return "\"" + field.replace("\"", "\"\"") + "\"";
                }
                if (field.contains(separator) || field.indexOf('\n') > -1 || field.indexOf('\r') > -1) {
                    // Embedded separators or line breaks only require enclosing the field.
                    return "\"" + field + "\"";
                }
                return field;
            case UNIX_STYLE:
                // Literal replacement: the separator may contain characters that
                // have a special meaning in regular expressions (e.g. '|' or '.').
                return field.replace(separator, "\\" + separator).replace("\n", "\\\n");
            default:
                throw new IllegalStateException("Unsupported escaping: " + escaping);
        }
    }

    /**
     * The escaping conventions supported by the converter.
     */
    public enum Escaping {
        /**
         * Identifies that the CSV file should obey Excel's formatting conventions
         * with regard to escaping certain embedded characters - the field separator,
         * speech mark and end of line (EOL) character
         */
        EXCEL_STYLE,

        /**
         * Identifies that the CSV file should obey UNIX formatting conventions
         * with regard to escaping certain embedded characters - the field separator
         * and end of line (EOL) character
         */
        UNIX_STYLE
    }
}
