package org.apache.poi.hssf.extractor;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.Locale;
import org.apache.poi.extractor.POIOLE2TextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFComment;
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.usermodel.HeaderFooter;
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
public class extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
private final HSSFWorkbook ;
private final HSSFDataFormatter ;
private boolean = true;
private boolean = true;
private boolean ;
private boolean ;
private boolean = true;
public (HSSFWorkbook wb) {
super(wb);
_wb = wb;
_formatter = new HSSFDataFormatter();
}
public (POIFSFileSystem fs) throws IOException {
this(fs.getRoot());
}
public (DirectoryNode dir) throws IOException {
this(new HSSFWorkbook(dir, true));
}
private static final class CommandParseException extends Exception {
public CommandParseException(String msg) {
super(msg);
}
}
private static final class CommandArgs {
private final boolean _requestHelp;
private final File _inputFile;
private final boolean _showSheetNames;
private final boolean _evaluateFormulas;
private final boolean _showCellComments;
private final boolean _showBlankCells;
private final boolean _headersFooters;
public CommandArgs(String[] args) throws CommandParseException {
int nArgs = args.length;
File inputFile = null;
boolean requestHelp = false;
boolean showSheetNames = true;
boolean evaluateFormulas = true;
boolean showCellComments = false;
boolean showBlankCells = false;
boolean headersFooters = true;
for (int i=0; i<nArgs; i++) {
String arg = args[i];
if ("-help".equalsIgnoreCase(arg)) {
requestHelp = true;
break;
}
if ("-i".equals(arg)) {
if (++i >= nArgs) {
throw new CommandParseException("Expected filename after '-i'");
}
arg = args[i];
if (inputFile != null) {
throw new CommandParseException("Only one input file can be supplied");
}
inputFile = new File(arg);
if (!inputFile.exists()) {
throw new CommandParseException("Specified input file '" + arg + "' does not exist");
}
if (inputFile.isDirectory()) {
throw new CommandParseException("Specified input file '" + arg + "' is a directory");
}
continue;
}
if ("--show-sheet-names".equals(arg)) {
showSheetNames = parseBoolArg(args, ++i);
continue;
}
if ("--evaluate-formulas".equals(arg)) {
evaluateFormulas = parseBoolArg(args, ++i);
continue;
}
if ("--show-comments".equals(arg)) {
showCellComments = parseBoolArg(args, ++i);
continue;
}
if ("--show-blanks".equals(arg)) {
showBlankCells = parseBoolArg(args, ++i);
continue;
}
if ("--headers-footers".equals(arg)) {
headersFooters = parseBoolArg(args, ++i);
continue;
}
throw new CommandParseException("Invalid argument '" + arg + "'");
}
_requestHelp = requestHelp;
_inputFile = inputFile;
_showSheetNames = showSheetNames;
_evaluateFormulas = evaluateFormulas;
_showCellComments = showCellComments;
_showBlankCells = showBlankCells;
_headersFooters = headersFooters;
}
private static boolean parseBoolArg(String[] args, int i) throws CommandParseException {
if (i >= args.length) {
throw new CommandParseException("Expected value after '" + args[i-1] + "'");
}
String value = args[i].toUpperCase(Locale.ROOT);
if ("Y".equals(value) || "YES".equals(value) || "ON".equals(value) || "TRUE".equals(value)) {
return true;
}
if ("N".equals(value) || "NO".equals(value) || "OFF".equals(value) || "FALSE".equals(value)) {
return false;
}
throw new CommandParseException("Invalid value '" + args[i] + "' for '" + args[i-1] + "'. Expected 'Y' or 'N'");
}
public boolean isRequestHelp() {
return _requestHelp;
}
public File getInputFile() {
return _inputFile;
}
public boolean shouldShowSheetNames() {
return _showSheetNames;
}
public boolean shouldEvaluateFormulas() {
return _evaluateFormulas;
}
public boolean shouldShowCellComments() {
return _showCellComments;
}
public boolean shouldShowBlankCells() {
return _showBlankCells;
}
public boolean shouldIncludeHeadersFooters() {
return _headersFooters;
}
}
private static void (PrintStream ps) {
ps.println("Use:");
ps.println(" " + ExcelExtractor.class.getName() + " [<flag> <value> [<flag> <value> [...]]] [-i <filename.xls>]");
ps.println(" -i <filename.xls> specifies input file (default is to use stdin)");
ps.println(" Flags can be set on or off by using the values 'Y' or 'N'.");
ps.println(" Following are available flags and their default values:");
ps.println(" --show-sheet-names Y");
ps.println(" --evaluate-formulas Y");
ps.println(" --show-comments N");
ps.println(" --show-blanks Y");
ps.println(" --headers-footers Y");
}
public static void main(String[] args) throws IOException {
CommandArgs cmdArgs;
try {
cmdArgs = new CommandArgs(args);
} catch (CommandParseException e) {
System.err.println(e.getMessage());
printUsageMessage(System.err);
System.exit(1);
return;
}
if (cmdArgs.isRequestHelp()) {
printUsageMessage(System.out);
return;
}
InputStream is;
if(cmdArgs.getInputFile() == null) {
is = System.in;
} else {
is = new FileInputStream(cmdArgs.getInputFile());
}
HSSFWorkbook wb = new HSSFWorkbook(is);
is.close();
ExcelExtractor extractor = new ExcelExtractor(wb);
extractor.setIncludeSheetNames(cmdArgs.shouldShowSheetNames());
extractor.setFormulasNotResults(!cmdArgs.shouldEvaluateFormulas());
extractor.setIncludeCellComments(cmdArgs.shouldShowCellComments());
extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells());
extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters());
System.out.println(extractor.getText());
extractor.close();
wb.close();
}
@Override
public void (boolean includeSheetNames) {
_includeSheetNames = includeSheetNames;
}
@Override
public void (boolean formulasNotResults) {
_shouldEvaluateFormulas = !formulasNotResults;
}
@Override
public void (boolean includeCellComments) {
_includeCellComments = includeCellComments;
}
public void (boolean includeBlankCells) {
_includeBlankCells = includeBlankCells;
}
@Override
public void (boolean includeHeadersFooters) {
_includeHeadersFooters = includeHeadersFooters;
}
@Override
public String () {
StringBuilder text = new StringBuilder();
_wb.setMissingCellPolicy(MissingCellPolicy.RETURN_BLANK_AS_NULL);
for(int i=0;i<_wb.getNumberOfSheets();i++) {
HSSFSheet sheet = _wb.getSheetAt(i);
if(sheet == null) { continue; }
if(_includeSheetNames) {
String name = _wb.getSheetName(i);
if(name != null) {
text.append(name);
text.append("\n");
}
}
if(_includeHeadersFooters) {
text.append(_extractHeaderFooter(sheet.getHeader()));
}
int firstRow = sheet.getFirstRowNum();
int lastRow = sheet.getLastRowNum();
for(int j=firstRow;j<=lastRow;j++) {
HSSFRow row = sheet.getRow(j);
if(row == null) { continue; }
int firstCell = row.getFirstCellNum();
int lastCell = row.getLastCellNum();
if(_includeBlankCells) {
firstCell = 0;
}
for(int k=firstCell;k<lastCell;k++) {
HSSFCell cell = row.getCell(k);
boolean outputContents = true;
if(cell == null) {
outputContents = _includeBlankCells;
} else {
switch(cell.getCellType()) {
case STRING:
text.append(cell.getRichStringCellValue().getString());
break;
case NUMERIC:
text.append(_formatter.formatCellValue(cell));
break;
case BOOLEAN:
text.append(cell.getBooleanCellValue());
break;
case ERROR:
text.append(ErrorEval.getText(cell.getErrorCellValue()));
break;
case FORMULA:
if(!_shouldEvaluateFormulas) {
text.append(cell.getCellFormula());
} else {
switch(cell.getCachedFormulaResultType()) {
case STRING:
HSSFRichTextString str = cell.getRichStringCellValue();
if(str != null && str.length() > 0) {
text.append(str);
}
break;
case NUMERIC:
HSSFCellStyle style = cell.getCellStyle();
double nVal = cell.getNumericCellValue();
short df = style.getDataFormat();
String dfs = style.getDataFormatString();
text.append(_formatter.formatRawCellContents(nVal, df, dfs));
break;
case BOOLEAN:
text.append(cell.getBooleanCellValue());
break;
case ERROR:
text.append(ErrorEval.getText(cell.getErrorCellValue()));
break;
default:
throw new IllegalStateException("Unexpected cell cached formula result type: " + cell.getCachedFormulaResultType());
}
}
break;
default:
throw new RuntimeException("Unexpected cell type (" + cell.getCellType() + ")");
}
HSSFComment comment = cell.getCellComment();
if(_includeCellComments && comment != null) {
String commentText = comment.getString().getString().replace('\n', ' ');
text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText);
}
}
if(outputContents && k < (lastCell-1)) {
text.append("\t");
}
}
text.append("\n");
}
if(_includeHeadersFooters) {
text.append(_extractHeaderFooter(sheet.getFooter()));
}
}
return text.toString();
}
public static String (HeaderFooter hf) {
StringBuilder text = new StringBuilder();
if(hf.getLeft() != null) {
text.append(hf.getLeft());
}
if(hf.getCenter() != null) {
if(text.length() > 0)
text.append("\t");
text.append(hf.getCenter());
}
if(hf.getRight() != null) {
if(text.length() > 0)
text.append("\t");
text.append(hf.getRight());
}
if(text.length() > 0)
text.append("\n");
return text.toString();
}
}