package org.apache.poi.xssf.extractor;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Comment;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.HeaderFooter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException;
public class extends POIXMLTextExtractor
implements org.apache.poi.ss.extractor.ExcelExtractor {
public static final XSSFRelation[] = new XSSFRelation[] {
XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
XSSFRelation.MACROS_WORKBOOK
};
private Locale ;
private XSSFWorkbook ;
private boolean = true;
private boolean ;
private boolean ;
private boolean = true;
private boolean = true;
public (OPCPackage container) throws XmlException, OpenXML4JException, IOException {
this(new XSSFWorkbook(container));
}
public (XSSFWorkbook workbook) {
super(workbook);
this.workbook = workbook;
}
public static void main(String[] args) throws Exception {
if(args.length < 1) {
System.err.println("Use:");
System.err.println(" XSSFExcelExtractor <filename.xlsx>");
System.exit(1);
}
try (OPCPackage pkg = OPCPackage.create(args[0]);
POIXMLTextExtractor extractor = new XSSFExcelExtractor(pkg)) {
System.out.println(extractor.getText());
}
}
public void (boolean includeSheetNames) {
this.includeSheetNames = includeSheetNames;
}
public void (boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
public void (boolean includeCellComments) {
this.includeCellComments = includeCellComments;
}
public void (boolean includeHeadersFooters) {
this.includeHeadersFooters = includeHeadersFooters;
}
public void (boolean includeTextBoxes){
this.includeTextBoxes = includeTextBoxes;
}
public void (Locale locale) {
this.locale = locale;
}
public String () {
DataFormatter formatter;
if(locale == null) {
formatter = new DataFormatter();
} else {
formatter = new DataFormatter(locale);
}
StringBuilder text = new StringBuilder(64);
for(Sheet sh : workbook) {
XSSFSheet sheet = (XSSFSheet) sh;
if(includeSheetNames) {
text.append(sheet.getSheetName()).append("\n");
}
if(includeHeadersFooters) {
text.append(
extractHeaderFooter(sheet.getFirstHeader())
);
text.append(
extractHeaderFooter(sheet.getOddHeader())
);
text.append(
extractHeaderFooter(sheet.getEvenHeader())
);
}
for (Object rawR : sheet) {
Row row = (Row)rawR;
for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
Cell cell = ri.next();
if(cell.getCellType() == CellType.FORMULA) {
if (formulasNotResults) {
String contents = cell.getCellFormula();
checkMaxTextSize(text, contents);
text.append(contents);
} else {
if (cell.getCachedFormulaResultType() == CellType.STRING) {
handleStringCell(text, cell);
} else {
handleNonStringCell(text, cell, formatter);
}
}
} else if(cell.getCellType() == CellType.STRING) {
handleStringCell(text, cell);
} else {
handleNonStringCell(text, cell, formatter);
}
Comment comment = cell.getCellComment();
if(includeCellComments && comment != null) {
String commentText = comment.getString().getString().replace('\n', ' ');
checkMaxTextSize(text, commentText);
text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText);
}
if(ri.hasNext()) {
text.append("\t");
}
}
text.append("\n");
}
if (includeTextBoxes){
XSSFDrawing drawing = sheet.getDrawingPatriarch();
if (drawing != null) {
for (XSSFShape shape : drawing.getShapes()){
if (shape instanceof XSSFSimpleShape){
String boxText = ((XSSFSimpleShape)shape).getText();
if (boxText.length() > 0){
text.append(boxText);
text.append('\n');
}
}
}
}
}
if(includeHeadersFooters) {
text.append(
extractHeaderFooter(sheet.getFirstFooter())
);
text.append(
extractHeaderFooter(sheet.getOddFooter())
);
text.append(
extractHeaderFooter(sheet.getEvenFooter())
);
}
}
return text.toString();
}
private void handleStringCell(StringBuilder text, Cell cell) {
String contents = cell.getRichStringCellValue().getString();
checkMaxTextSize(text, contents);
text.append(contents);
}
private void handleNonStringCell(StringBuilder text, Cell cell, DataFormatter formatter) {
CellType type = cell.getCellType();
if (type == CellType.FORMULA) {
type = cell.getCachedFormulaResultType();
}
if (type == CellType.NUMERIC) {
CellStyle cs = cell.getCellStyle();
if (cs != null && cs.getDataFormatString() != null) {
String contents = formatter.formatRawCellContents(
cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString());
checkMaxTextSize(text, contents);
text.append(contents);
return;
}
}
String contents = ((XSSFCell)cell).getRawValue();
if (contents != null) {
checkMaxTextSize(text, contents);
text.append(contents);
}
}
private String (HeaderFooter hf) {
return ExcelExtractor._extractHeaderFooter(hf);
}
}