/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

package org.apache.poi.xssf.streaming;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.util.SheetUtil;
import org.apache.poi.util.Internal;

Tracks best fit column width for rows of an SXSSFSheet, to be able to correctly calculate auto-sized column widths even if some rows are already flushed to disk. This is an auxiliary data structure that uses a HashMap containing one entry per tracked column, where the key is the column index and the value is a pair of doubles. This data structure's memory footprint is linear with the number of *tracked* columns and invariant with the number of rows or columns in the sheet.
Since:3.14beta1
/**
 * Tracks best fit column width for rows of an {@link SXSSFSheet},
 * to be able to correctly calculate auto-sized column widths even
 * if some rows are already flushed to disk.
 * <p>
 * This is an auxiliary data structure that uses a {@link HashMap} containing
 * one entry per tracked column, where the key is the column index and
 * the value is a pair of doubles (the best-fit width computed with and
 * without merged cells). This data structure's memory footprint
 * is linear with the number of *tracked* columns and invariant with
 * the number of rows or columns in the sheet.
 *
 * @since 3.14beta1
 */
@Internal
/*package*/ class AutoSizeColumnTracker {
    private final int defaultCharWidth;
    private final DataFormatter dataFormatter = new DataFormatter();

    // map of tracked columns, with values containing the best-fit width for the column
    // Using a HashMap instead of a TreeMap because insertion (trackColumn), removal (untrackColumn), and membership (everything)
    // will be called more frequently than getTrackedColumns(). The O(1) cost of insertion, removal, and membership operations
    // outweigh the infrequent O(n*log n) cost of sorting getTrackedColumns().
    // Memory consumption for a HashMap and TreeMap is about the same
    private final Map<Integer, ColumnWidthPair> maxColumnWidths = new HashMap<>();

    // untrackedColumns stores columns that have been explicitly untracked so they aren't implicitly re-tracked by trackAllColumns
    // Using a HashSet instead of a TreeSet because we don't care about order.
    private final Set<Integer> untrackedColumns = new HashSet<>();

    // when true, columns are tracked implicitly as cells are encountered (unless explicitly untracked)
    private boolean trackAllColumns;
Tuple to store the column widths considering and not considering merged cells. If more permutations are needed, it may be prudent to require the user to specify how they intend to auto-size a column when they track the column, so calculations are limited to the desired intentions. Unless this proves to be a performance problem, it's probably better to let the user defer how they want to auto-size to SXSSFSheet.autoSizeColumn, rather than twice (via SXSSFSheet.trackColumn(int column, boolean useMergedCells) and again at SXSSFSheet.autoSizeColumn(int column, boolean useMergedCells)).
Since:3.14beta1
/** * Tuple to store the column widths considering and not considering merged cells * If more permutations are needed, it may be prudent to require the user to specify * how they intend to auto-size a column when they track the column, so calculations * are limited to the desired intentions. Unless this proves to be a performance problem, * it's probably better to let the user defer how they want to auto-size to SXSSFSheet.autoSizeColumn, * rather than twice (via SXSSFSheet.trackColumn(int column, boolean useMergedCells) and again at * SXSFSheet.autoSizeColumn(int column, boolean useMergedCells)) * @since 3.14beta1 */
private static class ColumnWidthPair { private double withSkipMergedCells; private double withUseMergedCells; public ColumnWidthPair() { this(-1.0, -1.0); } public ColumnWidthPair(final double columnWidthSkipMergedCells, final double columnWidthUseMergedCells) { withSkipMergedCells = columnWidthSkipMergedCells; withUseMergedCells = columnWidthUseMergedCells; }
Gets the current best-fit column width for the provided settings
Params:
  • useMergedCells – true if merged cells are considered into the best-fit column width calculation
Returns:best fit column width, measured in default character widths.
/** * Gets the current best-fit column width for the provided settings * * @param useMergedCells true if merged cells are considered into the best-fit column width calculation * @return best fit column width, measured in default character widths. */
public double getMaxColumnWidth(final boolean useMergedCells) { return useMergedCells ? withUseMergedCells : withSkipMergedCells; }
Sets the best-fit column width to the maximum of the current width and the provided width
Params:
  • unmergedWidth – the best-fit column width calculated with useMergedCells=False
  • mergedWidth – the best-fit column width calculated with useMergedCells=True
/** * Sets the best-fit column width to the maximum of the current width and the provided width * * @param unmergedWidth the best-fit column width calculated with useMergedCells=False * @param mergedWidth the best-fit column width calculated with useMergedCells=True */
public void setMaxColumnWidths(double unmergedWidth, double mergedWidth) { withUseMergedCells = Math.max(withUseMergedCells, mergedWidth); withSkipMergedCells = Math.max(withUseMergedCells, unmergedWidth); } }
AutoSizeColumnTracker constructor. Holds no reference to sheet
Params:
  • sheet – the sheet associated with this auto-size column tracker
Since:3.14beta1
/**
 * AutoSizeColumnTracker constructor. Holds no reference to <code>sheet</code>;
 * only the default character width obtained from the sheet's workbook is stored.
 *
 * @param sheet the sheet associated with this auto-size column tracker
 * @since 3.14beta1
 */
public AutoSizeColumnTracker(final Sheet sheet) {
    // If sheet needs to be saved, use a java.lang.ref.WeakReference to avoid garbage collector gridlock.
    defaultCharWidth = SheetUtil.getDefaultCharWidth(sheet.getWorkbook());
}
Get the currently tracked columns, naturally ordered. Note if all columns are tracked, this will only return the columns that have been explicitly or implicitly tracked, which is probably only columns containing 1 or more non-blank values
Returns:a set of the indices of all tracked columns
Since:3.14beta1
/**
 * Returns a naturally ordered, read-only snapshot of the column indices that
 * currently have a tracking entry.
 * When all columns are tracked, only columns that were explicitly or implicitly
 * tracked so far are included, which is probably only columns containing
 * 1 or more non-blank values.
 *
 * @return a set of the indices of all tracked columns
 * @since 3.14beta1
 */
public SortedSet<Integer> getTrackedColumns() {
    // copy the keys into a TreeSet for ordering, then wrap the copy so callers cannot modify it
    return Collections.unmodifiableSortedSet(new TreeSet<>(maxColumnWidths.keySet()));
}
Returns true if column is currently tracked for auto-sizing.
Params:
  • column – the index of the column to check
Returns:true if column is tracked
Since:3.14beta1
/**
 * Tells whether <code>column</code> is currently tracked for auto-sizing.
 * While all columns are tracked this is true for every index; otherwise it is
 * true only for columns that have a tracking entry.
 *
 * @param column the index of the column to check
 * @return true if column is tracked
 * @since 3.14beta1
 */
public boolean isColumnTracked(int column) {
    if (trackAllColumns) {
        return true;
    }
    return maxColumnWidths.containsKey(column);
}
Returns true if all columns are implicitly tracked.
Returns:true if all columns are implicitly tracked
Since:3.14beta1
/**
 * Reports whether implicit tracking of all columns is switched on.
 *
 * @return true if all columns are implicitly tracked
 * @since 3.14beta1
 */
public boolean isAllColumnsTracked() {
    return this.trackAllColumns;
}
Tracks all non-blank columns. Allows columns that have been explicitly untracked to be tracked.
Since:3.14beta1
/**
 * Switches on tracking of all non-blank columns.
 * Earlier explicit untrack requests are forgotten, so those columns
 * may be tracked again.
 *
 * @since 3.14beta1
 */
public void trackAllColumns() {
    untrackedColumns.clear();
    trackAllColumns = true;
}
Untrack all columns that were previously tracked for auto-sizing. All best-fit column widths are forgotten.
Since:3.14beta1
/**
 * Stops tracking every column that was previously tracked for auto-sizing.
 * All best-fit column widths and all explicit untrack requests are forgotten,
 * and implicit tracking of all columns is switched off.
 *
 * @since 3.14beta1
 */
public void untrackAllColumns() {
    untrackedColumns.clear();
    maxColumnWidths.clear();
    trackAllColumns = false;
}
Marks multiple columns for inclusion in auto-size column tracking. Note this has undefined behavior if columns are tracked after one or more rows are written to the sheet. Any column in columns that is already tracked is ignored by this call.
Params:
  • columns – the indices of the columns to track
Since:3.14beta1
/**
 * Marks each of the given columns for inclusion in auto-size column tracking
 * by delegating to {@link #trackColumn(int)}.
 * Note this has undefined behavior if columns are tracked after one or more rows are written to the sheet.
 * Any column in <code>columns</code> that is already tracked is ignored by this call.
 *
 * @param columns the indices of the columns to track
 * @since 3.14beta1
 */
public void trackColumns(Collection<Integer> columns) {
    for (final Integer columnIndex : columns) {
        trackColumn(columnIndex);
    }
}
Marks a column for inclusion in auto-size column tracking. Note this has undefined behavior if a column is tracked after one or more rows are written to the sheet. If column is already tracked, this call does nothing.
Params:
  • column – the index of the column to track for auto-sizing
Returns:if column is already tracked, the call does nothing and returns false
Since:3.14beta1
/**
 * Marks a column for inclusion in auto-size column tracking and cancels any
 * earlier explicit untrack request for it.
 * Note this has undefined behavior if a column is tracked after one or more rows are written to the sheet.
 * If <code>column</code> is already tracked, its recorded widths are left untouched.
 *
 * @param column the index of the column to track for auto-sizing
 * @return true if the column was newly tracked by this call, false if it was already tracked
 * @since 3.14beta1
 */
public boolean trackColumn(int column) {
    untrackedColumns.remove(column);
    if (maxColumnWidths.containsKey(column)) {
        // already tracked: keep the existing best-fit widths
        return false;
    }
    maxColumnWidths.put(column, new ColumnWidthPair());
    return true;
}
Implicitly track a column if it has not been explicitly untracked. If it has been explicitly untracked, this call does nothing and returns false. Otherwise return true.
Params:
  • column – the column to implicitly track
Returns:false if column has been explicitly untracked, otherwise return true
/**
 * Tracks a column on behalf of "track all columns" mode, unless the column
 * has been explicitly untracked, in which case nothing happens.
 *
 * @param column the column to implicitly track
 * @return false if column has been explicitly untracked, otherwise return true
 */
private boolean implicitlyTrackColumn(int column) {
    if (untrackedColumns.contains(column)) {
        // an explicit untrack request wins over implicit tracking
        return false;
    }
    trackColumn(column);
    return true;
}
Removes columns that were previously marked for inclusion in auto-size column tracking. When a column is untracked, the best-fit width is forgotten. Any column in columns that is not tracked will be ignored by this call.
Params:
  • columns – the indices of the columns to untrack for auto-sizing
Returns:true if one or more columns were untracked as a result of this call
Since:3.14beta1
/**
 * Removes columns that were previously marked for inclusion in auto-size column tracking
 * and records them as explicitly untracked.
 * When a column is untracked, the best-fit width is forgotten.
 * Any column in <code>columns</code> that is not tracked will be ignored by this call.
 *
 * @param columns the indices of the columns to untrack for auto-sizing
 * @return true if one or more columns were untracked as a result of this call
 * @since 3.14beta1
 */
public boolean untrackColumns(Collection<Integer> columns) {
    untrackedColumns.addAll(columns);
    // removing from the key set view also drops the corresponding map entries
    final Set<Integer> trackedKeys = maxColumnWidths.keySet();
    return trackedKeys.removeAll(columns);
}
Removes a column that was previously marked for inclusion in auto-size column tracking. When a column is untracked, the best-fit width is forgotten. If column is not tracked, it will be ignored by this call.
Params:
  • column – the index of the column to untrack for auto-sizing
Returns:true if column was tracked prior this call, false if no action was taken
Since:3.14beta1
/**
 * Removes a column that was previously marked for inclusion in auto-size column tracking
 * and records it as explicitly untracked.
 * When a column is untracked, the best-fit width is forgotten.
 * If <code>column</code> is not tracked, it will be ignored by this call.
 *
 * @param column the index of the column to untrack for auto-sizing
 * @return true if column was tracked prior this call, false if no action was taken
 * @since 3.14beta1
 */
public boolean untrackColumn(int column) {
    untrackedColumns.add(column);
    // removing from the key set view also drops the corresponding map entry
    final boolean wasTracked = maxColumnWidths.keySet().remove(column);
    return wasTracked;
}
Get the best-fit width of a tracked column
Params:
  • column – the index of the column to get the current best-fit width of
  • useMergedCells – true if merged cells should be considered when computing the best-fit width
Throws:
  • IllegalStateException – if column is not tracked and trackAllColumns is false
Returns:best-fit column width, measured in number of characters
Since:3.14beta1
/** * Get the best-fit width of a tracked column * * @param column the index of the column to get the current best-fit width of * @param useMergedCells true if merged cells should be considered when computing the best-fit width * @return best-fit column width, measured in number of characters * @throws IllegalStateException if column is not tracked and trackAllColumns is false * @since 3.14beta1 */
public int getBestFitColumnWidth(int column, boolean useMergedCells) { if (!maxColumnWidths.containsKey(column)) { // if column is not tracked, implicitly track the column if trackAllColumns is True and column has not been explicitly untracked if (trackAllColumns) { if (!implicitlyTrackColumn(column)) { final Throwable reason = new IllegalStateException( "Column was explicitly untracked after trackAllColumns() was called."); throw new IllegalStateException( "Cannot get best fit column width on explicitly untracked column " + column + ". " + "Either explicitly track the column or track all columns.", reason); } } else { final Throwable reason = new IllegalStateException( "Column was never explicitly tracked and isAllColumnsTracked() is false " + "(trackAllColumns() was never called or untrackAllColumns() was called after trackAllColumns() was called)."); throw new IllegalStateException( "Cannot get best fit column width on untracked column " + column + ". " + "Either explicitly track the column or track all columns.", reason); } } final double width = maxColumnWidths.get(column).getMaxColumnWidth(useMergedCells); return (int) (256*width); }
Calculate the best fit width for each tracked column in row
Params:
  • row – the row to get the cells
Since:3.14beta1
/** * Calculate the best fit width for each tracked column in row * * @param row the row to get the cells * @since 3.14beta1 */
public void updateColumnWidths(Row row) { // track new columns implicitlyTrackColumnsInRow(row); // update the widths // for-loop over the shorter of the number of cells in the row and the number of tracked columns // these two for-loops should do the same thing if (maxColumnWidths.size() < row.getPhysicalNumberOfCells()) { // loop over the tracked columns, because there are fewer tracked columns than cells in this row for (final Entry<Integer, ColumnWidthPair> e : maxColumnWidths.entrySet()) { final int column = e.getKey(); final Cell cell = row.getCell(column); //is MissingCellPolicy=Row.RETURN_NULL_AND_BLANK needed? // FIXME: if cell belongs to a merged region, some of the merged region may have fallen outside of the random access window // In this case, getting the column width may result in an error. Need to gracefully handle this. // FIXME: Most cells are not merged, so calling getCellWidth twice re-computes the same value twice. // Need to rewrite this to avoid unnecessary computation if this proves to be a performance bottleneck. if (cell != null) { final ColumnWidthPair pair = e.getValue(); updateColumnWidth(cell, pair); } } } else { // loop over the cells in this row, because there are fewer cells in this row than tracked columns for (final Cell cell : row) { final int column = cell.getColumnIndex(); // FIXME: if cell belongs to a merged region, some of the merged region may have fallen outside of the random access window // In this case, getting the column width may result in an error. Need to gracefully handle this. // FIXME: Most cells are not merged, so calling getCellWidth twice re-computes the same value twice. // Need to rewrite this to avoid unnecessary computation if this proves to be a performance bottleneck. if (maxColumnWidths.containsKey(column)) { final ColumnWidthPair pair = maxColumnWidths.get(column); updateColumnWidth(cell, pair); } } } }
Helper for updateColumnWidths(Row). Implicitly track the columns corresponding to the cells in row. If all columns in the row are already tracked, this call does nothing. Explicitly untracked columns will not be tracked.
Params:
  • row – the row containing cells to implicitly track the columns
Since:3.14beta1
/** * Helper for {@link #updateColumnWidths(Row)}. * Implicitly track the columns corresponding to the cells in row. * If all columns in the row are already tracked, this call does nothing. * Explicitly untracked columns will not be tracked. * * @param row the row containing cells to implicitly track the columns * @since 3.14beta1 */
private void implicitlyTrackColumnsInRow(Row row) { // track new columns if (trackAllColumns) { // if column is not tracked, implicitly track the column if trackAllColumns is True and column has not been explicitly untracked for (final Cell cell : row) { final int column = cell.getColumnIndex(); implicitlyTrackColumn(column); } } }
Helper for updateColumnWidths(Row).
Params:
  • cell – the cell to compute the best fit width on
  • pair – the column width pair to update
Since:3.14beta1
/**
 * Helper for {@link #updateColumnWidths(Row)}.
 * Measures <code>cell</code> once without and once with merged cells taken into
 * account, and feeds both widths into <code>pair</code>.
 *
 * @param cell the cell to compute the best fit width on
 * @param pair the column width pair to update
 * @since 3.14beta1
 */
private void updateColumnWidth(final Cell cell, final ColumnWidthPair pair) {
    final double widthSkippingMerged = SheetUtil.getCellWidth(cell, defaultCharWidth, dataFormatter, false);
    final double widthUsingMerged = SheetUtil.getCellWidth(cell, defaultCharWidth, dataFormatter, true);
    pair.setMaxColumnWidths(widthSkippingMerged, widthUsingMerged);
}
} // end of class AutoSizeColumnTracker