/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.SpatialRelation;

Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as a so-called "heatmap", hence the name.
@lucene.experimental
/** * Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as a so-called * "heatmap", hence the name. * * @lucene.experimental */
public class HeatmapFacetCounter { //TODO where should this code live? It could go to PrefixTreeFacetCounter, or maybe here in its own class is fine.
Maximum number of supported rows (or columns).
/** Maximum number of supported rows (or columns). */
public static final int MAX_ROWS_OR_COLUMNS = (int) Math.sqrt(ArrayUtil.MAX_ARRAY_LENGTH); static { Math.multiplyExact(MAX_ROWS_OR_COLUMNS, MAX_ROWS_OR_COLUMNS);//will throw if doesn't stay within integer }
Response structure
/** Response structure */
public static class Heatmap { public final int columns; public final int rows; public final int[] counts;//in order of 1st column (all rows) then 2nd column (all rows) etc. public final Rectangle region; public Heatmap(int columns, int rows, Rectangle region) { this.columns = columns; this.rows = rows; this.counts = new int[columns * rows]; this.region = region; } public int getCount(int x, int y) { return counts[x * rows + y]; } @Override public String toString() { return "Heatmap{" + columns + "x" + rows + " " + region + '}'; } }
Calculates spatial 2D facets (aggregated counts) in a grid, sometimes called a heatmap. Facet computation is implemented by navigating the underlying indexed terms efficiently. If you don't know exactly what facetLevel to go to for a given input box but you have some sense of how many cells there should be relative to the size of the shape, then consider using the logic that PrefixTreeStrategy uses when approximating what level to go to when indexing a shape given a distErrPct.
Params:
  • context – the IndexReader's context
  • topAcceptDocs – a Bits to limit counted docs. If null, live docs are counted.
  • inputShape – the shape to gather grid squares for; typically a Rectangle. The actual heatmap area will usually be larger since the cells on the edge that overlap are returned. We always return a rectangle of integers even if the inputShape isn't a rectangle -- the non-intersecting cells will all be 0. If null is given, the entire world is assumed.
  • facetLevel – the target depth (detail) of cells.
  • maxCells – the maximum number of cells to return. If the cells exceed this count, an
/** * Calculates spatial 2D facets (aggregated counts) in a grid, sometimes called a heatmap. * Facet computation is implemented by navigating the underlying indexed terms efficiently. If you don't know exactly * what facetLevel to go to for a given input box but you have some sense of how many cells there should be relative * to the size of the shape, then consider using the logic that {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy} * uses when approximating what level to go to when indexing a shape given a distErrPct. * * @param context the IndexReader's context * @param topAcceptDocs a Bits to limit counted docs. If null, live docs are counted. * @param inputShape the shape to gather grid squares for; typically a {@link Rectangle}. * The <em>actual</em> heatmap area will usually be larger since the cells on the edge that overlap * are returned. We always return a rectangle of integers even if the inputShape isn't a rectangle * -- the non-intersecting cells will all be 0. * If null is given, the entire world is assumed. * @param facetLevel the target depth (detail) of cells. * @param maxCells the maximum number of cells to return. If the cells exceed this count, an */
public static Heatmap calcFacets(PrefixTreeStrategy strategy, IndexReaderContext context, Bits topAcceptDocs, Shape inputShape, final int facetLevel, int maxCells) throws IOException { if (maxCells > (MAX_ROWS_OR_COLUMNS * MAX_ROWS_OR_COLUMNS)) { throw new IllegalArgumentException("maxCells (" + maxCells + ") should be <= " + MAX_ROWS_OR_COLUMNS); } if (inputShape == null) { inputShape = strategy.getSpatialContext().getWorldBounds(); } final Rectangle inputRect = inputShape.getBoundingBox(); //First get the rect of the cell at the bottom-left at depth facetLevel final SpatialPrefixTree grid = strategy.getGrid(); final SpatialContext ctx = grid.getSpatialContext(); final Point cornerPt = ctx.makePoint(inputRect.getMinX(), inputRect.getMinY()); final CellIterator cellIterator = grid.getTreeCellIterator(cornerPt, facetLevel); Cell cornerCell = null; while (cellIterator.hasNext()) { cornerCell = cellIterator.next(); } assert cornerCell != null && cornerCell.getLevel() == facetLevel : "Cell not at target level: " + cornerCell; final Rectangle cornerRect = (Rectangle) cornerCell.getShape(); assert cornerRect.hasArea(); //Now calculate the number of columns and rows necessary to cover the inputRect double heatMinX = cornerRect.getMinX();//note: we might change this below... final double cellWidth = cornerRect.getWidth(); final Rectangle worldRect = ctx.getWorldBounds(); final int columns = calcRowsOrCols(cellWidth, heatMinX, inputRect.getWidth(), inputRect.getMinX(), worldRect.getWidth()); final double heatMinY = cornerRect.getMinY(); final double cellHeight = cornerRect.getHeight(); final int rows = calcRowsOrCols(cellHeight, heatMinY, inputRect.getHeight(), inputRect.getMinY(), worldRect.getHeight()); assert rows > 0 && columns > 0; if (columns > MAX_ROWS_OR_COLUMNS || rows > MAX_ROWS_OR_COLUMNS || columns * rows > maxCells) { throw new IllegalArgumentException( "Too many cells (" + columns + " x " + rows + ") for level " + facetLevel + " shape " + inputRect); } //Create resulting heatmap bounding rectangle & Heatmap object. final double halfCellWidth = cellWidth / 2.0; // if X world-wraps, use world bounds' range if (columns * cellWidth + halfCellWidth > worldRect.getWidth()) { heatMinX = worldRect.getMinX(); } double heatMaxX = heatMinX + columns * cellWidth; if (Math.abs(heatMaxX - worldRect.getMaxX()) < halfCellWidth) {//numeric conditioning issue heatMaxX = worldRect.getMaxX(); } else if (heatMaxX > worldRect.getMaxX()) {//wraps dateline (won't happen if !geo) heatMaxX = heatMaxX - worldRect.getMaxX() + worldRect.getMinX(); } final double halfCellHeight = cellHeight / 2.0; double heatMaxY = heatMinY + rows * cellHeight; if (Math.abs(heatMaxY - worldRect.getMaxY()) < halfCellHeight) {//numeric conditioning issue heatMaxY = worldRect.getMaxY(); } final Heatmap heatmap = new Heatmap(columns, rows, ctx.makeRectangle(heatMinX, heatMaxX, heatMinY, heatMaxY)); if (topAcceptDocs instanceof Bits.MatchNoBits) { return heatmap; // short-circuit } //All ancestor cell counts (of facetLevel) will be captured during facet visiting and applied later. If the data is // just points then there won't be any ancestors. //Facet count of ancestors covering all of the heatmap: int[] allCellsAncestorCount = new int[1]; // single-element array so it can be accumulated in the inner class //All other ancestors: Map<Rectangle,Integer> ancestors = new HashMap<>(); //Now lets count some facets! PrefixTreeFacetCounter.compute(strategy, context, topAcceptDocs, inputShape, facetLevel, new PrefixTreeFacetCounter.FacetVisitor() { @Override public void visit(Cell cell, int count) { final double heatMinX = heatmap.region.getMinX(); final Rectangle rect = (Rectangle) cell.getShape(); if (cell.getLevel() == facetLevel) {//heatmap level; count it directly //convert to col & row int column; if (rect.getMinX() >= heatMinX) { column = (int) Math.round((rect.getMinX() - heatMinX) / cellWidth); } else { // due to dateline wrap column = (int) Math.round((rect.getMinX() + 360 - heatMinX) / cellWidth); } int row = (int) Math.round((rect.getMinY() - heatMinY) / cellHeight); //note: unfortunately, it's possible for us to visit adjacent cells to the heatmap (if the SpatialPrefixTree // allows adjacent cells to overlap on the seam), so we need to skip them if (column < 0 || column >= heatmap.columns || row < 0 || row >= heatmap.rows) { return; } // increment heatmap.counts[column * heatmap.rows + row] += count; } else if (rect.relate(heatmap.region) == SpatialRelation.CONTAINS) {//containing ancestor allCellsAncestorCount[0] += count; } else { // ancestor // note: not particularly efficient (possible put twice, and Integer wrapper); oh well Integer existingCount = ancestors.put(rect, count); if (existingCount != null) { ancestors.put(rect, count + existingCount); } } } }); //Update the heatmap counts with ancestor counts // Apply allCellsAncestorCount if (allCellsAncestorCount[0] > 0) { for (int i = 0; i < heatmap.counts.length; i++) { heatmap.counts[i] += allCellsAncestorCount[0]; } } // Apply ancestors // note: This approach isn't optimized for a ton of ancestor cells. We'll potentially increment the same cells // multiple times in separate passes if any ancestors overlap. IF this poses a problem, we could optimize it // with additional complication by keeping track of intervals in a sorted tree structure (possible TreeMap/Set) // and iterate them cleverly such that we just make one pass at this stage. int[] pair = new int[2];//output of intersectInterval for (Map.Entry<Rectangle, Integer> entry : ancestors.entrySet()) { Rectangle rect = entry.getKey(); // from a cell (thus doesn't cross DL) final int count = entry.getValue(); //note: we approach this in a way that eliminates int overflow/underflow (think huge cell, tiny heatmap) intersectInterval(heatMinY, heatMaxY, cellHeight, rows, rect.getMinY(), rect.getMaxY(), pair); final int startRow = pair[0]; final int endRow = pair[1]; if (!heatmap.region.getCrossesDateLine()) { intersectInterval(heatMinX, heatMaxX, cellWidth, columns, rect.getMinX(), rect.getMaxX(), pair); final int startCol = pair[0]; final int endCol = pair[1]; incrementRange(heatmap, startCol, endCol, startRow, endRow, count); } else { // note: the cell rect might intersect 2 disjoint parts of the heatmap, so we do the left & right separately final int leftColumns = (int) Math.round((180 - heatMinX) / cellWidth); final int rightColumns = heatmap.columns - leftColumns; //left half of dateline: if (rect.getMaxX() > heatMinX) { intersectInterval(heatMinX, 180, cellWidth, leftColumns, rect.getMinX(), rect.getMaxX(), pair); final int startCol = pair[0]; final int endCol = pair[1]; incrementRange(heatmap, startCol, endCol, startRow, endRow, count); } //right half of dateline if (rect.getMinX() < heatMaxX) { intersectInterval(-180, heatMaxX, cellWidth, rightColumns, rect.getMinX(), rect.getMaxX(), pair); final int startCol = pair[0] + leftColumns; final int endCol = pair[1] + leftColumns; incrementRange(heatmap, startCol, endCol, startRow, endRow, count); } } } return heatmap; } private static void intersectInterval(double heatMin, double heatMax, double heatCellLen, int numCells, double cellMin, double cellMax, int[] out) { assert heatMin < heatMax && cellMin < cellMax; //precondition: we know there's an intersection if (heatMin >= cellMin) { out[0] = 0; } else { out[0] = (int) Math.round((cellMin - heatMin) / heatCellLen); } if (heatMax <= cellMax) { out[1] = numCells - 1; } else { out[1] = (int) Math.round((cellMax - heatMin) / heatCellLen) - 1; } } private static void incrementRange(Heatmap heatmap, int startColumn, int endColumn, int startRow, int endRow, int count) { //startColumn & startRow are not necessarily within the heatmap range; likewise numRows/columns may overlap. if (startColumn < 0) { endColumn += startColumn; startColumn = 0; } endColumn = Math.min(heatmap.columns-1, endColumn); if (startRow < 0) { endRow += startRow; startRow = 0; } endRow = Math.min(heatmap.rows-1, endRow); if (startRow > endRow) { return;//short-circuit } for (int c = startColumn; c <= endColumn; c++) { int cBase = c * heatmap.rows; for (int r = startRow; r <= endRow; r++) { heatmap.counts[cBase + r] += count; } } }
Computes the number of intervals (rows or columns) to cover a range given the sizes.
/** Computes the number of intervals (rows or columns) to cover a range given the sizes. */
private static int calcRowsOrCols(double cellRange, double cellMin, double requestRange, double requestMin, double worldRange) { assert requestMin >= cellMin; //Idealistically this wouldn't be so complicated but we concern ourselves with overflow and edge cases double range = (requestRange + (requestMin - cellMin)); if (range == 0) { return 1; } final double intervals = Math.ceil(range / cellRange); if (intervals > Integer.MAX_VALUE) { return Integer.MAX_VALUE;//should result in an error soon (exceed thresholds) } // ensures we don't have more intervals than world bounds (possibly due to rounding/edge issue) final long intervalsMax = Math.round(worldRange / cellRange); if (intervalsMax > Integer.MAX_VALUE) { //just return intervals return (int) intervals; } return Math.min((int)intervalsMax, (int)intervals); } private HeatmapFacetCounter() { } }