/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;

An abstract SpatialStrategy based on SpatialPrefixTree. The two subclasses are RecursivePrefixTreeStrategy and TermQueryPrefixTreeStrategy. This strategy is most effective as a fast approximate spatial search filter.

Characteristics:

  • Can index any shape; however only RecursivePrefixTreeStrategy can effectively search non-point shapes.
  • Can index a variable number of shapes per field value. This strategy can do it via multiple calls to createIndexableFields(Shape) for a document or by giving it some sort of Shape aggregate (e.g. JTS WKT MultiPoint). The shape's boundary is approximated to a grid precision.
  • Can query with any shape. The shape's boundary is approximated to a grid precision.
  • Only SpatialOperation.Intersects is supported. If only points are indexed then this is effectively equivalent to IsWithin.
  • The strategy supports makeDistanceValueSource(Point, double) even for multi-valued data, so long as the indexed data is all points; the behavior is undefined otherwise. However, it will likely be removed in the future in lieu of using another strategy with a more scalable implementation. Use of this call is the only circumstance in which a cache is used. The cache is simple but as such it doesn't scale to large numbers of points nor is it real-time-search friendly.

Implementation:

The SpatialPrefixTree does most of the work, for example returning a list of terms representing grids of various sizes for a supplied shape. An important configuration item is setDistErrPct(double) which balances shape precision against scalability. See those javadocs.

@lucene.experimental
/** * An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two * subclasses are {@link RecursivePrefixTreeStrategy} and {@link * TermQueryPrefixTreeStrategy}. This strategy is most effective as a fast * approximate spatial search filter. * <p> * <b>Characteristics:</b> * <br> * <ul> * <li>Can index any shape; however only {@link RecursivePrefixTreeStrategy} * can effectively search non-point shapes.</li> * <li>Can index a variable number of shapes per field value. This strategy * can do it via multiple calls to {@link #createIndexableFields(org.locationtech.spatial4j.shape.Shape)} * for a document or by giving it some sort of Shape aggregate (e.g. JTS * WKT MultiPoint). The shape's boundary is approximated to a grid precision. * </li> * <li>Can query with any shape. The shape's boundary is approximated to a grid * precision.</li> * <li>Only {@link org.apache.lucene.spatial.query.SpatialOperation#Intersects} * is supported. If only points are indexed then this is effectively equivalent * to IsWithin.</li> * <li>The strategy supports {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point,double)} * even for multi-valued data, so long as the indexed data is all points; the * behavior is undefined otherwise. However, <em>it will likely be removed in * the future</em> in lieu of using another strategy with a more scalable * implementation. Use of this call is the only * circumstance in which a cache is used. The cache is simple but as such * it doesn't scale to large numbers of points nor is it real-time-search * friendly.</li> * </ul> * <p> * <b>Implementation:</b> * <p> * The {@link SpatialPrefixTree} does most of the work, for example returning * a list of terms representing grids of various sizes for a supplied shape. * An important * configuration item is {@link #setDistErrPct(double)} which balances * shape precision against scalability. See those javadocs. * * @lucene.experimental */
public abstract class PrefixTreeStrategy extends SpatialStrategy { protected final SpatialPrefixTree grid; private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<>(); protected int defaultFieldValuesArrayLen = 2; protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ] protected boolean pointsOnly = false;//if true, there are no leaves public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { super(grid.getSpatialContext(), fieldName); this.grid = grid; } public SpatialPrefixTree getGrid() { return grid; }
A memory hint used by SpatialStrategy.makeDistanceValueSource(Point) for how big the initial size of each Document's array should be. The default is 2. Set this to slightly more than the default expected number of points per document.
/** * A memory hint used by {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)} * for how big the initial size of each Document's array should be. The * default is 2. Set this to slightly more than the default expected number * of points per document. */
public void setDefaultFieldValuesArrayLen(int defaultFieldValuesArrayLen) { this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen; } public double getDistErrPct() { return distErrPct; }
The default measure of shape precision affecting shapes at index and query times. Points don't use this as they are always indexed at the configured maximum precision (SpatialPrefixTree.getMaxLevels()); this applies to all other shapes. Specific shapes at index and query time can use something different than this default value. If you don't set a default then the default is SpatialArgs.DEFAULT_DISTERRPCT -- 2.5%.
See Also:
/** * The default measure of shape precision affecting shapes at index and query * times. Points don't use this as they are always indexed at the configured * maximum precision ({@link org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree#getMaxLevels()}); * this applies to all other shapes. Specific shapes at index and query time * can use something different than this default value. If you don't set a * default then the default is {@link SpatialArgs#DEFAULT_DISTERRPCT} -- * 2.5%. * * @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct() */
public void setDistErrPct(double distErrPct) { this.distErrPct = distErrPct; } public boolean isPointsOnly() { return pointsOnly; }
True if only indexed points shall be supported. There are no "leafs" in such a case, except those at maximum precision.
/** True if only indexed points shall be supported. There are no "leafs" in such a case, except those * at maximum precision. */
public void setPointsOnly(boolean pointsOnly) { this.pointsOnly = pointsOnly; } @Override public Field[] createIndexableFields(Shape shape) { double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx); return createIndexableFields(shape, distErr); } /** * Turns {@link SpatialPrefixTree#getTreeCellIterator(Shape, int)} into a * {@link org.apache.lucene.analysis.TokenStream}. */ public Field[] createIndexableFields(Shape shape, double distErr) { int detailLevel = grid.getLevelForDistance(distErr); return createIndexableFields(shape, detailLevel); } public Field[] createIndexableFields(Shape shape, int detailLevel) { //TODO re-use TokenStream LUCENE-5776: Subclass Field, put cell iterator there, override tokenStream() Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null); CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator(); cellToBytesRefIterator.reset(cells); BytesRefIteratorTokenStream tokenStream = new BytesRefIteratorTokenStream(); tokenStream.setBytesRefIterator(cellToBytesRefIterator); Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE); return new Field[]{field}; } public class ShapeTokenStream extends BytesRefIteratorTokenStream { public void setShape(Shape shape) { double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx); int detailLevel = grid.getLevelForDistance(distErr); Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null); CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator(); cellToBytesRefIterator.reset(cells); setBytesRefIterator(cellToBytesRefIterator); } } public ShapeTokenStream tokenStream() { return new ShapeTokenStream(); } protected CellToBytesRefIterator newCellToBytesRefIterator() { //subclasses could return one that never emits leaves, or does both, or who knows. return new CellToBytesRefIterator(); } protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) { if (pointsOnly && !isPointShape(shape)) { throw new IllegalArgumentException("pointsOnly is true yet a " + shape.getClass() + " is given for indexing"); } return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator } /* Indexed, tokenized, not stored. */ public static final FieldType FIELD_TYPE = new FieldType(); static { FIELD_TYPE.setTokenized(true); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); FIELD_TYPE.freeze(); } @Override public DoubleValuesSource makeDistanceValueSource(Point queryPoint, double multiplier) { PointPrefixTreeFieldCacheProvider p = provider.get( getFieldName() ); if( p == null ) { synchronized (this) {//double checked locking idiom is okay since provider is threadsafe p = provider.get( getFieldName() ); if (p == null) { p = new PointPrefixTreeFieldCacheProvider(grid, getFieldName(), defaultFieldValuesArrayLen); provider.put(getFieldName(),p); } } } return new ShapeFieldCacheDistanceValueSource(ctx, p, queryPoint, multiplier); }
Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as a so-called "heatmap".
See Also:
  • calcFacets.calcFacets(PrefixTreeStrategy, IndexReaderContext, Bits, Shape, int, int)
/** * Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as a so-called * "heatmap". * * @see HeatmapFacetCounter#calcFacets(PrefixTreeStrategy, IndexReaderContext, Bits, Shape, int, int) */
public HeatmapFacetCounter.Heatmap calcFacets(IndexReaderContext context, Bits topAcceptDocs, Shape inputShape, final int facetLevel, int maxCells) throws IOException { return HeatmapFacetCounter.calcFacets(this, context, topAcceptDocs, inputShape, facetLevel, maxCells); }
Returns true if the shape is a Point. For custom spatial contexts, it may make sense to have certain other shapes return true.
@lucene.experimental
/** * Returns true if the {@code shape} is a {@link Point}. For custom spatial contexts, it may make sense to * have certain other shapes return true. * @lucene.experimental */
protected boolean isPointShape(Shape shape) { return shape instanceof Point; } }