org.apache.lucene/lucene-sandbox/8.2.0 : org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java

IndexSortSortedNumericDocValuesRangeQuery
http://lucene.apache.org/lucene-parent/lucene-sandbox: Lucene Sandbox (The Apache Software Foundation)
Apache 2
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Objects;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;

A range query that can take advantage of the fact that the index is sorted to speed up
execution. If the index is sorted on the same field as the query, it performs binary
search on the field's numeric doc values to find the documents at the lower and upper
ends of the range.
This optimized execution strategy is only used if the following conditions hold:

   The index is sorted, and its primary sort is on the same field as the query.
  
 The query field has either SortedNumericDocValues or NumericDocValues. 
 The segments must have at most one field value per document (otherwise we cannot easily
determine the matching document IDs through a binary search).
 If any of these conditions isn't met, the search is delegated to fallbackQuery. This fallback must be an equivalent range query -- it should produce the same documents and give constant scores. As an example, an IndexSortSortedNumericDocValuesRangeQuery might be constructed as follows:   String field = "field";
  long lowerValue = 0, long upperValue = 10;
  Query fallbackQuery = LongPoint.newRangeQuery(field, lowerValue, upperValue);
  Query rangeQuery = new IndexSortSortedNumericDocValuesRangeQuery(
      field, lowerValue, upperValue, fallbackQuery);

@lucene.experimental /**
 * A range query that can take advantage of the fact that the index is sorted to speed up
 * execution. If the index is sorted on the same field as the query, it performs binary
 * search on the field's numeric doc values to find the documents at the lower and upper
 * ends of the range.
 *
 * This optimized execution strategy is only used if the following conditions hold:
 * <ul>
 *   <li> The index is sorted, and its primary sort is on the same field as the query.
 *   <li> The query field has either {@link SortedNumericDocValues} or {@link NumericDocValues}.
 *   <li> The segments must have at most one field value per document (otherwise we cannot easily
 * determine the matching document IDs through a binary search).
 * </ul>
 *
 * If any of these conditions isn't met, the search is delegated to {@code fallbackQuery}.
 *
 * This fallback must be an equivalent range query -- it should produce the same documents and give
 * constant scores. As an example, an {@link IndexSortSortedNumericDocValuesRangeQuery} might be
 * constructed as follows:
 * <pre class="prettyprint">
 *   String field = "field";
 *   long lowerValue = 0, long upperValue = 10;
 *   Query fallbackQuery = LongPoint.newRangeQuery(field, lowerValue, upperValue);
 *   Query rangeQuery = new IndexSortSortedNumericDocValuesRangeQuery(
 *       field, lowerValue, upperValue, fallbackQuery);
 * </pre>
 *
 * @lucene.experimental
 */
public class IndexSortSortedNumericDocValuesRangeQuery extends Query {

  private final String field;
  private final long lowerValue;
  private final long upperValue;
  private final Query fallbackQuery;

  Creates a new IndexSortSortedNumericDocValuesRangeQuery. 
Params: field – The field name.
lowerValue – The lower end of the range (inclusive).
upperValue – The upper end of the range (exclusive).
fallbackQuery – A query to fall back to if the optimization cannot be applied./**
   * Creates a new {@link IndexSortSortedNumericDocValuesRangeQuery}.
   *
   * @param field The field name.
   * @param lowerValue The lower end of the range (inclusive).
   * @param upperValue The upper end of the range (exclusive).
   * @param fallbackQuery A query to fall back to if the optimization cannot be applied.
      */
  public IndexSortSortedNumericDocValuesRangeQuery(String field,
                                                   long lowerValue,
                                                   long upperValue,
                                                   Query fallbackQuery) {
    this.field = Objects.requireNonNull(field);
    this.lowerValue = lowerValue;
    this.upperValue = upperValue;
    this.fallbackQuery = fallbackQuery;
  }

  public Query getFallbackQuery() {
    return fallbackQuery;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    IndexSortSortedNumericDocValuesRangeQuery that = (IndexSortSortedNumericDocValuesRangeQuery) o;
    return lowerValue == that.lowerValue &&
        upperValue == that.upperValue &&
        Objects.equals(field, that.field) &&
        Objects.equals(fallbackQuery, that.fallbackQuery);
  }

  @Override
  public int hashCode() {
    return Objects.hash(field, lowerValue, upperValue, fallbackQuery);
  }

  @Override
  public void visit(QueryVisitor visitor) {
    if (visitor.acceptField(field)) {
      visitor.visitLeaf(this);
      fallbackQuery.visit(visitor);
    }
  }

  @Override
  public String toString(String field) {
    StringBuilder b = new StringBuilder();
    if (this.field.equals(field) == false) {
      b.append(this.field).append(":");
    }
    return b
        .append("[")
        .append(lowerValue)
        .append(" TO ")
        .append(upperValue)
        .append("]")
        .toString();
  }

  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    if (lowerValue == Long.MIN_VALUE && upperValue == Long.MAX_VALUE) {
      return new DocValuesFieldExistsQuery(field);
    }

    Query rewrittenFallback = fallbackQuery.rewrite(reader);
    if (rewrittenFallback == fallbackQuery) {
      return this;
    } else {
      return new IndexSortSortedNumericDocValuesRangeQuery(
          field, lowerValue, upperValue, rewrittenFallback);
    }
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
    Weight fallbackWeight = fallbackQuery.createWeight(searcher, scoreMode, boost);

    return new ConstantScoreWeight(this, boost) {
      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        SortedNumericDocValues sortedNumericValues = DocValues.getSortedNumeric(context.reader(), field);
        NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);

        if (numericValues != null) {
          Sort indexSort = context.reader().getMetaData().getSort();
          if (indexSort != null
              && indexSort.getSort().length > 0
              && indexSort.getSort()[0].getField().equals(field)) {

            SortField sortField = indexSort.getSort()[0];
            DocIdSetIterator disi = getDocIdSetIterator(sortField, context, numericValues);
            return new ConstantScoreScorer(this, score(), scoreMode, disi);
          }
        }
        return fallbackWeight.scorer(context);
      }

      @Override
      public boolean isCacheable(LeafReaderContext ctx) {
        // Both queries should always return the same values, so we can just check
        // if the fallback query is cacheable.
        return fallbackWeight.isCacheable(ctx);
      }
    };
  }

  Computes the document IDs that lie within the range [lowerValue, upperValue] by performing binary search on the field's doc values. Because doc values only allow forward iteration, we need to reload the field comparator every time the binary search accesses an earlier element. We must also account for missing values when performing the binary search. For this reason, we load the FieldComparator instead of checking the docvalues directly. The returned DocIdSetIterator makes sure to wrap the original docvalues to skip over documents with no value. /**
   * Computes the document IDs that lie within the range [lowerValue, upperValue] by
   * performing binary search on the field's doc values.
   *
   * Because doc values only allow forward iteration, we need to reload the field comparator
   * every time the binary search accesses an earlier element.
   *
   * We must also account for missing values when performing the binary search. For this
   * reason, we load the {@link FieldComparator} instead of checking the docvalues directly.
   * The returned {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip
   * over documents with no value.
   */
  private DocIdSetIterator getDocIdSetIterator(SortField sortField,
                                               LeafReaderContext context,
                                               DocIdSetIterator delegate) throws IOException {
    long lower = sortField.getReverse() ? upperValue : lowerValue;
    long upper = sortField.getReverse() ? lowerValue : upperValue;
    int maxDoc = context.reader().maxDoc();

    // Perform a binary search to find the first document with value >= lower.
    ValueComparator comparator = loadComparator(sortField, lower, context);
    int low = 0;
    int high = maxDoc - 1;

    while (low <= high) {
      int mid = (low + high) >>> 1;
      if (comparator.compare(mid) <= 0) {
        high = mid - 1;
        comparator = loadComparator(sortField, lower, context);
      } else {
        low = mid + 1;
      }
    }
    int firstDocIdInclusive = high + 1;

    // Perform a binary search to find the first document with value > upper.
    // Since we know that upper >= lower, we can initialize the lower bound
    // of the binary search to the result of the previous search.
    comparator = loadComparator(sortField, upper, context);
    low = firstDocIdInclusive;
    high = maxDoc - 1;

    while (low <= high) {
      int mid = (low + high) >>> 1;
      if (comparator.compare(mid) < 0) {
        high = mid - 1;
        comparator = loadComparator(sortField, upper, context);
      } else {
        low = mid + 1;
      }
    }

    int lastDocIdExclusive = high + 1;
    return new BoundedDocSetIdIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
  }

  Compares the given document's value with a stored reference value.
/**
   * Compares the given document's value with a stored reference value.
   */
  private interface ValueComparator {
    int compare(int docID) throws IOException;
  }

  private static ValueComparator loadComparator(SortField sortField,
                                                long topValue,
                                                LeafReaderContext context) throws IOException {
    @SuppressWarnings("unchecked")
    FieldComparator<Long> fieldComparator = (FieldComparator<Long>) sortField.getComparator(1, 0);
    fieldComparator.setTopValue(topValue);

    LeafFieldComparator leafFieldComparator = fieldComparator.getLeafComparator(context);
    int direction = sortField.getReverse() ? -1 : 1;

    return doc -> {
      int value = leafFieldComparator.compareTop(doc);
      return direction * value;
    };
  }

  A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in
the range [firstDocInclusive, lastDoc).
/**
   * A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in
   * the range [firstDocInclusive, lastDoc).
   */
  private static class BoundedDocSetIdIterator extends DocIdSetIterator {
    private final int firstDoc;
    private final int lastDoc;
    private final DocIdSetIterator delegate;

    private int docID = -1;

    BoundedDocSetIdIterator(int firstDoc,
                            int lastDoc,
                            DocIdSetIterator delegate) {
      this.firstDoc = firstDoc;
      this.lastDoc = lastDoc;
      this.delegate = delegate;
    }

    @Override
    public int docID() {
      return docID;
    }

    @Override
    public int nextDoc() throws IOException {
      return advance(docID + 1);
    }

    @Override
    public int advance(int target) throws IOException {
      if (target < firstDoc) {
        target = firstDoc;
      }

      int result = delegate.advance(target);
      if (result < lastDoc) {
        docID = result;
        return docID;
      } else {
        return NO_MORE_DOCS;
      }
    }

    @Override
    public long cost() {
      return lastDoc - firstDoc;
    }
  }
}
/

org.apache.lucene/ lucene-sandbox/ 8.2.0/ org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java