/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.queries.intervals;

import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;

Constructor functions for IntervalsSource types These sources implement minimum-interval algorithms taken from the paper Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics By default, sources that are sensitive to internal gaps (e.g. PHRASE and MAXGAPS) will rewrite their sub-sources so that disjunctions of different lengths are pulled up to the top of the interval tree. For example, PHRASE(or(PHRASE("a", "b", "c"), "b"), "c") will automatically rewrite itself to OR(PHRASE("a", "b", "c", "c"), PHRASE("b", "c")) to ensure that documents containing "b c" are matched. This can lead to less efficient queries, as more terms need to be loaded (for example, the "c" iterator above is loaded twice), so if you care more about speed than about accuracy you can use the or(boolean, IntervalsSource...) factory method to prevent rewriting.
/** * Constructor functions for {@link IntervalsSource} types * * These sources implement minimum-interval algorithms taken from the paper * <a href="http://vigna.di.unimi.it/ftp/papers/EfficientAlgorithmsMinimalIntervalSemantics.pdf"> * Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics</a> * * By default, sources that are sensitive to internal gaps (e.g. PHRASE and MAXGAPS) will * rewrite their sub-sources so that disjunctions of different lengths are pulled up * to the top of the interval tree. For example, PHRASE(or(PHRASE("a", "b", "c"), "b"), "c") * will automatically rewrite itself to OR(PHRASE("a", "b", "c", "c"), PHRASE("b", "c")) * to ensure that documents containing "b c" are matched. This can lead to less efficient * queries, as more terms need to be loaded (for example, the "c" iterator above is loaded * twice), so if you care more about speed than about accuracy you can use the * {@link #or(boolean, IntervalsSource...)} factory method to prevent rewriting. */
public final class Intervals { private Intervals() {}
Return an IntervalsSource exposing intervals for a term
/** * Return an {@link IntervalsSource} exposing intervals for a term */
public static IntervalsSource term(BytesRef term) { return new TermIntervalsSource(term); }
Return an IntervalsSource exposing intervals for a term
/** * Return an {@link IntervalsSource} exposing intervals for a term */
public static IntervalsSource term(String term) { return new TermIntervalsSource(new BytesRef(term)); }
Return an IntervalsSource exposing intervals for a term, filtered by the value of the term's payload at each position
/** * Return an {@link IntervalsSource} exposing intervals for a term, filtered by the value * of the term's payload at each position */
public static IntervalsSource term(String term, Predicate<BytesRef> payloadFilter) { return term(new BytesRef(term), payloadFilter); }
Return an IntervalsSource exposing intervals for a term, filtered by the value of the term's payload at each position
/** * Return an {@link IntervalsSource} exposing intervals for a term, filtered by the value * of the term's payload at each position */
public static IntervalsSource term(BytesRef term, Predicate<BytesRef> payloadFilter) { return new PayloadFilteredTermIntervalsSource(term, payloadFilter); }
Return an IntervalsSource exposing intervals for a phrase consisting of a list of terms
/** * Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of terms */
public static IntervalsSource phrase(String... terms) { if (terms.length == 1) { return Intervals.term(terms[0]); } IntervalsSource[] sources = new IntervalsSource[terms.length]; int i = 0; for (String term : terms) { sources[i] = term(term); i++; } return phrase(sources); }
Return an IntervalsSource exposing intervals for a phrase consisting of a list of IntervalsSources
/** * Return an {@link IntervalsSource} exposing intervals for a phrase consisting of a list of IntervalsSources */
public static IntervalsSource phrase(IntervalsSource... subSources) { return BlockIntervalsSource.build(Arrays.asList(subSources)); }
Return an IntervalsSource over the disjunction of a set of sub-sources Automatically rewrites if wrapped by an interval source that is sensitive to internal gaps
/** * Return an {@link IntervalsSource} over the disjunction of a set of sub-sources * * Automatically rewrites if wrapped by an interval source that is sensitive to * internal gaps */
public static IntervalsSource or(IntervalsSource... subSources) { return or(true, Arrays.asList(subSources)); }
Return an IntervalsSource over the disjunction of a set of sub-sources
Params:
  • rewrite – if false, do not rewrite intervals that are sensitive to internal gaps; this may run more efficiently, but can miss valid hits due to minimization
  • subSources – the sources to combine
/** * Return an {@link IntervalsSource} over the disjunction of a set of sub-sources * * @param rewrite if {@code false}, do not rewrite intervals that are sensitive to * internal gaps; this may run more efficiently, but can miss valid * hits due to minimization * @param subSources the sources to combine */
public static IntervalsSource or(boolean rewrite, IntervalsSource... subSources) { return or(rewrite, Arrays.asList(subSources)); }
Return an IntervalsSource over the disjunction of a set of sub-sources
/** * Return an {@link IntervalsSource} over the disjunction of a set of sub-sources */
public static IntervalsSource or(List<IntervalsSource> subSources) { return or(true, subSources); }
Return an IntervalsSource over the disjunction of a set of sub-sources
Params:
  • rewrite – if false, do not rewrite intervals that are sensitive to internal gaps; this may run more efficiently, but can miss valid hits due to minimization
  • subSources – the sources to combine
/** * Return an {@link IntervalsSource} over the disjunction of a set of sub-sources * * @param rewrite if {@code false}, do not rewrite intervals that are sensitive to * internal gaps; this may run more efficiently, but can miss valid * hits due to minimization * @param subSources the sources to combine */
public static IntervalsSource or(boolean rewrite, List<IntervalsSource> subSources) { return DisjunctionIntervalsSource.create(subSources, rewrite); }
Return an IntervalsSource over the disjunction of all terms that begin with a prefix
Throws:
/** * Return an {@link IntervalsSource} over the disjunction of all terms that begin with a prefix * * @throws IllegalStateException if the prefix expands to more than 128 terms */
public static IntervalsSource prefix(BytesRef prefix) { return prefix(prefix, 128); }
Expert: Return an IntervalsSource over the disjunction of all terms that begin with a prefix WARNING: Setting maxExpansions to higher than the default value of 128 can be both slow and memory-intensive
Params:
  • prefix – the prefix to expand
  • maxExpansions – the maximum number of terms to expand to
Throws:
/** * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that begin with a prefix * * WARNING: Setting {@code maxExpansions} to higher than the default value of 128 * can be both slow and memory-intensive * * @param prefix the prefix to expand * @param maxExpansions the maximum number of terms to expand to * * @throws IllegalStateException if the prefix expands to more than {@code maxExpansions} terms */
public static IntervalsSource prefix(BytesRef prefix, int maxExpansions) { CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(prefix)); return new MultiTermIntervalsSource(ca, maxExpansions, prefix.utf8ToString() + "*"); }
Return an IntervalsSource over the disjunction of all terms that match a wildcard glob
Throws:
See Also:
/** * Return an {@link IntervalsSource} over the disjunction of all terms that match a wildcard glob * * @throws IllegalStateException if the wildcard glob expands to more than 128 terms * * @see WildcardQuery for glob format */
public static IntervalsSource wildcard(BytesRef wildcard) { return wildcard(wildcard, 128); }
Expert: Return an IntervalsSource over the disjunction of all terms that match a wildcard glob WARNING: Setting maxExpansions to higher than the default value of 128 can be both slow and memory-intensive
Params:
  • wildcard – the glob to expand
  • maxExpansions – the maximum number of terms to expand to
Throws:
See Also:
/** * Expert: Return an {@link IntervalsSource} over the disjunction of all terms that match a wildcard glob * * WARNING: Setting {@code maxExpansions} to higher than the default value of 128 * can be both slow and memory-intensive * * @param wildcard the glob to expand * @param maxExpansions the maximum number of terms to expand to * * @throws IllegalStateException if the wildcard glob expands to more than {@code maxExpansions} terms * * @see WildcardQuery for glob format */
public static IntervalsSource wildcard(BytesRef wildcard, int maxExpansions) { CompiledAutomaton ca = new CompiledAutomaton(WildcardQuery.toAutomaton(new Term("", wildcard))); return new MultiTermIntervalsSource(ca, maxExpansions, wildcard.utf8ToString()); }
Create an IntervalsSource that filters a sub-source by the width of its intervals
Params:
  • width – the maximum width of intervals in the sub-source to filter
  • subSource – the sub-source to filter
/** * Create an {@link IntervalsSource} that filters a sub-source by the width of its intervals * @param width the maximum width of intervals in the sub-source to filter * @param subSource the sub-source to filter */
public static IntervalsSource maxwidth(int width, IntervalsSource subSource) { return FilteredIntervalsSource.maxWidth(subSource, width); }
Create an IntervalsSource that filters a sub-source by its gaps
Params:
  • gaps – the maximum number of gaps in the sub-source to filter
  • subSource – the sub-source to filter
/** * Create an {@link IntervalsSource} that filters a sub-source by its gaps * @param gaps the maximum number of gaps in the sub-source to filter * @param subSource the sub-source to filter */
public static IntervalsSource maxgaps(int gaps, IntervalsSource subSource) { return FilteredIntervalsSource.maxGaps(subSource, gaps); }
Create an IntervalsSource that wraps another source, extending its intervals by a number of positions before and after. This can be useful for adding defined gaps in a block query; for example, to find 'a b [2 arbitrary terms] c', you can call:
  Intervals.phrase(Intervals.term("a"), Intervals.extend(Intervals.term("b"), 0, 2), Intervals.term("c"));
Note that calling IntervalIterator.gaps() on iterators returned by this source delegates directly to the wrapped iterator, and does not include the extensions.
Params:
  • source – the source to extend
  • before – how many positions to extend before the delegated interval
  • after – how many positions to extend after the delegated interval
/** * Create an {@link IntervalsSource} that wraps another source, extending its * intervals by a number of positions before and after. * * This can be useful for adding defined gaps in a block query; for example, * to find 'a b [2 arbitrary terms] c', you can call: * <pre> * Intervals.phrase(Intervals.term("a"), Intervals.extend(Intervals.term("b"), 0, 2), Intervals.term("c")); * </pre> * * Note that calling {@link IntervalIterator#gaps()} on iterators returned by this source * delegates directly to the wrapped iterator, and does not include the extensions. * * @param source the source to extend * @param before how many positions to extend before the delegated interval * @param after how many positions to extend after the delegated interval */
public static IntervalsSource extend(IntervalsSource source, int before, int after) { return new ExtendedIntervalsSource(source, before, after); }
Create an ordered IntervalsSource Returns intervals in which the subsources all appear in the given order
Params:
/** * Create an ordered {@link IntervalsSource} * * Returns intervals in which the subsources all appear in the given order * * @param subSources an ordered set of {@link IntervalsSource} objects */
public static IntervalsSource ordered(IntervalsSource... subSources) { return OrderedIntervalsSource.build(Arrays.asList(subSources)); }
Create an unordered IntervalsSource Returns intervals in which all the subsources appear. The subsources may overlap
Params:
/** * Create an unordered {@link IntervalsSource} * * Returns intervals in which all the subsources appear. The subsources may overlap * * @param subSources an unordered set of {@link IntervalsSource}s */
public static IntervalsSource unordered(IntervalsSource... subSources) { return UnorderedIntervalsSource.build(Arrays.asList(subSources)); }
Create an unordered IntervalsSource allowing no overlaps between subsources Returns intervals in which both the subsources appear and do not overlap.
/** * Create an unordered {@link IntervalsSource} allowing no overlaps between subsources * * Returns intervals in which both the subsources appear and do not overlap. */
public static IntervalsSource unorderedNoOverlaps(IntervalsSource a, IntervalsSource b) { return Intervals.or(Intervals.ordered(a, b), Intervals.ordered(b, a)); }
Create an IntervalsSource that always returns intervals from a specific field This is useful for comparing intervals across multiple fields, for example fields that have been analyzed differently, allowing you to search for stemmed terms near unstemmed terms, etc.
/** * Create an {@link IntervalsSource} that always returns intervals from a specific field * * This is useful for comparing intervals across multiple fields, for example fields that * have been analyzed differently, allowing you to search for stemmed terms near unstemmed * terms, etc. */
public static IntervalsSource fixField(String field, IntervalsSource source) { return new FixedFieldIntervalsSource(field, source); }
Create a non-overlapping IntervalsSource Returns intervals of the minuend that do not overlap with intervals from the subtrahend
Params:
/** * Create a non-overlapping IntervalsSource * * Returns intervals of the minuend that do not overlap with intervals from the subtrahend * @param minuend the {@link IntervalsSource} to filter * @param subtrahend the {@link IntervalsSource} to filter by */
public static IntervalsSource nonOverlapping(IntervalsSource minuend, IntervalsSource subtrahend) { return new NonOverlappingIntervalsSource(minuend, subtrahend); }
Returns intervals from a source that overlap with intervals from another source
Params:
  • source – the source to filter
  • reference – the source to filter by
/** * Returns intervals from a source that overlap with intervals from another source * @param source the source to filter * @param reference the source to filter by */
public static IntervalsSource overlapping(IntervalsSource source, IntervalsSource reference) { return new OverlappingIntervalsSource(source, reference); }
Create a not-within IntervalsSource Returns intervals of the minuend that do not appear within a set number of positions of intervals from the subtrahend query
Params:
  • minuend – the IntervalsSource to filter
  • positions – the minimum distance that intervals from the minuend may occur from intervals of the subtrahend
  • subtrahend – the IntervalsSource to filter by
/** * Create a not-within {@link IntervalsSource} * * Returns intervals of the minuend that do not appear within a set number of positions of * intervals from the subtrahend query * * @param minuend the {@link IntervalsSource} to filter * @param positions the minimum distance that intervals from the minuend may occur from intervals * of the subtrahend * @param subtrahend the {@link IntervalsSource} to filter by */
public static IntervalsSource notWithin(IntervalsSource minuend, int positions, IntervalsSource subtrahend) { return new NonOverlappingIntervalsSource(minuend, Intervals.extend(subtrahend, positions, positions)); }
Returns intervals of the source that appear within a set number of positions of intervals from the reference
Params:
  • source – the IntervalsSource to filter
  • positions – the maximum distance that intervals of the source may occur from intervals of the reference
  • reference – the IntervalsSource to filter by
/** * Returns intervals of the source that appear within a set number of positions of intervals from * the reference * * @param source the {@link IntervalsSource} to filter * @param positions the maximum distance that intervals of the source may occur from intervals of the reference * @param reference the {@link IntervalsSource} to filter by */
public static IntervalsSource within(IntervalsSource source, int positions, IntervalsSource reference) { return containedBy(source, Intervals.extend(reference, positions, positions)); }
Create a not-containing IntervalsSource Returns intervals from the minuend that do not contain intervals of the subtrahend
Params:
/** * Create a not-containing {@link IntervalsSource} * * Returns intervals from the minuend that do not contain intervals of the subtrahend * * @param minuend the {@link IntervalsSource} to filter * @param subtrahend the {@link IntervalsSource} to filter by */
public static IntervalsSource notContaining(IntervalsSource minuend, IntervalsSource subtrahend) { return NotContainingIntervalsSource.build(minuend, subtrahend); }
Create a containing IntervalsSource Returns intervals from the big source that contain one or more intervals from the small source
Params:
/** * Create a containing {@link IntervalsSource} * * Returns intervals from the big source that contain one or more intervals from * the small source * * @param big the {@link IntervalsSource} to filter * @param small the {@link IntervalsSource} to filter by */
public static IntervalsSource containing(IntervalsSource big, IntervalsSource small) { return ContainingIntervalsSource.build(big, small); }
Create a not-contained-by IntervalsSource Returns intervals from the small IntervalsSource that do not appear within intervals from the big IntervalsSource.
Params:
/** * Create a not-contained-by {@link IntervalsSource} * * Returns intervals from the small {@link IntervalsSource} that do not appear within * intervals from the big {@link IntervalsSource}. * * @param small the {@link IntervalsSource} to filter * @param big the {@link IntervalsSource} to filter by */
public static IntervalsSource notContainedBy(IntervalsSource small, IntervalsSource big) { return NotContainedByIntervalsSource.build(small, big); }
Create a contained-by IntervalsSource Returns intervals from the small query that appear within intervals of the big query
Params:
/** * Create a contained-by {@link IntervalsSource} * * Returns intervals from the small query that appear within intervals of the big query * * @param small the {@link IntervalsSource} to filter * @param big the {@link IntervalsSource} to filter by */
public static IntervalsSource containedBy(IntervalsSource small, IntervalsSource big) { return ContainedByIntervalsSource.build(small, big); }
Return intervals that span combinations of intervals from minShouldMatch of the sources
/** * Return intervals that span combinations of intervals from {@code minShouldMatch} of the sources */
public static IntervalsSource atLeast(int minShouldMatch, IntervalsSource... sources) { return new MinimumShouldMatchIntervalsSource(sources, minShouldMatch); }
Returns intervals from the source that appear before intervals from the reference
/** * Returns intervals from the source that appear before intervals from the reference */
public static IntervalsSource before(IntervalsSource source, IntervalsSource reference) { return ContainedByIntervalsSource.build(source, Intervals.extend(new OffsetIntervalsSource(reference, true), Integer.MAX_VALUE, 0)); }
Returns intervals from the source that appear after intervals from the reference
/** * Returns intervals from the source that appear after intervals from the reference */
public static IntervalsSource after(IntervalsSource source, IntervalsSource reference) { return ContainedByIntervalsSource.build(source, Intervals.extend(new OffsetIntervalsSource(reference, false), 0, Integer.MAX_VALUE)); } }