/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
A multi-valued version of SortedDocValues
.
Per-Document values in a SortedSetDocValues are deduplicated, dereferenced,
and sorted into a dictionary of unique values. A pointer to the
dictionary value (ordinal) can be retrieved for each document. Ordinals
are dense and in increasing sorted order.
/**
* A multi-valued version of {@link SortedDocValues}.
* <p>
* Per-Document values in a SortedSetDocValues are deduplicated, dereferenced,
* and sorted into a dictionary of unique values. A pointer to the
* dictionary value (ordinal) can be retrieved for each document. Ordinals
* are dense and in increasing sorted order.
*/
public abstract class SortedSetDocValues extends DocValuesIterator {
Sole constructor. (For invocation by subclass
constructors, typically implicit.) /** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected SortedSetDocValues() {}
When returned by nextOrd()
it means there are no more ordinals for the document. /** When returned by {@link #nextOrd()} it means there are no more
* ordinals for the document.
*/
public static final long NO_MORE_ORDS = -1;
Returns the next ordinal for the current document. It is illegal to call this method after DocValuesIterator.advanceExact(int)
returned false
. Returns: next ordinal for the document, or NO_MORE_ORDS
. ordinals are dense, start at 0, then increment by 1 for the next value in sorted order.
/**
* Returns the next ordinal for the current document.
* It is illegal to call this method after {@link #advanceExact(int)}
* returned {@code false}.
* @return next ordinal for the document, or {@link #NO_MORE_ORDS}.
* ordinals are dense, start at 0, then increment by 1 for
* the next value in sorted order.
*/
public abstract long nextOrd() throws IOException;
// TODO: should we have a docValueCount, like SortedNumeric?
Retrieves the value for the specified ordinal. The returned BytesRef
may be re-used across calls to lookupOrd so make sure to copy it
if you want to keep it around. Params: - ord – ordinal to lookup
See Also:
/** Retrieves the value for the specified ordinal. The returned
* {@link BytesRef} may be re-used across calls to lookupOrd so make sure to
* {@link BytesRef#deepCopyOf(BytesRef) copy it} if you want to keep it
* around.
* @param ord ordinal to lookup
* @see #nextOrd
*/
public abstract BytesRef lookupOrd(long ord) throws IOException;
Returns the number of unique values.
Returns: number of unique values in this SortedDocValues. This is
also equivalent to one plus the maximum ordinal.
/**
* Returns the number of unique values.
* @return number of unique values in this SortedDocValues. This is
* also equivalent to one plus the maximum ordinal.
*/
public abstract long getValueCount();
If key
exists, returns its ordinal, else returns -insertionPoint-1
, like
Arrays.binarySearch
. @param key Key to look up /** If {@code key} exists, returns its ordinal, else
* returns {@code -insertionPoint-1}, like {@code
* Arrays.binarySearch}.
*
* @param key Key to look up
**/
public long lookupTerm(BytesRef key) throws IOException {
long low = 0;
long high = getValueCount()-1;
while (low <= high) {
long mid = (low + high) >>> 1;
final BytesRef term = lookupOrd(mid);
int cmp = term.compareTo(key);
if (cmp < 0) {
low = mid + 1;
} else if (cmp > 0) {
high = mid - 1;
} else {
return mid; // key found
}
}
return -(low + 1); // key not found.
}
Returns a TermsEnum
over the values. The enum supports TermsEnum.ord()
and TermsEnum.seekExact(long)
. /**
* Returns a {@link TermsEnum} over the values.
* The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
*/
public TermsEnum termsEnum() throws IOException {
return new SortedSetDocValuesTermsEnum(this);
}
Returns a TermsEnum
over the values, filtered by a CompiledAutomaton
The enum supports TermsEnum.ord()
. /**
* Returns a {@link TermsEnum} over the values, filtered by a {@link CompiledAutomaton}
* The enum supports {@link TermsEnum#ord()}.
*/
public TermsEnum intersect(CompiledAutomaton automaton) throws IOException {
TermsEnum in = termsEnum();
switch (automaton.type) {
case NONE:
return TermsEnum.EMPTY;
case ALL:
return in;
case SINGLE:
return new SingleTermsEnum(in, automaton.term);
case NORMAL:
return new AutomatonTermsEnum(in, automaton);
default:
// unreachable
throw new RuntimeException("unhandled case");
}
}
}