/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;

A per-document byte[] with presorted values. This is fundamentally an iterator over the int ord values per document, with random access APIs to resolve an int ord to BytesRef.

Per-Document values in a SortedDocValues are deduplicated, dereferenced, and sorted into a dictionary of unique values. A pointer to the dictionary value (ordinal) can be retrieved for each document. Ordinals are dense and in increasing sorted order.

/**
 * A per-document byte[] with presorted values. This is fundamentally an
 * iterator over the int ord values per document, with random access APIs
 * to resolve an int ord to BytesRef.
 * <p>
 * Per-Document values in a SortedDocValues are deduplicated, dereferenced,
 * and sorted into a dictionary of unique values. A pointer to the
 * dictionary value (ordinal) can be retrieved for each document. Ordinals
 * are dense and in increasing sorted order.
 */
public abstract class SortedDocValues extends BinaryDocValues {
Sole constructor. (For invocation by subclass constructors, typically implicit.)
/**
 * Sole constructor. (For invocation by subclass
 * constructors, typically implicit.)
 */
protected SortedDocValues() {}
Returns the ordinal for the current docID. It is illegal to call this method after DocValuesIterator.advanceExact(int) returned false.
Returns: ordinal for the document: this is dense, starts at 0, then increments by 1 for the next value in sorted order.
/**
 * Returns the ordinal for the current docID.
 * It is illegal to call this method after {@link #advanceExact(int)}
 * returned {@code false}.
 *
 * @return ordinal for the document: this is dense, starts at 0, then
 *         increments by 1 for the next value in sorted order.
 */
public abstract int ordValue() throws IOException;
Retrieves the value for the specified ordinal. The returned BytesRef may be re-used across calls to lookupOrd(int) so make sure to copy it if you want to keep it around.
Params: ord – ordinal to lookup (must be >= 0 and < getValueCount())
See Also: ordValue()
/**
 * Retrieves the value for the specified ordinal. The returned
 * {@link BytesRef} may be re-used across calls to {@link #lookupOrd(int)}
 * so make sure to {@link BytesRef#deepCopyOf(BytesRef) copy it} if you want
 * to keep it around.
 *
 * @param ord ordinal to lookup (must be &gt;= 0 and &lt; {@link #getValueCount()})
 * @see #ordValue()
 */
public abstract BytesRef lookupOrd(int ord) throws IOException;

/** Instance handed back by {@link #binaryValue()} when {@link #ordValue()} is {@code -1}. */
private final BytesRef empty = new BytesRef();

/**
 * Resolves the ordinal reported by {@link #ordValue()} to its value.
 *
 * @return the per-instance empty {@link BytesRef} when the ordinal is {@code -1},
 *         otherwise the result of {@link #lookupOrd(int)} for that ordinal
 */
@Override
public BytesRef binaryValue() throws IOException {
  final int currentOrd = ordValue();
  // -1 is treated as "no value": answer with the empty instance instead of a lookup.
  return currentOrd == -1 ? empty : lookupOrd(currentOrd);
}
Returns the number of unique values.
Returns: number of unique values in this SortedDocValues. This is also equivalent to one plus the maximum ordinal.
/**
 * Returns the number of unique values.
 *
 * @return number of unique values in this SortedDocValues. This is
 *         also equivalent to one plus the maximum ordinal.
 */
public abstract int getValueCount();
If key exists, returns its ordinal, else returns -insertionPoint-1, like Arrays.binarySearch.
Params: key – Key to look up
/** If {@code key} exists, returns its ordinal, else * returns {@code -insertionPoint-1}, like {@code * Arrays.binarySearch}. * * @param key Key to look up **/
public int lookupTerm(BytesRef key) throws IOException { int low = 0; int high = getValueCount()-1; while (low <= high) { int mid = (low + high) >>> 1; final BytesRef term = lookupOrd(mid); int cmp = term.compareTo(key); if (cmp < 0) { low = mid + 1; } else if (cmp > 0) { high = mid - 1; } else { return mid; // key found } } return -(low + 1); // key not found. }
Returns a TermsEnum over the values. The enum supports TermsEnum.ord() and TermsEnum.seekExact(long).
/**
 * Returns a {@link TermsEnum} over the values.
 * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
 *
 * @return a new {@code SortedDocValuesTermsEnum} wrapping this instance
 */
public TermsEnum termsEnum() throws IOException {
  final TermsEnum valuesEnum = new SortedDocValuesTermsEnum(this);
  return valuesEnum;
}
Returns a TermsEnum over the values, filtered by a CompiledAutomaton. The enum supports TermsEnum.ord().
/** * Returns a {@link TermsEnum} over the values, filtered by a {@link CompiledAutomaton} * The enum supports {@link TermsEnum#ord()}. */
public TermsEnum intersect(CompiledAutomaton automaton) throws IOException { TermsEnum in = termsEnum(); switch (automaton.type) { case NONE: return TermsEnum.EMPTY; case ALL: return in; case SINGLE: return new SingleTermsEnum(in, automaton.term); case NORMAL: return new AutomatonTermsEnum(in, automaton); default: // unreachable throw new RuntimeException("unhandled case"); } } }