/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.SparseFixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedMutable;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

Holds updates of a single DocValues field, for a set of documents within one segment.
@lucene.experimental
/** * Holds updates of a single DocValues field, for a set of documents within one segment. * * @lucene.experimental */
abstract class DocValuesFieldUpdates implements Accountable { protected static final int PAGE_SIZE = 1024; private static final long HAS_VALUE_MASK = 1; private static final long HAS_NO_VALUE_MASK = 0; private static final int SHIFT = 1; // we use the first bit of each value to mark if the doc has a value or not
An iterator over documents and their updated values. Only documents with updates are returned by this iterator, and the documents are returned in increasing order.
/** * An iterator over documents and their updated values. Only documents with * updates are returned by this iterator, and the documents are returned in * increasing order. */
static abstract class Iterator extends DocValuesIterator { @Override public final boolean advanceExact(int target) { throw new UnsupportedOperationException(); } @Override public final int advance(int target) { throw new UnsupportedOperationException(); } @Override public final long cost() { throw new UnsupportedOperationException(); } @Override public abstract int nextDoc(); // no IOException
Returns a long value for the current document if this iterator is a long iterator.
/** * Returns a long value for the current document if this iterator is a long iterator. */
abstract long longValue();
Returns a binary value for the current document if this iterator is a binary value iterator.
/** * Returns a binary value for the current document if this iterator is a binary value iterator. */
abstract BytesRef binaryValue();
Returns delGen for this packet.
/** Returns delGen for this packet. */
abstract long delGen();
Returns true if this doc has a value
/** * Returns true if this doc has a value */
abstract boolean hasValue();
Wraps the given iterator as a BinaryDocValues instance.
/** * Wraps the given iterator as a BinaryDocValues instance. */
static BinaryDocValues asBinaryDocValues(Iterator iterator) { return new BinaryDocValues() { @Override public int docID() { return iterator.docID(); } @Override public BytesRef binaryValue() { return iterator.binaryValue(); } @Override public boolean advanceExact(int target) { return iterator.advanceExact(target); } @Override public int nextDoc() { return iterator.nextDoc(); } @Override public int advance(int target) { return iterator.advance(target); } @Override public long cost() { return iterator.cost(); } }; }
Wraps the given iterator as a NumericDocValues instance.
/** * Wraps the given iterator as a NumericDocValues instance. */
static NumericDocValues asNumericDocValues(Iterator iterator) { return new NumericDocValues() { @Override public long longValue() { return iterator.longValue(); } @Override public boolean advanceExact(int target) { throw new UnsupportedOperationException(); } @Override public int docID() { return iterator.docID(); } @Override public int nextDoc() { return iterator.nextDoc(); } @Override public int advance(int target) { return iterator.advance(target); } @Override public long cost() { return iterator.cost(); } }; } }
Merge-sorts multiple iterators, one per delGen, favoring the largest delGen that has updates for a given docID.
/** Merge-sorts multiple iterators, one per delGen, favoring the largest delGen that has updates for a given docID. */
public static Iterator mergedIterator(Iterator[] subs) { if (subs.length == 1) { return subs[0]; } PriorityQueue<Iterator> queue = new PriorityQueue<Iterator>(subs.length) { @Override protected boolean lessThan(Iterator a, Iterator b) { // sort by smaller docID int cmp = Integer.compare(a.docID(), b.docID()); if (cmp == 0) { // then by larger delGen cmp = Long.compare(b.delGen(), a.delGen()); // delGens are unique across our subs: assert cmp != 0; } return cmp < 0; } }; for (Iterator sub : subs) { if (sub.nextDoc() != NO_MORE_DOCS) { queue.add(sub); } } if (queue.size() == 0) { return null; } return new Iterator() { private int doc = -1; @Override public int nextDoc() { // Advance all sub iterators past current doc while (true) { if (queue.size() == 0) { doc = NO_MORE_DOCS; break; } int newDoc = queue.top().docID(); if (newDoc != doc) { assert newDoc > doc: "doc=" + doc + " newDoc=" + newDoc; doc = newDoc; break; } if (queue.top().nextDoc() == NO_MORE_DOCS) { queue.pop(); } else { queue.updateTop(); } } return doc; } @Override public int docID() { return doc; } @Override long longValue() { return queue.top().longValue(); } @Override BytesRef binaryValue() { return queue.top().binaryValue(); } @Override public long delGen() { throw new UnsupportedOperationException(); } @Override boolean hasValue() { return queue.top().hasValue(); } }; } final String field; final DocValuesType type; final long delGen; private final int bitsPerValue; private boolean finished; protected final int maxDoc; protected PagedMutable docs; protected int size; protected DocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) { this.maxDoc = maxDoc; this.delGen = delGen; this.field = field; if (type == null) { throw new NullPointerException("DocValuesType must not be null"); } this.type = type; bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT; docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT); } final boolean getFinished() { return finished; } abstract void add(int doc, long value); abstract void add(int doc, BytesRef value);
Adds the value for the given docID. This method prevents conditional calls to Iterator.longValue() or Iterator.binaryValue() since the implementation knows if it's a long value iterator or binary value
/** * Adds the value for the given docID. * This method prevents conditional calls to {@link Iterator#longValue()} or {@link Iterator#binaryValue()} * since the implementation knows if it's a long value iterator or binary value */
abstract void add(int docId, Iterator iterator);
Returns an Iterator over the updated documents and their values.
/** * Returns an {@link Iterator} over the updated documents and their * values. */
// TODO: also use this for merging, instead of having to write through to disk first abstract Iterator iterator();
Freezes internal data structures and sorts updates by docID for efficient iteration.
/** Freezes internal data structures and sorts updates by docID for efficient iteration. */
final synchronized void finish() { if (finished) { throw new IllegalStateException("already finished"); } finished = true; // shrink wrap if (size < docs.size()) { resize(size); } if (size > 0) { // We need a stable sort but InPlaceMergeSorter performs lots of swaps // which hurts performance due to all the packed ints we are using. // Another option would be TimSorter, but it needs additional API (copy to // temp storage, compare with item in temp storage, etc.) so we instead // use quicksort and record ords of each update to guarantee stability. final PackedInts.Mutable ords = PackedInts.getMutable(size, PackedInts.bitsRequired(size - 1), PackedInts.DEFAULT); for (int i = 0; i < size; ++i) { ords.set(i, i); } new IntroSorter() { @Override protected void swap(int i, int j) { final long tmpOrd = ords.get(i); ords.set(i, ords.get(j)); ords.set(j, tmpOrd); DocValuesFieldUpdates.this.swap(i, j); } @Override protected int compare(int i, int j) { // increasing docID order: // NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being // stable and preserving original order so the last update to that docID wins int cmp = Long.compare(docs.get(i)>>>1, docs.get(j)>>>1); if (cmp == 0) { cmp = (int) (ords.get(i) - ords.get(j)); } return cmp; } long pivotDoc; int pivotOrd; @Override protected void setPivot(int i) { pivotDoc = docs.get(i) >>> 1; pivotOrd = (int) ords.get(i); } @Override protected int comparePivot(int j) { int cmp = Long.compare(pivotDoc, docs.get(j) >>> 1); if (cmp == 0) { cmp = pivotOrd - (int) ords.get(j); } return cmp; } }.sort(0, size); } }
Returns true if this instance contains any updates.
/** Returns true if this instance contains any updates. */
synchronized boolean any() { return size > 0; } synchronized final int size() { return size; }
Adds an update that resets the documents value.
Params:
  • doc – the doc to update
/** * Adds an update that resets the documents value. * @param doc the doc to update */
synchronized void reset(int doc) { addInternal(doc, HAS_NO_VALUE_MASK); } final synchronized int add(int doc) { return addInternal(doc, HAS_VALUE_MASK); } private synchronized int addInternal(int doc, long hasValueMask) { if (finished) { throw new IllegalStateException("already finished"); } assert doc < maxDoc; // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation if (size == Integer.MAX_VALUE) { throw new IllegalStateException("cannot support more than Integer.MAX_VALUE doc/value entries"); } // grow the structures to have room for more elements if (docs.size() == size) { grow(size+1); } docs.set(size, (((long)doc) << SHIFT) | hasValueMask); ++size; return size-1; } protected void swap(int i, int j) { long tmpDoc = docs.get(j); docs.set(j, docs.get(i)); docs.set(i, tmpDoc); } protected void grow(int size) { docs = docs.grow(size); } protected void resize(int size) { docs = docs.resize(size); } protected final void ensureFinished() { if (finished == false) { throw new IllegalStateException("call finish first"); } } @Override public long ramBytesUsed() { return docs.ramBytesUsed() + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 2 * Integer.BYTES + 2 + Long.BYTES + RamUsageEstimator.NUM_BYTES_OBJECT_REF; } // TODO: can't this just be NumericDocValues now? avoid boxing the long value... protected abstract static class AbstractIterator extends DocValuesFieldUpdates.Iterator { private final int size; private final PagedMutable docs; private long idx = 0; // long so we don't overflow if size == Integer.MAX_VALUE private int doc = -1; private final long delGen; private boolean hasValue; AbstractIterator(int size, PagedMutable docs, long delGen) { this.size = size; this.docs = docs; this.delGen = delGen; } @Override public final int nextDoc() { if (idx >= size) { return doc = DocIdSetIterator.NO_MORE_DOCS; } long longDoc = docs.get(idx); ++idx; for (; idx < size; idx++) { // scan forward to last update to this doc final long nextLongDoc = docs.get(idx); if ((longDoc >>> 1) != (nextLongDoc >>> 1)) { break; } longDoc = nextLongDoc; } hasValue = (longDoc & HAS_VALUE_MASK) > 0; if (hasValue) { set(idx - 1); } doc = (int)(longDoc >> SHIFT); return doc; }
Called when the iterator moved to the next document
Params:
  • idx – the internal index to set the value to
/** * Called when the iterator moved to the next document * @param idx the internal index to set the value to */
protected abstract void set(long idx); @Override public final int docID() { return doc; } @Override final long delGen() { return delGen; } @Override final boolean hasValue() { return hasValue; } } static abstract class SingleValueDocValuesFieldUpdates extends DocValuesFieldUpdates { private final BitSet bitSet; private BitSet hasNoValue; private boolean hasAtLeastOneValue; protected SingleValueDocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) { super(maxDoc, delGen, field, type); this.bitSet = new SparseFixedBitSet(maxDoc); } @Override void add(int doc, long value) { assert longValue() == value; bitSet.set(doc); this.hasAtLeastOneValue = true; if (hasNoValue != null) { hasNoValue.clear(doc); } } @Override void add(int doc, BytesRef value) { assert binaryValue().equals(value); bitSet.set(doc); this.hasAtLeastOneValue = true; if (hasNoValue != null) { hasNoValue.clear(doc); } } @Override synchronized void reset(int doc) { bitSet.set(doc); this.hasAtLeastOneValue = true; if (hasNoValue == null) { hasNoValue = new SparseFixedBitSet(maxDoc); } hasNoValue.set(doc); } @Override void add(int docId, Iterator iterator) { throw new UnsupportedOperationException(); } protected abstract BytesRef binaryValue(); protected abstract long longValue(); @Override synchronized boolean any() { return super.any() || hasAtLeastOneValue; } @Override public long ramBytesUsed() { return super.ramBytesUsed() + bitSet.ramBytesUsed() + (hasNoValue == null ? 0 : hasNoValue.ramBytesUsed()); } @Override Iterator iterator() { BitSetIterator iterator = new BitSetIterator(bitSet, maxDoc); return new DocValuesFieldUpdates.Iterator() { @Override public int docID() { return iterator.docID(); } @Override public int nextDoc() { return iterator.nextDoc(); } @Override long longValue() { return SingleValueDocValuesFieldUpdates.this.longValue(); } @Override BytesRef binaryValue() { return SingleValueDocValuesFieldUpdates.this.binaryValue(); } @Override long delGen() { return delGen; } @Override boolean hasValue() { if (hasNoValue != null) { return hasNoValue.get(docID()) == false; } return true; } }; } } }