/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.index;

import java.io.Closeable;
import java.io.IOException;

import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOSupplier;
import org.apache.lucene.util.IOUtils;

/**
 * A {@link PendingDeletes} implementation that additionally tracks soft deletes recorded in a
 * doc-values field. Hard (traditional) deletes are delegated to a wrapped {@link PendingDeletes}
 * instance, while this class layers soft-delete accounting (counts and the doc-values generation
 * they were computed for) on top of it.
 */
final class PendingSoftDeletes extends PendingDeletes {

  // Name of the doc-values field whose presence marks a document as soft-deleted.
  private final String field;
  // Doc-values generation the soft-delete counts were last computed for;
  // -2 means "uninitialized" (checked in ensureInitialized and asserted in onDocValuesUpdate).
  private long dvGeneration = -2;
  // Delegate tracking the hard deletes for the same segment.
  private final PendingDeletes hardDeletes;

  // NOTE(review): third super arg presumably signals "live docs already initialized" when the
  // segment has no hard deletes — confirm against PendingDeletes' constructor.
  PendingSoftDeletes(String field, SegmentCommitInfo info)  {
    super(info, null, info.getDelCount(true) == 0);
    this.field = field;
    hardDeletes = new PendingDeletes(info);
  }

  PendingSoftDeletes(String field, SegmentReader reader, SegmentCommitInfo info) {
    super(reader, info);
    this.field = field;
    hardDeletes = new PendingDeletes(reader, info);
  }

  @Override
  boolean delete(int docID) throws IOException {
    // Fetch our mutable bits BEFORE delegating to hardDeletes: the bitset may be a shared
    // instance, and delegating first could mutate or replace it underneath us.
    FixedBitSet mutableBits = getMutableBits(); // we need to fetch this first it might be a shared instance with hardDeletes
    if (hardDeletes.delete(docID)) {
      // The doc was live from the hard-deletes perspective.
      if (mutableBits.get(docID)) { // delete it here too!
        mutableBits.clear(docID);
        // Re-deleting in the delegate must now report "already deleted".
        assert hardDeletes.delete(docID) == false;
      } else {
        // Doc was already soft-deleted here; it now counts as a hard delete instead,
        // so remove it from our soft-delete accounting.
        // if it was deleted subtract the delCount
        pendingDeleteCount--;
        assert assertPendingDeletes();
      }
      return true;
    }
    return false;
  }

  @Override
  protected int numPendingDeletes() {
    // Pending deletes are the sum of the soft deletes tracked by this instance
    // and the hard deletes tracked by the delegate.
    final int softPending = super.numPendingDeletes();
    final int hardPending = hardDeletes.numPendingDeletes();
    return softPending + hardPending;
  }

  @Override
  void onNewReader(CodecReader reader, SegmentCommitInfo info) throws IOException {
    // Let the superclass and the hard-delete delegate pick up the reader's live docs first.
    super.onNewReader(reader, info);
    hardDeletes.onNewReader(reader, info);
    if (dvGeneration < info.getDocValuesGen()) { // only re-calculate this if we haven't seen this generation
      // Iterator over docs that have a value in the soft-deletes field for this reader.
      final DocIdSetIterator iterator = DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(field, reader);
      int newDelCount;
      if (iterator != null) { // nothing is deleted we don't have a soft deletes field in this segment
        assert info.info.maxDoc() > 0 : "maxDoc is 0";
        // Clear the soft-deleted docs out of our live-docs bits; returns the net bits changed.
        newDelCount = applySoftDeletes(iterator, getMutableBits());
        assert newDelCount >= 0 : " illegal pending delete count: " + newDelCount;
      } else {
        newDelCount = 0;
      }
      // The commit info's recorded soft-delete count must agree with what we just computed.
      assert info.getSoftDelCount() == newDelCount : "softDeleteCount doesn't match " + info.getSoftDelCount() + " != " + newDelCount;
      dvGeneration = info.getDocValuesGen();
    }
    assert getDelCount() <= info.info.maxDoc() : getDelCount() + " > " + info.info.maxDoc();
  }

  @Override
  boolean writeLiveDocs(Directory dir) throws IOException {
    // we need to set this here to make sure our stats in SCI are up-to-date otherwise we might hit an assertion
    // when the hard deletes are set since we need to account for docs that used to be only soft-delete but now hard-deleted
    this.info.setSoftDelCount(this.info.getSoftDelCount() + pendingDeleteCount);
    super.dropChanges();
    // delegate the write to the hard deletes - it will only write if somebody used it.
    // (Returns whether anything was actually written.)
    return hardDeletes.writeLiveDocs(dir);
  }

  @Override
  void dropChanges() {
    // don't reset anything here - this is called after a merge (successful or not) to prevent
    // rewriting the deleted docs to disk. we only pass it on and reset the number of pending deletes
    // NOTE(review): only the hard-delete delegate is reset; this instance's soft-delete state
    // (pendingDeleteCount, dvGeneration) is intentionally preserved across the call.
    hardDeletes.dropChanges();
  }

  
Clears all bits in the given bitset that are set and are also in the given DocIdSetIterator.
Params:
  • iterator – the doc ID set iterator for apply
  • bits – the bit set to apply the deletes to
Returns:the number of bits changed by this function
/** * Clears all bits in the given bitset that are set and are also in the given DocIdSetIterator. * * @param iterator the doc ID set iterator for apply * @param bits the bit set to apply the deletes to * @return the number of bits changed by this function */
static int applySoftDeletes(DocIdSetIterator iterator, FixedBitSet bits) throws IOException { assert iterator != null; int newDeletes = 0; int docID; DocValuesFieldUpdates.Iterator hasValue = iterator instanceof DocValuesFieldUpdates.Iterator ? (DocValuesFieldUpdates.Iterator) iterator : null; while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (hasValue == null || hasValue.hasValue()) { if (bits.get(docID)) { // doc is live - clear it bits.clear(docID); newDeletes++; // now that we know we deleted it and we fully control the hard deletes we can do correct accounting // below. } } else { if (bits.get(docID) == false) { bits.set(docID); newDeletes--; } } } return newDeletes; } @Override void onDocValuesUpdate(FieldInfo info, DocValuesFieldUpdates.Iterator iterator) throws IOException { if (this.field.equals(info.name)) { pendingDeleteCount += applySoftDeletes(iterator, getMutableBits()); assert assertPendingDeletes(); this.info.setSoftDelCount(this.info.getSoftDelCount() + pendingDeleteCount); super.dropChanges(); } assert dvGeneration < info.getDocValuesGen() : "we have seen this generation update already: " + dvGeneration + " vs. 
" + info.getDocValuesGen(); assert dvGeneration != -2 : "docValues generation is still uninitialized"; dvGeneration = info.getDocValuesGen(); } private boolean assertPendingDeletes() { assert pendingDeleteCount + info.getSoftDelCount() >= 0 : " illegal pending delete count: " + pendingDeleteCount + info.getSoftDelCount(); assert info.info.maxDoc() >= getDelCount(); return true; } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("PendingSoftDeletes(seg=").append(info); sb.append(" numPendingDeletes=").append(pendingDeleteCount); sb.append(" field=").append(field); sb.append(" dvGeneration=").append(dvGeneration); sb.append(" hardDeletes=").append(hardDeletes); return sb.toString(); } @Override int numDeletesToMerge(MergePolicy policy, IOSupplier<CodecReader> readerIOSupplier) throws IOException { ensureInitialized(readerIOSupplier); // initialize to ensure we have accurate counts return super.numDeletesToMerge(policy, readerIOSupplier); } private void ensureInitialized(IOSupplier<CodecReader> readerIOSupplier) throws IOException { if (dvGeneration == -2) { FieldInfos fieldInfos = readFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); // we try to only open a reader if it's really necessary ie. indices that are mainly append only might have // big segments that don't even have any docs in the soft deletes field. In such a case it's simply // enough to look at the FieldInfo for the field and check if the field has DocValues if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.NONE) { // in order to get accurate numbers we need to have a least one reader see here. onNewReader(readerIOSupplier.get(), info); } else { // we are safe here since we don't have any doc values for the soft-delete field on disk // no need to open a new reader dvGeneration = fieldInfo == null ? 
-1 : fieldInfo.getDocValuesGen(); } } } @Override boolean isFullyDeleted(IOSupplier<CodecReader> readerIOSupplier) throws IOException { ensureInitialized(readerIOSupplier); // initialize to ensure we have accurate counts - only needed in the soft-delete case return super.isFullyDeleted(readerIOSupplier); } private FieldInfos readFieldInfos() throws IOException { SegmentInfo segInfo = info.info; Directory dir = segInfo.dir; if (info.hasFieldUpdates() == false) { // updates always outside of CFS Closeable toClose; if (segInfo.getUseCompoundFile()) { toClose = dir = segInfo.getCodec().compoundFormat().getCompoundReader(segInfo.dir, segInfo, IOContext.READONCE); } else { toClose = null; dir = segInfo.dir; } try { return segInfo.getCodec().fieldInfosFormat().read(dir, segInfo, "", IOContext.READONCE); } finally { IOUtils.close(toClose); } } else { FieldInfosFormat fisFormat = segInfo.getCodec().fieldInfosFormat(); final String segmentSuffix = Long.toString(info.getFieldInfosGen(), Character.MAX_RADIX); return fisFormat.read(dir, segInfo, segmentSuffix, IOContext.READONCE); } } @Override Bits getHardLiveDocs() { return hardDeletes.getLiveDocs(); } @Override boolean mustInitOnDelete() { return liveDocsInitialized == false; } static int countSoftDeletes(DocIdSetIterator softDeletedDocs, Bits hardDeletes) throws IOException { int count = 0; if (softDeletedDocs != null) { int doc; while ((doc = softDeletedDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (hardDeletes == null || hardDeletes.get(doc)) { count++; } } } return count; } }