/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.index;


import java.io.IOException;
import java.util.Objects;
import java.util.function.Supplier;

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOSupplier;

/**
 * This {@link MergePolicy} allows soft-deleted documents to be carried over across merges. The policy wraps
 * the merge reader and marks documents as "live" that have a value in the soft delete field and match the
 * provided query. This makes it possible, for instance, to keep documents alive based on time or any other
 * constraint in the index. The main purpose of this merge policy is to implement retention policies that
 * govern when document modifications vanish from the index. Using this merge policy allows controlling when
 * soft deletes are claimed by merges.
 *
 * @lucene.experimental
 */
public final class SoftDeletesRetentionMergePolicy extends OneMergeWrappingMergePolicy { private final String field; private final Supplier<Query> retentionQuerySupplier;
Params:
  • field – the soft deletes field
  • retentionQuerySupplier – a query supplier for the retention query
  • in – the wrapped MergePolicy
/** * Creates a new {@link SoftDeletesRetentionMergePolicy} * @param field the soft deletes field * @param retentionQuerySupplier a query supplier for the retention query * @param in the wrapped MergePolicy */
public SoftDeletesRetentionMergePolicy(String field, Supplier<Query> retentionQuerySupplier, MergePolicy in) { super(in, toWrap -> new MergePolicy.OneMerge(toWrap.segments) { @Override public CodecReader wrapForMerge(CodecReader reader) throws IOException { CodecReader wrapped = toWrap.wrapForMerge(reader); Bits liveDocs = reader.getLiveDocs(); if (liveDocs == null) { // no deletes - just keep going return wrapped; } return applyRetentionQuery(field, retentionQuerySupplier.get(), wrapped); } }); Objects.requireNonNull(field, "field must not be null"); Objects.requireNonNull(retentionQuerySupplier, "retentionQuerySupplier must not be null"); this.field = field; this.retentionQuerySupplier = retentionQuerySupplier; } @Override public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) throws IOException { CodecReader reader = readerIOSupplier.get(); /* we only need a single hit to keep it no need for soft deletes to be checked*/ Scorer scorer = getScorer(retentionQuerySupplier.get(), FilterCodecReader.wrapLiveDocs(reader, null, reader.maxDoc())); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); boolean atLeastOneHit = iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; return atLeastOneHit; } return super.keepFullyDeletedSegment(readerIOSupplier) ; } // pkg private for testing static CodecReader applyRetentionQuery(String softDeleteField, Query retentionQuery, CodecReader reader) throws IOException { Bits liveDocs = reader.getLiveDocs(); if (liveDocs == null) { // no deletes - just keep going return reader; } CodecReader wrappedReader = FilterCodecReader.wrapLiveDocs(reader, new Bits() { // only search deleted @Override public boolean get(int index) { return liveDocs.get(index) == false; } @Override public int length() { return liveDocs.length(); } }, reader.maxDoc() - reader.numDocs()); BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new DocValuesFieldExistsQuery(softDeleteField), 
BooleanClause.Occur.FILTER); builder.add(retentionQuery, BooleanClause.Occur.FILTER); Scorer scorer = getScorer(builder.build(), wrappedReader); if (scorer != null) { FixedBitSet cloneLiveDocs = FixedBitSet.copyOf(liveDocs); DocIdSetIterator iterator = scorer.iterator(); int numExtraLiveDocs = 0; while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { if (cloneLiveDocs.getAndSet(iterator.docID()) == false) { // if we bring one back to live we need to account for it numExtraLiveDocs++; } } assert reader.numDocs() + numExtraLiveDocs <= reader.maxDoc() : "numDocs: " + reader.numDocs() + " numExtraLiveDocs: " + numExtraLiveDocs + " maxDoc: " + reader.maxDoc(); return FilterCodecReader.wrapLiveDocs(reader, cloneLiveDocs, reader.numDocs() + numExtraLiveDocs); } else { return reader; } } private static Scorer getScorer(Query query, CodecReader reader) throws IOException { IndexSearcher s = new IndexSearcher(reader); s.setQueryCache(null); Weight weight = s.createWeight(s.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1.0f); return weight.scorer(reader.getContext()); } @Override public int numDeletesToMerge(SegmentCommitInfo info, int delCount, IOSupplier<CodecReader> readerSupplier) throws IOException { final int numDeletesToMerge = super.numDeletesToMerge(info, delCount, readerSupplier); if (numDeletesToMerge != 0 && info.getSoftDelCount() > 0) { final CodecReader reader = readerSupplier.get(); if (reader.getLiveDocs() != null) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new DocValuesFieldExistsQuery(field), BooleanClause.Occur.FILTER); builder.add(retentionQuerySupplier.get(), BooleanClause.Occur.FILTER); Scorer scorer = getScorer(builder.build(), FilterCodecReader.wrapLiveDocs(reader, null, reader.maxDoc())); if (scorer != null) { DocIdSetIterator iterator = scorer.iterator(); Bits liveDocs = reader.getLiveDocs(); int numDeletedDocs = reader.numDeletedDocs(); while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { if 
(liveDocs.get(iterator.docID()) == false) { numDeletedDocs--; } } return numDeletedDocs; } } } assert numDeletesToMerge >= 0 : "numDeletesToMerge: " + numDeletesToMerge; assert numDeletesToMerge <= info.info.maxDoc() : "numDeletesToMerge: " + numDeletesToMerge + " maxDoc:" + info.info.maxDoc(); return numDeletesToMerge; } }