/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.util.Collections;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.Version;

Holds all the configuration used by IndexWriter with few setters for settings that can be changed on an IndexWriter instance "live".
Since:4.0
/**
 * Holds all the configuration used by {@link IndexWriter} with a few setters for
 * settings that can be changed on an {@link IndexWriter} instance "live".
 *
 * @since 4.0
 */
public class LiveIndexWriterConfig {

  // Analyzer supplied to the constructor; final, so it is never reassigned.
  private final Analyzer analyzer;

  // Flush thresholds; written by the synchronized setMaxBufferedDocs /
  // setRAMBufferSizeMB setters, which validate each one against the other.
  private volatile int maxBufferedDocs;
  private volatile double ramBufferSizeMB;

  // Warmer returned by getMergedSegmentWarmer(); the constructor sets it to null.
  private volatile IndexReaderWarmer mergedSegmentWarmer;

  // modified by IndexWriterConfig
IndexDeletionPolicy controlling when commit points are deleted.
/**
 * {@link IndexDeletionPolicy} controlling when commit points are deleted.
 * The constructor installs a {@link KeepOnlyLastCommitDeletionPolicy}.
 */
protected volatile IndexDeletionPolicy delPolicy;
IndexCommit that IndexWriter is opened on.
/**
 * {@link IndexCommit} that {@link IndexWriter} is opened on.
 * The constructor sets this to {@code null}.
 */
protected volatile IndexCommit commit;
OpenMode that IndexWriter is opened with.
/**
 * {@link OpenMode} that {@link IndexWriter} is opened with.
 * The constructor sets this to {@link OpenMode#CREATE_OR_APPEND}.
 */
protected volatile OpenMode openMode;
Compatibility version to use for this index.
/**
 * Compatibility version to use for this index.
 * Initialized to the major number of {@link Version#LATEST}.
 */
protected int createdVersionMajor = Version.LATEST.major;
Similarity to use when encoding norms.
/**
 * {@link Similarity} to use when encoding norms.
 * The constructor sets this to {@link IndexSearcher#getDefaultSimilarity()}.
 */
protected volatile Similarity similarity;
MergeScheduler to use for running merges.
/**
 * {@link MergeScheduler} to use for running merges.
 * The constructor installs a {@link ConcurrentMergeScheduler}.
 */
protected volatile MergeScheduler mergeScheduler;
IndexingChain that determines how documents are indexed.
/**
 * {@link IndexingChain} that determines how documents are indexed.
 * The constructor sets this to {@code DocumentsWriterPerThread.defaultIndexingChain}.
 */
protected volatile IndexingChain indexingChain;
Codec used to write new segments.
/**
 * {@link Codec} used to write new segments.
 * The constructor sets this to {@link Codec#getDefault()} and rejects {@code null}.
 */
protected volatile Codec codec;
InfoStream for debugging messages.
/**
 * {@link InfoStream} for debugging messages.
 * The constructor sets this to {@link InfoStream#getDefault()}.
 */
protected volatile InfoStream infoStream;
MergePolicy for selecting merges.
/**
 * {@link MergePolicy} for selecting merges.
 * The constructor installs a {@link TieredMergePolicy}.
 */
protected volatile MergePolicy mergePolicy;
DocumentsWriterPerThreadPool to control how threads are allocated to DocumentsWriterPerThread.
/**
 * {@code DocumentsWriterPerThreadPool} to control how threads are allocated
 * to {@code DocumentsWriterPerThread}. The constructor creates a new pool instance.
 */
protected volatile DocumentsWriterPerThreadPool indexerThreadPool;
True if readers should be pooled.
/**
 * True if readers should be pooled.
 * The constructor sets this to {@link IndexWriterConfig#DEFAULT_READER_POOLING}.
 */
protected volatile boolean readerPooling;
FlushPolicy to control when segments are flushed.
/**
 * {@link FlushPolicy} to control when segments are flushed.
 * The constructor installs a {@code FlushByRamOrCountsPolicy}.
 */
protected volatile FlushPolicy flushPolicy;
Sets the hard upper bound on RAM usage for a single segment, after which the segment is forced to flush.
/**
 * Sets the hard upper bound on RAM usage for a single segment, after which
 * the segment is forced to flush. The constructor sets this to
 * {@link IndexWriterConfig#DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB}.
 */
protected volatile int perThreadHardLimitMB;
True if segment flushes should use compound file format
/**
 * True if segment flushes should use compound file format.
 * Initialized to {@link IndexWriterConfig#DEFAULT_USE_COMPOUND_FILE_SYSTEM}.
 */
protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
True if calls to IndexWriter.close() should first do a commit.
/**
 * True if calls to {@link IndexWriter#close()} should first do a commit.
 * Initialized to {@link IndexWriterConfig#DEFAULT_COMMIT_ON_CLOSE}.
 */
protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE;
The sort order to use to write merged segments.
/**
 * The sort order to use to write merged segments.
 * Initialized to {@code null}.
 */
protected Sort indexSort = null;
The field names involved in the index sort
/**
 * The field names involved in the index sort.
 * Initialized to {@link Collections#emptySet()}.
 */
protected Set<String> indexSortFields = Collections.emptySet();
if an indexing thread should check for pending flushes on update in order to help out on a full flush
/**
 * If an indexing thread should check for pending flushes on update in order
 * to help out on a full flush. Initialized to {@code true}.
 */
protected volatile boolean checkPendingFlushOnUpdate = true;
soft deletes field
/**
 * Soft deletes field. Initialized to {@code null}; {@link #getSoftDeletesField()}
 * documents {@code null} as meaning soft-deletes are disabled.
 */
protected String softDeletesField = null;
the attributes for the NRT readers
/** the attributes for the NRT readers */
protected Map<String, String> readerAttributes = Collections.emptyMap(); // used by IndexWriterConfig LiveIndexWriterConfig(Analyzer analyzer) { this.analyzer = analyzer; ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB; maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS; mergedSegmentWarmer = null; delPolicy = new KeepOnlyLastCommitDeletionPolicy(); commit = null; useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM; openMode = OpenMode.CREATE_OR_APPEND; similarity = IndexSearcher.getDefaultSimilarity(); mergeScheduler = new ConcurrentMergeScheduler(); indexingChain = DocumentsWriterPerThread.defaultIndexingChain; codec = Codec.getDefault(); if (codec == null) { throw new NullPointerException(); } infoStream = InfoStream.getDefault(); mergePolicy = new TieredMergePolicy(); flushPolicy = new FlushByRamOrCountsPolicy(); readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING; indexerThreadPool = new DocumentsWriterPerThreadPool(); perThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB; }
Returns the default analyzer to use for indexing documents.
/**
 * Returns the default analyzer to use for indexing documents.
 *
 * @return the analyzer held by this configuration
 */
public Analyzer getAnalyzer() {
  return this.analyzer;
}
Determines the amount of RAM that may be used for buffering added documents and deletions before they are flushed to the Directory. Generally for faster indexing performance it's best to flush by RAM usage instead of document count and use as large a RAM buffer as you can.

When this is set, the writer will flush whenever buffered documents and deletions use this much RAM. Pass in IndexWriterConfig.DISABLE_AUTO_FLUSH to prevent triggering a flush due to RAM usage. Note that if flushing by document count is also enabled, then the flush will be triggered by whichever comes first.

The maximum RAM limit is inherently determined by the JVM's available memory. Yet, an IndexWriter session can consume a significantly larger amount of memory than the given RAM limit since this limit is just an indicator when to flush memory resident documents to the Directory. Flushes are likely to happen concurrently while other threads are adding documents to the writer. For application stability the available memory in the JVM should be significantly larger than the RAM buffer used for indexing.

NOTE: the account of RAM usage for pending deletions is only approximate. Specifically, if you delete by Query, Lucene currently has no way to measure the RAM usage of individual Queries so the accounting will under-estimate and you should compensate by either calling commit() or refresh() periodically yourself.

NOTE: It's not guaranteed that all memory resident documents are flushed once this limit is exceeded. Depending on the configured FlushPolicy only a subset of the buffered documents are flushed and therefore only parts of the RAM buffer is released.

The default value is IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB.

Takes effect immediately, but only the next time a document is added, updated or deleted.

Throws:
  • IllegalArgumentException – if ramBufferSize is enabled but non-positive, or it disables ramBufferSize when maxBufferedDocs is already disabled
See Also:
  • IndexWriterConfig.setRAMPerThreadHardLimitMB(int)
/**
 * Determines the amount of RAM that may be used for buffering added documents
 * and deletions before they are flushed to the Directory. Generally for
 * faster indexing performance it's best to flush by RAM usage instead of
 * document count and use as large a RAM buffer as you can.
 * <p>
 * When this is set, the writer will flush whenever buffered documents and
 * deletions use this much RAM. Pass in
 * {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent triggering a flush
 * due to RAM usage. Note that if flushing by document count is also enabled,
 * then the flush will be triggered by whichever comes first.
 * <p>
 * The maximum RAM limit is inherently determined by the JVM's available
 * memory. Yet, an {@link IndexWriter} session can consume a significantly
 * larger amount of memory than the given RAM limit since this limit is just
 * an indicator when to flush memory resident documents to the Directory.
 * Flushes are likely to happen concurrently while other threads are adding
 * documents to the writer. For application stability the available memory in
 * the JVM should be significantly larger than the RAM buffer used for indexing.
 * <p>
 * <b>NOTE</b>: the account of RAM usage for pending deletions is only
 * approximate. Specifically, if you delete by Query, Lucene currently has no
 * way to measure the RAM usage of individual Queries so the accounting will
 * under-estimate and you should compensate by either calling commit() or refresh()
 * periodically yourself.
 * <p>
 * <b>NOTE</b>: It's not guaranteed that all memory resident documents are
 * flushed once this limit is exceeded. Depending on the configured
 * {@link FlushPolicy} only a subset of the buffered documents are flushed and
 * therefore only parts of the RAM buffer is released.
 * <p>
 * The default value is {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB}.
 * <p>
 * Takes effect immediately, but only the next time a document is added,
 * updated or deleted.
 *
 * @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int)
 *
 * @throws IllegalArgumentException
 *           if ramBufferSize is enabled but non-positive, or it disables
 *           ramBufferSize when maxBufferedDocs is already disabled
 */
public synchronized LiveIndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
  final boolean disablesRamFlush = ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH;
  if (!disablesRamFlush && ramBufferSizeMB <= 0.0) {
    throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled");
  }
  // Both flush triggers may not be switched off at the same time.
  if (disablesRamFlush && maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
    throw new IllegalArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
  }
  this.ramBufferSizeMB = ramBufferSizeMB;
  return this;
}
Returns the value set by setRAMBufferSizeMB(double) if enabled.
/**
 * Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled.
 *
 * @return the current RAM buffer size setting, in MB
 */
public double getRAMBufferSizeMB() {
  return this.ramBufferSizeMB;
}
Determines the minimal number of documents required before the buffered in-memory documents are flushed as a new Segment. Large values generally give faster indexing.

When this is set, the writer will flush every maxBufferedDocs added documents. Pass in IndexWriterConfig.DISABLE_AUTO_FLUSH to prevent triggering a flush due to number of buffered documents. Note that if flushing by RAM usage is also enabled, then the flush will be triggered by whichever comes first.

Disabled by default (writer flushes by RAM usage).

Takes effect immediately, but only the next time a document is added, updated or deleted.

Throws:
  • IllegalArgumentException – if maxBufferedDocs is enabled but smaller than 2, or it disables maxBufferedDocs when ramBufferSize is already disabled
See Also:
  • setRAMBufferSizeMB(double)
/**
 * Determines the minimal number of documents required before the buffered
 * in-memory documents are flushed as a new Segment. Large values generally
 * give faster indexing.
 *
 * <p>
 * When this is set, the writer will flush every maxBufferedDocs added
 * documents. Pass in {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent
 * triggering a flush due to number of buffered documents. Note that if
 * flushing by RAM usage is also enabled, then the flush will be triggered by
 * whichever comes first.
 *
 * <p>
 * Disabled by default (writer flushes by RAM usage).
 *
 * <p>
 * Takes effect immediately, but only the next time a document is added,
 * updated or deleted.
 *
 * @see #setRAMBufferSizeMB(double)
 * @throws IllegalArgumentException
 *           if maxBufferedDocs is enabled but smaller than 2, or it disables
 *           maxBufferedDocs when ramBufferSize is already disabled
 */
public synchronized LiveIndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
  final boolean disablesDocCountFlush = maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH;
  if (!disablesDocCountFlush && maxBufferedDocs < 2) {
    throw new IllegalArgumentException("maxBufferedDocs must at least be 2 when enabled");
  }
  // Both flush triggers may not be switched off at the same time.
  if (disablesDocCountFlush && ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
    throw new IllegalArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
  }
  this.maxBufferedDocs = maxBufferedDocs;
  return this;
}
Returns the number of buffered added documents that will trigger a flush if enabled.
See Also:
  • setMaxBufferedDocs(int)
/**
 * Returns the number of buffered added documents that will trigger a flush if
 * enabled.
 *
 * @return the current maxBufferedDocs setting
 * @see #setMaxBufferedDocs(int)
 */
public int getMaxBufferedDocs() {
  return this.maxBufferedDocs;
}
Expert: MergePolicy is invoked whenever there are changes to the segments in the index. Its role is to select which merges to do, if any, and return a MergeSpecification describing the merges. It also selects merges to do for forceMerge.

Takes effect on subsequent merge selections. Any merges in flight or any merges already registered by the previous MergePolicy are not affected.

/**
 * Expert: {@link MergePolicy} is invoked whenever there are changes to the
 * segments in the index. Its role is to select which merges to do, if any,
 * and return a {@link MergePolicy.MergeSpecification} describing the merges.
 * It also selects merges to do for forceMerge.
 *
 * <p>
 * Takes effect on subsequent merge selections. Any merges in flight or any
 * merges already registered by the previous {@link MergePolicy} are not
 * affected.
 *
 * @param mergePolicy the policy to install; must not be {@code null}
 * @return this configuration
 * @throws IllegalArgumentException if {@code mergePolicy} is {@code null}
 */
public LiveIndexWriterConfig setMergePolicy(MergePolicy mergePolicy) {
  if (mergePolicy != null) {
    this.mergePolicy = mergePolicy;
    return this;
  }
  throw new IllegalArgumentException("mergePolicy must not be null");
}
Set the merged segment warmer. See IndexReaderWarmer.

Takes effect on the next merge.

/**
 * Set the merged segment warmer. See {@link IndexReaderWarmer}.
 *
 * <p>
 * Takes effect on the next merge.
 *
 * @param mergeSegmentWarmer the warmer to store; stored without validation
 * @return this configuration
 */
public LiveIndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) {
  mergedSegmentWarmer = mergeSegmentWarmer;
  return this;
}
Returns the current merged segment warmer. See IndexReaderWarmer.
/**
 * Returns the current merged segment warmer. See {@link IndexReaderWarmer}.
 *
 * @return the warmer currently stored in this configuration
 */
public IndexReaderWarmer getMergedSegmentWarmer() {
  return this.mergedSegmentWarmer;
}

/**
 * Returns the {@link OpenMode} set by {@link IndexWriterConfig#setOpenMode(OpenMode)}.
 *
 * @return the open mode held by this configuration
 */
public OpenMode getOpenMode() {
  return this.openMode;
}
Return the compatibility version to use for this index.
See Also:
  • IndexWriterConfig.setIndexCreatedVersionMajor
/**
 * Return the compatibility version to use for this index.
 *
 * @return the major version this index is created with
 * @see IndexWriterConfig#setIndexCreatedVersionMajor
 */
public int getIndexCreatedVersionMajor() {
  return this.createdVersionMajor;
}

/**
 * Returns the {@link IndexDeletionPolicy} specified in
 * {@link IndexWriterConfig#setIndexDeletionPolicy(IndexDeletionPolicy)} or
 * the default {@link KeepOnlyLastCommitDeletionPolicy}.
 *
 * @return the deletion policy held by this configuration
 */
public IndexDeletionPolicy getIndexDeletionPolicy() {
  return this.delPolicy;
}
Returns the IndexCommit as specified in IndexWriterConfig.setIndexCommit(IndexCommit) or the default, null which specifies to open the latest index commit point.
/**
 * Returns the {@link IndexCommit} as specified in
 * {@link IndexWriterConfig#setIndexCommit(IndexCommit)} or the default,
 * {@code null} which specifies to open the latest index commit point.
 *
 * @return the configured commit, possibly {@code null}
 */
public IndexCommit getIndexCommit() {
  return this.commit;
}
Expert: returns the Similarity implementation used by this IndexWriter.
/**
 * Expert: returns the {@link Similarity} implementation used by this
 * {@link IndexWriter}.
 *
 * @return the similarity held by this configuration
 */
public Similarity getSimilarity() {
  return this.similarity;
}

/**
 * Returns the {@link MergeScheduler} that was set by
 * {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)}.
 *
 * @return the merge scheduler held by this configuration
 */
public MergeScheduler getMergeScheduler() {
  return this.mergeScheduler;
}
Returns the current Codec.
/**
 * Returns the current {@link Codec}.
 *
 * @return the codec held by this configuration
 */
public Codec getCodec() {
  return this.codec;
}
Returns the current MergePolicy in use by this writer.
See Also:
  • IndexWriterConfig.setMergePolicy(MergePolicy)
/**
 * Returns the current {@link MergePolicy} in use by this writer.
 *
 * @return the merge policy held by this configuration
 * @see IndexWriterConfig#setMergePolicy(MergePolicy)
 */
public MergePolicy getMergePolicy() {
  return this.mergePolicy;
}
Returns the configured DocumentsWriterPerThreadPool instance.
See Also:
  • IndexWriterConfig.setIndexerThreadPool(DocumentsWriterPerThreadPool)
Returns: the configured DocumentsWriterPerThreadPool instance.
/**
 * Returns the configured {@link DocumentsWriterPerThreadPool} instance.
 * Package-private accessor.
 *
 * @see IndexWriterConfig#setIndexerThreadPool(DocumentsWriterPerThreadPool)
 * @return the configured {@link DocumentsWriterPerThreadPool} instance.
 */
DocumentsWriterPerThreadPool getIndexerThreadPool() {
  return this.indexerThreadPool;
}
Returns true if IndexWriter should pool readers even if DirectoryReader.open(IndexWriter) has not been called.
/**
 * Returns {@code true} if {@link IndexWriter} should pool readers even if
 * {@link DirectoryReader#open(IndexWriter)} has not been called.
 *
 * @return the reader pooling flag held by this configuration
 */
public boolean getReaderPooling() {
  return this.readerPooling;
}
Returns the indexing chain.
/**
 * Returns the indexing chain. Package-private accessor.
 *
 * @return the {@link IndexingChain} held by this configuration
 */
IndexingChain getIndexingChain() {
  return this.indexingChain;
}
Returns the max amount of memory each DocumentsWriterPerThread can consume until forcefully flushed.
See Also:
  • IndexWriterConfig.setRAMPerThreadHardLimitMB(int)
/**
 * Returns the max amount of memory each {@link DocumentsWriterPerThread} can
 * consume until forcefully flushed.
 *
 * @return the per-thread hard limit, in MB
 * @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int)
 */
public int getRAMPerThreadHardLimitMB() {
  return this.perThreadHardLimitMB;
}
See Also:
  • IndexWriterConfig.setFlushPolicy(FlushPolicy)
/**
 * Returns the {@link FlushPolicy} held by this configuration.
 * Package-private accessor.
 *
 * @return the configured flush policy
 * @see IndexWriterConfig#setFlushPolicy(FlushPolicy)
 */
FlushPolicy getFlushPolicy() {
  return this.flushPolicy;
}
Returns InfoStream used for debugging.
See Also:
  • IndexWriterConfig.setInfoStream(InfoStream)
/**
 * Returns {@link InfoStream} used for debugging.
 *
 * @return the info stream held by this configuration
 * @see IndexWriterConfig#setInfoStream(InfoStream)
 */
public InfoStream getInfoStream() {
  return this.infoStream;
}
Sets if the IndexWriter should pack newly written segments in a compound file. Default is true.

Use false for batch indexing with very large ram buffer settings.

Note: To control compound file usage during segment merges see MergePolicy.setNoCFSRatio(double) and MergePolicy.setMaxCFSSegmentSizeMB(double). This setting only applies to newly created segments.

/**
 * Sets if the {@link IndexWriter} should pack newly written segments in a
 * compound file. Default is <code>true</code>.
 * <p>
 * Use <code>false</code> for batch indexing with very large ram buffer
 * settings.
 * </p>
 * <p>
 * <b>Note: To control compound file usage during segment merges see
 * {@link MergePolicy#setNoCFSRatio(double)} and
 * {@link MergePolicy#setMaxCFSSegmentSizeMB(double)}. This setting only
 * applies to newly created segments.</b>
 * </p>
 *
 * @param useCompoundFile the new value of the flag
 * @return this configuration
 */
public LiveIndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
  final LiveIndexWriterConfig self = this;
  self.useCompoundFile = useCompoundFile;
  return self;
}
Returns true iff the IndexWriter packs newly written segments in a compound file. Default is true.
/**
 * Returns <code>true</code> iff the {@link IndexWriter} packs
 * newly written segments in a compound file. Default is <code>true</code>.
 *
 * @return the compound-file flag held by this configuration
 */
public boolean getUseCompoundFile() {
  return this.useCompoundFile;
}
Returns true if IndexWriter.close() should first commit before closing.
/**
 * Returns <code>true</code> if {@link IndexWriter#close()} should first commit before closing.
 *
 * @return the commit-on-close flag held by this configuration
 */
public boolean getCommitOnClose() {
  return this.commitOnClose;
}
Get the index-time Sort order, applied to all (flushed and merged) segments.
/**
 * Get the index-time {@link Sort} order, applied to all (flushed and merged) segments.
 *
 * @return the configured index sort; the field is initialized to {@code null}
 */
public Sort getIndexSort() {
  return this.indexSort;
}
Returns the field names involved in the index sort
/**
 * Returns the field names involved in the index sort.
 *
 * @return the stored set itself (no copy is made)
 */
public Set<String> getIndexSortFields() {
  return this.indexSortFields;
}
Expert: Returns if indexing threads check for pending flushes on update in order to help out flushing indexing buffers to disk
@lucene.experimental
/**
 * Expert: Returns if indexing threads check for pending flushes on update in order
 * to help out flushing indexing buffers to disk.
 *
 * @return the current value of the flag
 * @lucene.experimental
 */
public boolean isCheckPendingFlushOnUpdate() {
  return this.checkPendingFlushOnUpdate;
}
Expert: sets if indexing threads check for pending flushes on update in order to help out flushing indexing buffers to disk. As a consequence, threads calling DirectoryReader.openIfChanged(DirectoryReader, IndexWriter) or IndexWriter.flush() will be the only thread writing segments to disk unless flushes are falling behind. If indexing is stalled due to too many pending flushes indexing threads will help out writing pending segment flushes to disk.
@lucene.experimental
/**
 * Expert: sets if indexing threads check for pending flushes on update in order
 * to help out flushing indexing buffers to disk. As a consequence, threads calling
 * {@link DirectoryReader#openIfChanged(DirectoryReader, IndexWriter)} or {@link IndexWriter#flush()} will
 * be the only thread writing segments to disk unless flushes are falling behind. If indexing is stalled
 * due to too many pending flushes indexing threads will help out writing pending segment flushes to disk.
 *
 * @param checkPendingFlushOnUpdate the new value of the flag
 * @return this configuration
 * @lucene.experimental
 */
public LiveIndexWriterConfig setCheckPendingFlushUpdate(boolean checkPendingFlushOnUpdate) {
  final LiveIndexWriterConfig self = this;
  self.checkPendingFlushOnUpdate = checkPendingFlushOnUpdate;
  return self;
}
Returns the soft deletes field or null if soft-deletes are disabled. See IndexWriterConfig.setSoftDeletesField(String) for details.
/**
 * Returns the soft deletes field or <code>null</code> if soft-deletes are disabled.
 * See {@link IndexWriterConfig#setSoftDeletesField(String)} for details.
 *
 * @return the soft deletes field name, possibly {@code null}
 */
public String getSoftDeletesField() {
  return this.softDeletesField;
}

/**
 * Renders every setting as a {@code name=value} pair, one per line, each line
 * terminated by a newline. Values are read through the getters, except the
 * analyzer, which is read from the field directly.
 */
@Override
public String toString() {
  final String eol = "\n";
  final StringBuilder text = new StringBuilder();
  text.append("analyzer=").append(analyzer != null ? analyzer.getClass().getName() : "null").append(eol)
      .append("ramBufferSizeMB=").append(getRAMBufferSizeMB()).append(eol)
      .append("maxBufferedDocs=").append(getMaxBufferedDocs()).append(eol)
      .append("mergedSegmentWarmer=").append(getMergedSegmentWarmer()).append(eol)
      .append("delPolicy=").append(getIndexDeletionPolicy().getClass().getName()).append(eol);
  // Fetched once so the null check and the appended value see the same commit.
  final IndexCommit currentCommit = getIndexCommit();
  text.append("commit=").append(currentCommit != null ? currentCommit : "null").append(eol)
      .append("openMode=").append(getOpenMode()).append(eol)
      .append("similarity=").append(getSimilarity().getClass().getName()).append(eol)
      .append("mergeScheduler=").append(getMergeScheduler()).append(eol)
      .append("codec=").append(getCodec()).append(eol)
      .append("infoStream=").append(getInfoStream().getClass().getName()).append(eol)
      .append("mergePolicy=").append(getMergePolicy()).append(eol)
      .append("indexerThreadPool=").append(getIndexerThreadPool()).append(eol)
      .append("readerPooling=").append(getReaderPooling()).append(eol)
      .append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append(eol)
      .append("useCompoundFile=").append(getUseCompoundFile()).append(eol)
      .append("commitOnClose=").append(getCommitOnClose()).append(eol)
      .append("indexSort=").append(getIndexSort()).append(eol)
      .append("checkPendingFlushOnUpdate=").append(isCheckPendingFlushOnUpdate()).append(eol)
      .append("softDeletesField=").append(getSoftDeletesField()).append(eol)
      .append("readerAttributes=").append(getReaderAttributes()).append(eol);
  return text.toString();
}
Returns the reader attributes passed to all published readers opened on or within the IndexWriter
/**
 * Returns the reader attributes passed to all published readers opened on or within the IndexWriter.
 *
 * @return the stored map itself (no copy is made)
 */
public Map<String, String> getReaderAttributes() {
  return readerAttributes;
}
}