/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.util.BytesRef;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

Codec API for writing stored fields:
  1. For every document, startDocument() is called, informing the Codec that a new document has started.
  2. writeField(FieldInfo, IndexableField) is called for each field in the document.
  3. After all documents have been written, finish(FieldInfos, int) is called for verification/sanity-checks.
  4. Finally the writer is closed (close())
@lucene.experimental
/**
 * Codec API for writing stored fields:
 * <ol>
 *   <li>For every document, {@link #startDocument()} is called,
 *       informing the Codec that a new document has started.
 *   <li>{@link #writeField(FieldInfo, IndexableField)} is called for
 *       each field in the document.
 *   <li>After all documents have been written, {@link #finish(FieldInfos, int)}
 *       is called for verification/sanity-checks.
 *   <li>Finally the writer is closed ({@link #close()})
 * </ol>
 *
 * @lucene.experimental
 */
public abstract class StoredFieldsWriter implements Closeable {
Sole constructor. (For invocation by subclass constructors, typically implicit.)
/**
 * Sole constructor. (For invocation by subclass
 * constructors, typically implicit.)
 */
protected StoredFieldsWriter() { }
Called before writing the stored fields of the document. writeField(FieldInfo, IndexableField) will be called for each stored field. Note that this is called even if the document has no stored fields.
/**
 * Called before writing the stored fields of the document.
 * {@link #writeField(FieldInfo, IndexableField)} will be called
 * for each stored field. Note that this is
 * called even if the document has no stored fields.
 *
 * @throws IOException declared so that implementations may report I/O failures
 */
public abstract void startDocument() throws IOException;
Called when a document and all its fields have been added.
/**
 * Called when a document and all its fields have been added.
 * <p>
 * The default implementation does nothing; subclasses may override it.
 *
 * @throws IOException declared so that overriding implementations may report I/O failures
 */
public void finishDocument() throws IOException {}
Writes a single stored field.
/**
 * Writes a single stored field.
 *
 * @param info  the {@link FieldInfo} of the field being written
 * @param field the field instance to write
 * @throws IOException declared so that implementations may report I/O failures
 */
public abstract void writeField(FieldInfo info, IndexableField field) throws IOException;
Called before close(), passing in the number of documents that were written. Note that this is intentionally redundant (equivalent to the number of calls to startDocument()), but a Codec should check that this is the case to detect the JRE bug described in LUCENE-1282.
/**
 * Called before {@link #close()}, passing in the number
 * of documents that were written. Note that this is
 * intentionally redundant (equivalent to the number of
 * calls to {@link #startDocument()}), but a Codec should
 * check that this is the case to detect the JRE bug described
 * in LUCENE-1282.
 *
 * @param fis     the field infos passed in by the caller
 * @param numDocs the number of documents that were written
 * @throws IOException declared so that implementations may report I/O failures
 */
public abstract void finish(FieldInfos fis, int numDocs) throws IOException;

/**
 * {@link DocIDMerger.Sub} for one incoming stored fields reader. It hands out
 * the reader's doc IDs in increasing order, from 0 up to (but excluding)
 * {@code maxDoc}, and carries the reader and visitor that belong to it.
 */
private static class StoredFieldsMergeSub extends DocIDMerger.Sub {
  private final StoredFieldsReader reader;
  private final int maxDoc;
  private final MergeVisitor visitor;

  /** Doc ID most recently returned by {@link #nextDoc()}; -1 before the first call. */
  int docID = -1;

  public StoredFieldsMergeSub(MergeVisitor visitor, MergeState.DocMap docMap, StoredFieldsReader reader, int maxDoc) {
    super(docMap);
    this.visitor = visitor;
    this.reader = reader;
    this.maxDoc = maxDoc;
  }

  /** Advances to the next doc ID, or returns {@code NO_MORE_DOCS} once {@code maxDoc} is reached. */
  @Override
  public int nextDoc() {
    docID++;
    return docID == maxDoc ? NO_MORE_DOCS : docID;
  }
}
Merges in the stored fields from the readers in mergeState. The default implementation skips over deleted documents, and uses startDocument(), writeField(FieldInfo, IndexableField), and finish(FieldInfos, int), returning the number of documents that were written. Implementations can override this method for more sophisticated merging (bulk-byte copying, etc).
/**
 * Merges in the stored fields from the readers in
 * <code>mergeState</code>. The default implementation skips
 * over deleted documents, and uses {@link #startDocument()},
 * {@link #writeField(FieldInfo, IndexableField)}, and {@link #finish(FieldInfos, int)},
 * returning the number of documents that were written.
 * Implementations can override this method for more sophisticated
 * merging (bulk-byte copying, etc).
 *
 * @param mergeState the state of the merge, supplying the readers, doc maps and merged field infos
 * @return the number of documents that were written
 * @throws IOException if an integrity check, a document visit or a write fails
 */
public int merge(MergeState mergeState) throws IOException {
  final StoredFieldsReader[] readers = mergeState.storedFieldsReaders;
  final List<StoredFieldsMergeSub> subs = new ArrayList<>();

  // One sub per incoming reader; each reader is integrity-checked before it is used.
  for (int readerIndex = 0; readerIndex < readers.length; readerIndex++) {
    final StoredFieldsReader reader = readers[readerIndex];
    reader.checkIntegrity();
    final MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
    subs.add(new StoredFieldsMergeSub(visitor, mergeState.docMaps[readerIndex], reader, mergeState.maxDocs[readerIndex]));
  }

  final DocIDMerger<StoredFieldsMergeSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);

  // Copy documents in the order the merger yields them until it is exhausted (null).
  int docCount = 0;
  for (StoredFieldsMergeSub sub = docIDMerger.next(); sub != null; sub = docIDMerger.next()) {
    assert sub.mappedDocID == docCount;
    startDocument();
    sub.reader.visitDocument(sub.docID, sub.visitor);
    finishDocument();
    docCount++;
  }

  finish(mergeState.mergeFieldInfos, docCount);
  return docCount;
}
A visitor that adds every field it sees.

Use like this:

MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
for (...) {
  startDocument();
  storedFieldsReader.visitDocument(docID, visitor);
  finishDocument();
}
/** * A visitor that adds every field it sees. * <p> * Use like this: * <pre> * MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex); * for (...) { * startDocument(); * storedFieldsReader.visitDocument(docID, visitor); * finishDocument(); * } * </pre> */
protected class MergeVisitor extends StoredFieldVisitor implements IndexableField { BytesRef binaryValue; String stringValue; Number numericValue; FieldInfo currentField; FieldInfos remapper;
Create new merge visitor.
/** * Create new merge visitor. */
public MergeVisitor(MergeState mergeState, int readerIndex) { // if field numbers are aligned, we can save hash lookups // on every field access. Otherwise, we need to lookup // fieldname each time, and remap to a new number. for (FieldInfo fi : mergeState.fieldInfos[readerIndex]) { FieldInfo other = mergeState.mergeFieldInfos.fieldInfo(fi.number); if (other == null || !other.name.equals(fi.name)) { remapper = mergeState.mergeFieldInfos; break; } } } @Override public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { reset(fieldInfo); // TODO: can we avoid new BR here? binaryValue = new BytesRef(value); write(); } @Override public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { reset(fieldInfo); // TODO: can we avoid new String here? stringValue = new String(value, StandardCharsets.UTF_8); write(); } @Override public void intField(FieldInfo fieldInfo, int value) throws IOException { reset(fieldInfo); numericValue = value; write(); } @Override public void longField(FieldInfo fieldInfo, long value) throws IOException { reset(fieldInfo); numericValue = value; write(); } @Override public void floatField(FieldInfo fieldInfo, float value) throws IOException { reset(fieldInfo); numericValue = value; write(); } @Override public void doubleField(FieldInfo fieldInfo, double value) throws IOException { reset(fieldInfo); numericValue = value; write(); } @Override public Status needsField(FieldInfo fieldInfo) throws IOException { return Status.YES; } @Override public String name() { return currentField.name; } @Override public IndexableFieldType fieldType() { return StoredField.TYPE; } @Override public BytesRef binaryValue() { return binaryValue; } @Override public String stringValue() { return stringValue; } @Override public Number numericValue() { return numericValue; } @Override public Reader readerValue() { return null; } @Override public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { return null; } void 
reset(FieldInfo field) { if (remapper != null) { // field numbers are not aligned, we need to remap to the new field number currentField = remapper.fieldInfo(field.name); } else { currentField = field; } binaryValue = null; stringValue = null; numericValue = null; } void write() throws IOException { writeField(currentField, this); } } @Override public abstract void close() throws IOException; }