/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs;


import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;

Extension of PostingsWriterBase, adding a push API for writing each element of the postings. This API is somewhat analogous to an XML SAX API, while PostingsWriterBase is more like an XML DOM API.
See Also:
@lucene.experimental
/** * Extension of {@link PostingsWriterBase}, adding a push * API for writing each element of the postings. This API * is somewhat analogous to an XML SAX API, while {@link * PostingsWriterBase} is more like an XML DOM API. * * @see PostingsReaderBase * @lucene.experimental */
// TODO: find a better name; this defines the API that the // terms dict impls use to talk to a postings impl. // TermsDict + PostingsReader/WriterBase == PostingsConsumer/Producer public abstract class PushPostingsWriterBase extends PostingsWriterBase { // Reused in writeTerm private PostingsEnum postingsEnum; private int enumFlags;
FieldInfo of current field being written.
/** {@link FieldInfo} of current field being written. */
protected FieldInfo fieldInfo;
IndexOptions of current field being written
/** {@link IndexOptions} of current field being written */
protected IndexOptions indexOptions;
True if the current field writes freqs.
/** True if the current field writes freqs. */
protected boolean writeFreqs;
True if the current field writes positions.
/** True if the current field writes positions. */
protected boolean writePositions;
True if the current field writes payloads.
/** True if the current field writes payloads. */
protected boolean writePayloads;
True if the current field writes offsets.
/** True if the current field writes offsets. */
protected boolean writeOffsets;
Sole constructor. (For invocation by subclass constructors, typically implicit.)
/** Sole constructor. (For invocation by subclass * constructors, typically implicit.) */
protected PushPostingsWriterBase() { }
Return a newly created empty TermState
/** Return a newly created empty TermState */
public abstract BlockTermState newTermState() throws IOException;
Start a new term. Note that a matching call to finishTerm(BlockTermState) is done, only if the term has at least one document.
/** Start a new term. Note that a matching call to {@link * #finishTerm(BlockTermState)} is done, only if the term has at least one * document. */
public abstract void startTerm(NumericDocValues norms) throws IOException;
Finishes the current term. The provided BlockTermState contains the term's summary statistics, and will holds metadata from PBF when returned
/** Finishes the current term. The provided {@link * BlockTermState} contains the term's summary statistics, * and will holds metadata from PBF when returned */
public abstract void finishTerm(BlockTermState state) throws IOException;
Sets the current field for writing, and returns the fixed length of long[] metadata (which is fixed per field), called when the writing switches to another field.
/** * Sets the current field for writing, and returns the * fixed length of long[] metadata (which is fixed per * field), called when the writing switches to another field. */
@Override public int setField(FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; indexOptions = fieldInfo.getIndexOptions(); writeFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; writePayloads = fieldInfo.hasPayloads(); if (writeFreqs == false) { enumFlags = 0; } else if (writePositions == false) { enumFlags = PostingsEnum.FREQS; } else if (writeOffsets == false) { if (writePayloads) { enumFlags = PostingsEnum.PAYLOADS; } else { enumFlags = PostingsEnum.POSITIONS; } } else { if (writePayloads) { enumFlags = PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS; } else { enumFlags = PostingsEnum.OFFSETS; } } return 0; } @Override public final BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms) throws IOException { NumericDocValues normValues; if (fieldInfo.hasNorms() == false) { normValues = null; } else { normValues = norms.getNorms(fieldInfo); } startTerm(normValues); postingsEnum = termsEnum.postings(postingsEnum, enumFlags); assert postingsEnum != null; int docFreq = 0; long totalTermFreq = 0; while (true) { int docID = postingsEnum.nextDoc(); if (docID == PostingsEnum.NO_MORE_DOCS) { break; } docFreq++; docsSeen.set(docID); int freq; if (writeFreqs) { freq = postingsEnum.freq(); totalTermFreq += freq; } else { freq = -1; } startDoc(docID, freq); if (writePositions) { for(int i=0;i<freq;i++) { int pos = postingsEnum.nextPosition(); BytesRef payload = writePayloads ? postingsEnum.getPayload() : null; int startOffset; int endOffset; if (writeOffsets) { startOffset = postingsEnum.startOffset(); endOffset = postingsEnum.endOffset(); } else { startOffset = -1; endOffset = -1; } addPosition(pos, payload, startOffset, endOffset); } } finishDoc(); } if (docFreq == 0) { return null; } else { BlockTermState state = newTermState(); state.docFreq = docFreq; state.totalTermFreq = writeFreqs ? totalTermFreq : -1; finishTerm(state); return state; } }
Adds a new doc in this term. freq will be -1 when term frequencies are omitted for the field.
/** Adds a new doc in this term. * <code>freq</code> will be -1 when term frequencies are omitted * for the field. */
public abstract void startDoc(int docID, int freq) throws IOException;
Add a new position and payload, and start/end offset. A null payload means no payload; a non-null payload with zero length also means no payload. Caller may reuse the BytesRef for the payload between calls (method must fully consume the payload). startOffset and endOffset will be -1 when offsets are not indexed.
/** Add a new position and payload, and start/end offset. A * null payload means no payload; a non-null payload with * zero length also means no payload. Caller may reuse * the {@link BytesRef} for the payload between calls * (method must fully consume the payload). <code>startOffset</code> * and <code>endOffset</code> will be -1 when offsets are not indexed. */
public abstract void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException;
Called when we are done adding positions and payloads for each doc.
/** Called when we are done adding positions and payloads * for each doc. */
public abstract void finishDoc() throws IOException; }