/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

final class SortingTermVectorsConsumer extends TermVectorsConsumer {
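  /**
   * Wrapper around the flush directory that redirects term vector files to temporary names and
   * tracks them; {@code null} until {@link #initTermVectorsWriter()} has been called.
   */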
  TrackingTmpOutputDirectoryWrapper tmpDirectory;

  public SortingTermVectorsConsumer(DocumentsWriterPerThread docWriter) {
    super(docWriter);
  }

  @Override
  void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
    super.flush(fieldsToFlush, state, sortMap, norms);
    if (tmpDirectory != null) {
      if (sortMap == null) {
        // we're lucky: the index is already sorted, so just rename the temporary files and return
        for (Map.Entry<String, String> entry : tmpDirectory.getTemporaryFiles().entrySet()) {
          tmpDirectory.rename(entry.getValue(), entry.getKey());
        }
        return;
      }
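      // The index needs sorting: read the term vectors back from the temporary files and
      // rewrite them to the final files in the order given by the sort map.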
      TermVectorsReader reader = docWriter.codec.termVectorsFormat()
          .vectorsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
      TermVectorsReader mergeReader = reader.getMergeInstance();
      TermVectorsWriter writer = docWriter.codec.termVectorsFormat()
          .vectorsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
      try {
        reader.checkIntegrity();
        for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
          Fields vectors = mergeReader.get(sortMap.newToOld(docID));
          writeTermVectors(writer, vectors, state.fieldInfos);
        }
        writer.finish(state.fieldInfos, state.segmentInfo.maxDoc());
      } finally {
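        // Close the reader and writer and delete the temporary files whether or not the copy succeeded.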
        IOUtils.close(reader, writer);
        IOUtils.deleteFiles(tmpDirectory,
            tmpDirectory.getTemporaryFiles().values());
      }
    }
  }

  @Override
  void initTermVectorsWriter() throws IOException {
    if (writer == null) {
      IOContext context = new IOContext(new FlushInfo(docWriter.getNumDocsInRAM(), docWriter.bytesUsed()));
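      // Redirect the term vectors writer to temporary files; flush() later either renames them
      // (segment already sorted) or rewrites them in sorted order.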
      tmpDirectory = new TrackingTmpOutputDirectoryWrapper(docWriter.directory);
      writer = docWriter.codec.termVectorsFormat().vectorsWriter(tmpDirectory, docWriter.getSegmentInfo(), context);
      lastDocID = 0;
    }
  }

  @Override
  public void abort() {
    try {
      super.abort();
    } finally {
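      // Whether or not super.abort() threw, make sure no temporary term vector files are left behind.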
      if (tmpDirectory != null) {
        IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
            tmpDirectory.getTemporaryFiles().values());
      }
    }
  }

  
  /** Safe (but slowish) default method to copy every vector field to the provided {@link TermVectorsWriter}. */
  private static void writeTermVectors(TermVectorsWriter writer, Fields vectors, FieldInfos fieldInfos) throws IOException {
    if (vectors == null) {
      writer.startDocument(0);
      writer.finishDocument();
      return;
    }

    int numFields = vectors.size();
    if (numFields == -1) {
      // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
      numFields = 0;
      for (final Iterator<String> it = vectors.iterator(); it.hasNext(); ) {
        it.next();
        numFields++;
      }
    }
    writer.startDocument(numFields);

    String lastFieldName = null;

    TermsEnum termsEnum = null;
    PostingsEnum docsAndPositionsEnum = null;

    int fieldCount = 0;
    for (String fieldName : vectors) {
      fieldCount++;
      final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);

      assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0 : "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
      lastFieldName = fieldName;

      final Terms terms = vectors.terms(fieldName);
      if (terms == null) {
        // FieldsEnum shouldn't lie...
        continue;
      }

      final boolean hasPositions = terms.hasPositions();
      final boolean hasOffsets = terms.hasOffsets();
      final boolean hasPayloads = terms.hasPayloads();
      assert !hasPayloads || hasPositions;

      int numTerms = (int) terms.size();
      if (numTerms == -1) {
        // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function
        numTerms = 0;
        termsEnum = terms.iterator();
        while (termsEnum.next() != null) {
          numTerms++;
        }
      }

      writer.startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
      termsEnum = terms.iterator();

      int termCount = 0;
      while (termsEnum.next() != null) {
        termCount++;

        final int freq = (int) termsEnum.totalTermFreq();

        writer.startTerm(termsEnum.term(), freq);

        if (hasPositions || hasOffsets) {
          docsAndPositionsEnum = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS | PostingsEnum.PAYLOADS);
          assert docsAndPositionsEnum != null;

          final int docID = docsAndPositionsEnum.nextDoc();
          assert docID != DocIdSetIterator.NO_MORE_DOCS;
          assert docsAndPositionsEnum.freq() == freq;

          for (int posUpto = 0; posUpto < freq; posUpto++) {
            final int pos = docsAndPositionsEnum.nextPosition();
            final int startOffset = docsAndPositionsEnum.startOffset();
            final int endOffset = docsAndPositionsEnum.endOffset();
            final BytesRef payload = docsAndPositionsEnum.getPayload();

            assert !hasPositions || pos >= 0;
            writer.addPosition(pos, startOffset, endOffset, payload);
          }
        }
        writer.finishTerm();
      }
      assert termCount == numTerms;
      writer.finishField();
    }
    assert fieldCount == numFields;
    writer.finishDocument();
  }
}