org.apache.lucene/lucene-suggest/8.2.0 : org/apache/lucene/search/suggest/document/CompletionPostingsFormat.java

CompletionPostingsFormat
http://lucene.apache.org/lucene-parent/lucene-suggest: Lucene Suggest Module (The Apache Software Foundation)
Apache 2
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.document;

import java.io.IOException;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.fst.FST;

 A PostingsFormat which supports document suggestion based on indexed SuggestFields. Document suggestion is based on a weighted FST which maps analyzed terms of a SuggestField to its surface form and document id. 

Files:

  .lkp: Completion Dictionary
  .cmp: Completion Index



Completion Dictionary
The .lkp file contains an FST for each suggest field


  CompletionDict (.lkp) --> Header, FST^{NumSuggestFields}, Footer
  Header --> CodecHeader
  
  FST --> FST<Long, BytesRef>
  Footer --> CodecFooter

Notes:

  Header is a CodecHeader storing the version information for the Completion implementation.
  FST maps all analyzed forms to surface forms of a SuggestField


Completion Index
The .cmp file contains an index into the completion dictionary, so that it can be
accessed randomly.

  CompletionIndex (.cmp) --> Header, NumSuggestFields, Entry^{NumSuggestFields}, Footer
  Header --> CodecHeader
  NumSuggestFields --> Uint32
  Entry --> FieldNumber, CompletionDictionaryOffset, MinWeight, MaxWeight, Type
  FieldNumber --> Uint32
  CompletionDictionaryOffset -->  Uint64
  MinWeight -->  Uint64
  MaxWeight -->  Uint64
  Type -->  Byte
  Footer --> CodecFooter

Notes:

  Header is a CodecHeader storing the version information for the Completion implementation.
  NumSuggestFields is the number of suggest fields indexed
  FieldNumber is the field's number from FieldInfos (.fnm)
  CompletionDictionaryOffset is the file offset of a field's FST in CompletionDictionary (.lkp)
  MinWeight and MaxWeight are the global minimum and maximum weight for the field
  Type indicates if the suggester has context or not

@lucene.experimental
/**
 * <p>
 * A {@link PostingsFormat} which supports document suggestion based on
 * indexed {@link SuggestField}s.
 * Document suggestion is based on a weighted FST which maps analyzed
 * terms of a {@link SuggestField} to its surface form and document id.
 * </p>
 * <p>
 * Files:
 * <ul>
 *   <li><tt>.lkp</tt>: <a href="#Completiondictionary">Completion Dictionary</a></li>
 *   <li><tt>.cmp</tt>: <a href="#Completionindex">Completion Index</a></li>
 * </ul>
 * <p>
 * <a name="Completiondictionary"></a>
 * <h3>Completion Dictionary</h3>
 * <p>The .lkp file contains an FST for each suggest field
 * </p>
 * <ul>
 *   <li>CompletionDict (.lkp) --&gt; Header, FST<sup>NumSuggestFields</sup>, Footer</li>
 *   <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
 *   <!-- TODO: should the FST output be mentioned at all? -->
 *   <li>FST --&gt; {@link FST FST&lt;Long, BytesRef&gt;}</li>
 *   <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * <p>Notes:</p>
 * <ul>
 *   <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
 *     for the Completion implementation.</li>
 *   <li>FST maps all analyzed forms to surface forms of a SuggestField</li>
 * </ul>
 * <a name="Completionindex"></a>
 * <h3>Completion Index</h3>
 * <p>The .cmp file contains an index into the completion dictionary, so that it can be
 * accessed randomly.</p>
 * <ul>
 *   <li>CompletionIndex (.cmp) --&gt; Header, NumSuggestFields, Entry<sup>NumSuggestFields</sup>, Footer</li>
 *   <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
 *   <li>NumSuggestFields --&gt; {@link DataOutput#writeVInt Uint32}</li>
 *   <li>Entry --&gt; FieldNumber, CompletionDictionaryOffset, MinWeight, MaxWeight, Type</li>
 *   <li>FieldNumber --&gt; {@link DataOutput#writeVInt Uint32}</li>
 *   <li>CompletionDictionaryOffset --&gt; {@link DataOutput#writeVLong  Uint64}</li>
 *   <li>MinWeight --&gt; {@link DataOutput#writeVLong  Uint64}</li>
 *   <li>MaxWeight --&gt; {@link DataOutput#writeVLong  Uint64}</li>
 *   <li>Type --&gt; {@link DataOutput#writeByte  Byte}</li>
 *   <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * <p>Notes:</p>
 * <ul>
 *   <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
 *     for the Completion implementation.</li>
 *   <li>NumSuggestFields is the number of suggest fields indexed</li>
 *   <li>FieldNumber is the field's number from {@link FieldInfos} (.fnm)</li>
 *   <li>CompletionDictionaryOffset is the file offset of a field's FST in CompletionDictionary (.lkp)</li>
 *   <li>MinWeight and MaxWeight are the global minimum and maximum weight for the field</li>
 *   <li>Type indicates if the suggester has context or not</li>
 * </ul>
 *
 * @lucene.experimental
 */
public abstract class CompletionPostingsFormat extends PostingsFormat {

  static final String CODEC_NAME = "completion";
  static final int COMPLETION_CODEC_VERSION = 1;
  static final int COMPLETION_VERSION_CURRENT = COMPLETION_CODEC_VERSION;
  static final String INDEX_EXTENSION = "cmp";
  static final String DICT_EXTENSION = "lkp";

  An enum that allows to control if suggester FSTs are loaded into memory or read off-heap
/**
   * An enum that allows to control if suggester FSTs are loaded into memory or read off-heap
   */
  public enum FSTLoadMode {
    Always read FSTs from disk.
NOTE: If this option is used the FST will be read off-heap even if buffered directory implementations
are used.
/**
     * Always read FSTs from disk.
     * NOTE: If this option is used the FST will be read off-heap even if buffered directory implementations
     * are used.
     */
    OFF_HEAP,
    Never read FSTs from disk ie. all suggest fields FSTs are loaded into memory
/**
     * Never read FSTs from disk ie. all suggest fields FSTs are loaded into memory
     */
    ON_HEAP,
    Automatically make the decision if FSTs are read from disk depending if the segment read from an MMAPDirectory
/**
     * Automatically make the decision if FSTs are read from disk depending if the segment read from an MMAPDirectory
     */
    AUTO
  }

  private final FSTLoadMode fstLoadMode;

  Used only by core Lucene at read-time via Service Provider instantiation
/**
   * Used only by core Lucene at read-time via Service Provider instantiation
   */
  public CompletionPostingsFormat() {
    this(FSTLoadMode.ON_HEAP);
  }

  Creates a CompletionPostingsFormat that will use the provided fstLoadMode to determine
if the completion FST should be loaded on or off heap.
/**
   * Creates a {@link CompletionPostingsFormat} that will
   * use the provided <code>fstLoadMode</code> to determine
   * if the completion FST should be loaded on or off heap.
   */
  public CompletionPostingsFormat(FSTLoadMode fstLoadMode) {
    super(CODEC_NAME);
    this.fstLoadMode = fstLoadMode;
  }

  Concrete implementation should specify the delegating postings format
/**
   * Concrete implementation should specify the delegating postings format
   */
  protected abstract PostingsFormat delegatePostingsFormat();

  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    PostingsFormat delegatePostingsFormat = delegatePostingsFormat();
    if (delegatePostingsFormat == null) {
      throw new UnsupportedOperationException("Error - " + getClass().getName()
          + " has been constructed without a choice of PostingsFormat");
    }
    return new CompletionFieldsConsumer(delegatePostingsFormat, state);
  }

  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    return new CompletionFieldsProducer(state, fstLoadMode);
  }
}
/

org.apache.lucene/ lucene-suggest/ 8.2.0/ org/apache/lucene/search/suggest/document/CompletionPostingsFormat.java

Completion Dictionary

Completion Index