/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.document;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.BytesRef;

Field that indexes a string value and a weight as a weighted completion against a named suggester. Field is tokenized, not stored and stores documents, frequencies and positions. Field can be used to provide near real time document suggestions.

Besides the usual Analyzers, CompletionAnalyzer can be used to tune suggest field only parameters (e.g. preserving token separators, preserving position increments when converting the token stream to an automaton)

Example indexing usage:

document.add(new SuggestField(name, "suggestion", 4));
To perform document suggestions based on this field, use SuggestIndexSearcher.suggest(CompletionQuery, int, boolean)
@lucene.experimental
/**
 * <p>
 * Field that indexes a string value and a weight as a weighted completion
 * against a named suggester.
 * Field is tokenized, not stored and stores documents, frequencies and positions.
 * Field can be used to provide near real time document suggestions.
 * </p>
 * <p>
 * Besides the usual {@link org.apache.lucene.analysis.Analyzer}s,
 * {@link CompletionAnalyzer}
 * can be used to tune suggest field only parameters
 * (e.g. preserving token separators, preserving position increments
 * when converting the token stream to an automaton)
 * </p>
 * <p>
 * Example indexing usage:
 * <pre class="prettyprint">
 * document.add(new SuggestField(name, "suggestion", 4));
 * </pre>
 * To perform document suggestions based on this field, use
 * {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)}
 *
 * @lucene.experimental
 */
public class SuggestField extends Field {
Default field type for suggest field
/** Default field type for suggest field */
public static final FieldType FIELD_TYPE = new FieldType(); static { FIELD_TYPE.setTokenized(true); FIELD_TYPE.setStored(false); FIELD_TYPE.setStoreTermVectors(false); FIELD_TYPE.setOmitNorms(false); FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); FIELD_TYPE.freeze(); } static final byte TYPE = 0; private final BytesRef surfaceForm; private final int weight;
Creates a SuggestField
Params:
  • name – field name
  • value – field value to get suggestions on
  • weight – field weight
Throws:
  • IllegalArgumentException – if either the name or value is null, if value is an empty string, if the weight is negative, if value contains any reserved characters
/** * Creates a {@link SuggestField} * * @param name field name * @param value field value to get suggestions on * @param weight field weight * * @throws IllegalArgumentException if either the name or value is null, * if value is an empty string, if the weight is negative, if value contains * any reserved characters */
public SuggestField(String name, String value, int weight) { super(name, value, FIELD_TYPE); if (weight < 0) { throw new IllegalArgumentException("weight must be >= 0"); } if (value.length() == 0) { throw new IllegalArgumentException("value must have a length > 0"); } for (int i = 0; i < value.length(); i++) { if (isReserved(value.charAt(i))) { throw new IllegalArgumentException("Illegal input [" + value + "] UTF-16 codepoint [0x" + Integer.toHexString((int) value.charAt(i))+ "] at position " + i + " is a reserved character"); } } this.surfaceForm = new BytesRef(value); this.weight = weight; } @Override public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { CompletionTokenStream completionStream = wrapTokenStream(super.tokenStream(analyzer, reuse)); completionStream.setPayload(buildSuggestPayload()); return completionStream; }
Wraps a stream with a CompletionTokenStream. Subclasses can override this method to change the indexing pipeline.
/**
 * Wraps a <code>stream</code> with a CompletionTokenStream.
 * A stream that already is a CompletionTokenStream is returned as-is.
 *
 * Subclasses can override this method to change the indexing pipeline.
 */
protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
  return stream instanceof CompletionTokenStream
      ? (CompletionTokenStream) stream
      : new CompletionTokenStream(stream);
}
Returns a byte to denote the type of the field
/** * Returns a byte to denote the type of the field */
protected byte type() { return TYPE; } private BytesRef buildSuggestPayload() { ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); try (OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream)) { output.writeVInt(surfaceForm.length); output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length); output.writeVInt(weight + 1); output.writeByte(type()); } catch (IOException e) { throw new RuntimeException(e); // not possible, it's a ByteArrayOutputStream! } return new BytesRef(byteArrayOutputStream.toByteArray()); } private boolean isReserved(char c) { switch (c) { case ConcatenateGraphFilter.SEP_LABEL: case CompletionAnalyzer.HOLE_CHARACTER: case NRTSuggesterBuilder.END_BYTE: return true; default: return false; } } }