/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.document;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * {@link SuggestField} which additionally takes in a set of
 * contexts. Example usage of adding a suggestion with contexts is as follows:
 *
 * <pre class="prettyprint">
 *  document.add(
 *   new ContextSuggestField(name, "suggestion", 4, "context1", "context2"));
 * </pre>
 *
 * Use {@link ContextQuery} to boost and/or filter suggestions
 * at query-time. Use {@link PrefixCompletionQuery}, {@link RegexCompletionQuery}
 * or {@link FuzzyCompletionQuery} if context boost/filtering
 * are not needed.
 *
 * @lucene.experimental
 */
public class ContextSuggestField extends SuggestField {
Separator used between context value and the suggest field value
/** * Separator used between context value and the suggest field value */
public static final int CONTEXT_SEPARATOR = '\u001D'; static final byte TYPE = 1; private final Set<CharSequence> contexts;
Creates a context-enabled suggest field
Params:
  • name – field name
  • value – field value to get suggestion on
  • weight – field weight
  • contexts – associated contexts
Throws:
  • IllegalArgumentException – if either the name or value is null, if value is an empty string, if the weight is negative, if value or contexts contains any reserved characters
/** * Creates a context-enabled suggest field * * @param name field name * @param value field value to get suggestion on * @param weight field weight * @param contexts associated contexts * * @throws IllegalArgumentException if either the name or value is null, * if value is an empty string, if the weight is negative, if value or * contexts contains any reserved characters */
public ContextSuggestField(String name, String value, int weight, CharSequence... contexts) { super(name, value, weight); validate(value); this.contexts = new HashSet<>((contexts != null) ? contexts.length : 0); if (contexts != null) { Collections.addAll(this.contexts, contexts); } }
Expert: Sub-classes can inject contexts at index-time
/** * Expert: Sub-classes can inject contexts at * index-time */
protected Iterable<CharSequence> contexts() { return contexts; } @Override protected CompletionTokenStream wrapTokenStream(TokenStream stream) { final Iterable<CharSequence> contexts = contexts(); for (CharSequence context : contexts) { validate(context); } CompletionTokenStream completionTokenStream; if (stream instanceof CompletionTokenStream) { //TODO this is awkward; is there a better way avoiding re-creating the chain? completionTokenStream = (CompletionTokenStream) stream; PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter(completionTokenStream.inputTokenStream, (char) CONTEXT_SEPARATOR, contexts); completionTokenStream = new CompletionTokenStream(prefixTokenFilter, completionTokenStream.preserveSep, completionTokenStream.preservePositionIncrements, completionTokenStream.maxGraphExpansions); } else { completionTokenStream = new CompletionTokenStream(new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts)); } return completionTokenStream; } @Override protected byte type() { return TYPE; }
The PrefixTokenFilter wraps a TokenStream and adds a set prefixes ahead. The position attribute will not be incremented for the prefixes.
/** * The {@link PrefixTokenFilter} wraps a {@link TokenStream} and adds a set * prefixes ahead. The position attribute will not be incremented for the prefixes. */
private static final class PrefixTokenFilter extends TokenFilter { private final char separator; private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class); private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class); private final Iterable<CharSequence> prefixes; private Iterator<CharSequence> currentPrefix;
Create a new PrefixTokenFilter
Params:
  • input – TokenStream to wrap
  • separator – Character used separate prefixes from other tokens
  • prefixes – Iterable of CharSequence which keeps all prefixes
/** * Create a new {@link PrefixTokenFilter} * * @param input {@link TokenStream} to wrap * @param separator Character used separate prefixes from other tokens * @param prefixes {@link Iterable} of {@link CharSequence} which keeps all prefixes */
public PrefixTokenFilter(TokenStream input, char separator, Iterable<CharSequence> prefixes) { super(input); this.prefixes = prefixes; this.currentPrefix = null; this.separator = separator; } @Override public boolean incrementToken() throws IOException { if (currentPrefix != null) { if (!currentPrefix.hasNext()) { return input.incrementToken(); } else { posAttr.setPositionIncrement(0); } } else { currentPrefix = prefixes.iterator(); termAttr.setEmpty(); posAttr.setPositionIncrement(1); } termAttr.setEmpty(); if (currentPrefix.hasNext()) { termAttr.append(currentPrefix.next()); } termAttr.append(separator); return true; } @Override public void reset() throws IOException { super.reset(); currentPrefix = null; } } private void validate(final CharSequence value) { for (int i = 0; i < value.length(); i++) { if (CONTEXT_SEPARATOR == value.charAt(i)) { throw new IllegalArgumentException("Illegal value [" + value + "] UTF-16 codepoint [0x" + Integer.toHexString((int) value.charAt(i))+ "] at position " + i + " is a reserved character"); } } } }