/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.AttributeSource;

An abstract TokenFilter that exposes its input stream as a graph Call incrementBaseToken() to move the root of the graph to the next position in the TokenStream, incrementGraphToken() to move along the current graph, and incrementGraph() to reset to the next graph based at the current root. For example, given the stream 'a b/c:2 d e`, then with the base token at 'a', incrementGraphToken() will produce the stream 'a b d e', and then after calling incrementGraph() will produce the stream 'a c e'.
/** * An abstract TokenFilter that exposes its input stream as a graph * * Call {@link #incrementBaseToken()} to move the root of the graph to the next * position in the TokenStream, {@link #incrementGraphToken()} to move along * the current graph, and {@link #incrementGraph()} to reset to the next graph * based at the current root. * * For example, given the stream 'a b/c:2 d e`, then with the base token at * 'a', incrementGraphToken() will produce the stream 'a b d e', and then * after calling incrementGraph() will produce the stream 'a c e'. */
public abstract class GraphTokenFilter extends TokenFilter { private final Deque<Token> tokenPool = new ArrayDeque<>(); private final List<Token> currentGraph = new ArrayList<>();
The maximum permitted number of routes through a graph
/** * The maximum permitted number of routes through a graph */
public static final int MAX_GRAPH_STACK_SIZE = 1000;
The maximum permitted read-ahead in the token stream
/** * The maximum permitted read-ahead in the token stream */
public static final int MAX_TOKEN_CACHE_SIZE = 100; private Token baseToken; private int graphDepth; private int graphPos; private int trailingPositions = -1; private int finalOffsets = -1; private int stackSize; private int cacheSize; private final PositionIncrementAttribute posIncAtt; private final OffsetAttribute offsetAtt;
Create a new GraphTokenFilter
/** * Create a new GraphTokenFilter */
public GraphTokenFilter(TokenStream input) { super(input); this.posIncAtt = input.addAttribute(PositionIncrementAttribute.class); this.offsetAtt = input.addAttribute(OffsetAttribute.class); }
Move the root of the graph to the next token in the wrapped TokenStream
Returns:false if the underlying stream is exhausted
/** * Move the root of the graph to the next token in the wrapped TokenStream * * @return {@code false} if the underlying stream is exhausted */
protected final boolean incrementBaseToken() throws IOException { stackSize = 0; graphDepth = 0; graphPos = 0; Token oldBase = baseToken; baseToken = nextTokenInStream(baseToken); if (baseToken == null) { return false; } currentGraph.clear(); currentGraph.add(baseToken); baseToken.attSource.copyTo(this); recycleToken(oldBase); return true; }
Move to the next token in the current route through the graph
Returns:false if there are not more tokens in the current graph
/** * Move to the next token in the current route through the graph * * @return {@code false} if there are not more tokens in the current graph */
protected final boolean incrementGraphToken() throws IOException { if (graphPos < graphDepth) { graphPos++; currentGraph.get(graphPos).attSource.copyTo(this); return true; } Token token = nextTokenInGraph(currentGraph.get(graphDepth)); if (token == null) { return false; } graphDepth++; graphPos++; currentGraph.add(graphDepth, token); token.attSource.copyTo(this); return true; }
Reset to the root token again, and move down the next route through the graph
Returns:false if there are no more routes through the graph
/** * Reset to the root token again, and move down the next route through the graph * * @return false if there are no more routes through the graph */
protected final boolean incrementGraph() throws IOException { if (baseToken == null) { return false; } graphPos = 0; for (int i = graphDepth; i >= 1; i--) { if (lastInStack(currentGraph.get(i)) == false) { currentGraph.set(i, nextTokenInStream(currentGraph.get(i))); for (int j = i + 1; j < graphDepth; j++) { currentGraph.set(j, nextTokenInGraph(currentGraph.get(j))); } if (stackSize++ > MAX_GRAPH_STACK_SIZE) { throw new IllegalStateException("Too many graph paths (> " + MAX_GRAPH_STACK_SIZE + ")"); } currentGraph.get(0).attSource.copyTo(this); graphDepth = i; return true; } } return false; }
Return the number of trailing positions at the end of the graph NB this should only be called after incrementGraphToken() has returned false
/** * Return the number of trailing positions at the end of the graph * * NB this should only be called after {@link #incrementGraphToken()} has returned {@code false} */
public int getTrailingPositions() { return trailingPositions; } @Override public void end() throws IOException { if (trailingPositions == -1) { input.end(); trailingPositions = posIncAtt.getPositionIncrement(); finalOffsets = offsetAtt.endOffset(); } else { endAttributes(); this.posIncAtt.setPositionIncrement(trailingPositions); this.offsetAtt.setOffset(finalOffsets, finalOffsets); } } @Override public void reset() throws IOException { input.reset(); // new attributes can be added between reset() calls, so we can't reuse // token objects from a previous run tokenPool.clear(); cacheSize = 0; graphDepth = 0; trailingPositions = -1; finalOffsets = -1; baseToken = null; } int cachedTokenCount() { return cacheSize; } private Token newToken() { if (tokenPool.size() == 0) { cacheSize++; if (cacheSize > MAX_TOKEN_CACHE_SIZE) { throw new IllegalStateException("Too many cached tokens (> " + MAX_TOKEN_CACHE_SIZE + ")"); } return new Token(this.cloneAttributes()); } Token token = tokenPool.removeFirst(); token.reset(input); return token; } private void recycleToken(Token token) { if (token == null) return; token.nextToken = null; tokenPool.add(token); } private Token nextTokenInGraph(Token token) throws IOException { int remaining = token.length(); do { token = nextTokenInStream(token); if (token == null) { return null; } remaining -= token.posInc(); } while (remaining > 0); return token; } // check if the next token in the tokenstream is at the same position as this one private boolean lastInStack(Token token) throws IOException { Token next = nextTokenInStream(token); return next == null || next.posInc() != 0; } private Token nextTokenInStream(Token token) throws IOException { if (token != null && token.nextToken != null) { return token.nextToken; } if (this.trailingPositions != -1) { // already hit the end return null; } if (input.incrementToken() == false) { input.end(); trailingPositions = posIncAtt.getPositionIncrement(); finalOffsets = offsetAtt.endOffset(); return null; } if (token == null) { return newToken(); } token.nextToken = newToken(); return token.nextToken; } private static class Token { final AttributeSource attSource; final PositionIncrementAttribute posIncAtt; final PositionLengthAttribute lengthAtt; Token nextToken; Token(AttributeSource attSource) { this.attSource = attSource; this.posIncAtt = attSource.addAttribute(PositionIncrementAttribute.class); boolean hasLengthAtt = attSource.hasAttribute(PositionLengthAttribute.class); this.lengthAtt = hasLengthAtt ? attSource.addAttribute(PositionLengthAttribute.class) : null; } int posInc() { return this.posIncAtt.getPositionIncrement(); } int length() { if (this.lengthAtt == null) { return 1; } return this.lengthAtt.getPositionLength(); } void reset(AttributeSource attSource) { attSource.copyTo(this.attSource); this.nextToken = null; } @Override public String toString() { return attSource.toString(); } } }