/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.analyzing;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;

// TODO: move to core?  nobody else uses it yet though...

Exposes a utility method to enumerate all paths intersecting an Automaton with an FST.
/** * Exposes a utility method to enumerate all paths * intersecting an {@link Automaton} with an {@link FST}. */
public class FSTUtil { private FSTUtil() { }
Holds a pair (automaton, fst) of states and accumulated output in the intersected machine.
/** Holds a pair (automaton, fst) of states and accumulated output in the intersected machine. */
public static final class Path<T> {
Node in the automaton where path ends:
/** Node in the automaton where path ends: */
public final int state;
Node in the FST where path ends:
/** Node in the FST where path ends: */
public final FST.Arc<T> fstNode;
Output of the path so far:
/** Output of the path so far: */
public final T output;
Input of the path so far:
/** Input of the path so far: */
public final IntsRefBuilder input;
Sole constructor.
/** Sole constructor. */
public Path(int state, FST.Arc<T> fstNode, T output, IntsRefBuilder input) { this.state = state; this.fstNode = fstNode; this.output = output; this.input = input; } }
Enumerates all minimal prefix paths in the automaton that also intersect the FST, accumulating the FST end node and output for each path.
/** * Enumerates all minimal prefix paths in the automaton that also intersect the FST, * accumulating the FST end node and output for each path. */
public static <T> List<Path<T>> intersectPrefixPaths(Automaton a, FST<T> fst) throws IOException { assert a.isDeterministic(); final List<Path<T>> queue = new ArrayList<>(); final List<Path<T>> endNodes = new ArrayList<>(); if (a.getNumStates() == 0) { return endNodes; } queue.add(new Path<>(0, fst .getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(), new IntsRefBuilder())); final FST.Arc<T> scratchArc = new FST.Arc<>(); final FST.BytesReader fstReader = fst.getBytesReader(); Transition t = new Transition(); while (queue.size() != 0) { final Path<T> path = queue.remove(queue.size() - 1); if (a.isAccept(path.state)) { endNodes.add(path); // we can stop here if we accept this path, // we accept all further paths too continue; } IntsRefBuilder currentInput = path.input; int count = a.initTransition(path.state, t); for (int i=0;i<count;i++) { a.getNextTransition(t); final int min = t.min; final int max = t.max; if (min == max) { final FST.Arc<T> nextArc = fst.findTargetArc(t.min, path.fstNode, scratchArc, fstReader); if (nextArc != null) { final IntsRefBuilder newInput = new IntsRefBuilder(); newInput.copyInts(currentInput.get()); newInput.append(t.min); queue.add(new Path<>(t.dest, new FST.Arc<T>() .copyFrom(nextArc), fst.outputs .add(path.output, nextArc.output), newInput)); } } else { // TODO: if this transition's TO state is accepting, and // it accepts the entire range possible in the FST (ie. 0 to 255), // we can simply use the prefix as the accepted state instead of // looking up all the ranges and terminate early // here. This just shifts the work from one queue // (this one) to another (the completion search // done in AnalyzingSuggester). FST.Arc<T> nextArc = Util.readCeilArc(min, fst, path.fstNode, scratchArc, fstReader); while (nextArc != null && nextArc.label <= max) { assert nextArc.label <= max; assert nextArc.label >= min : nextArc.label + " " + min; final IntsRefBuilder newInput = new IntsRefBuilder(); newInput.copyInts(currentInput.get()); newInput.append(nextArc.label); queue.add(new Path<>(t.dest, new FST.Arc<T>() .copyFrom(nextArc), fst.outputs .add(path.output, nextArc.output), newInput)); final int label = nextArc.label; // used in assert nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc, fstReader); assert nextArc == null || label < nextArc.label : "last: " + label + " next: " + nextArc.label; } } } } return endNodes; } }