package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
/**
 * Offset strategy that runs the document text through a {@link TokenStream} and reports every
 * token accepted by one of the query's terms or automata.
 *
 * <p>Tokens are tested one at a time and positions are never consulted, so this strategy must not
 * be used with position-sensitive queries (the constructor asserts this). The resulting
 * {@link OffsetsEnum} reports {@link Integer#MAX_VALUE} as its frequency because matches are never
 * counted.
 */
public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {

  /** One matcher per exact query term, followed by the automata supplied by the components. */
  private final CharacterRunAutomaton[] combinedAutomata;

  /**
   * Creates the strategy and pre-builds the matchers that are shared by every enum it hands out.
   *
   * @param components highlighting components; supplies the query terms, the automata and the
   *     phrase helper
   * @param indexAnalyzer analyzer passed through to the superclass
   */
  public TokenStreamOffsetStrategy(UHComponents components, Analyzer indexAnalyzer) {
    super(components, indexAnalyzer);
    // This strategy looks at single tokens only; it has no way to honour position constraints.
    assert components.getPhraseHelper().hasPositionSensitivity() == false;
    combinedAutomata = convertTermsToAutomata(components.getTerms(), components.getAutomata());
  }

  /**
   * Builds a single matcher array: one exact-string automaton per term (in term order) followed by
   * the given automata (in their original order).
   *
   * <p>NOTE(review): every term becomes its own automaton, so each token is later tested against
   * each of them in turn; a single union automaton for the terms part would avoid that.
   *
   * @param terms exact terms, decoded as UTF-8
   * @param automata additional automata appended unchanged
   * @return a new array of length {@code terms.length + automata.length}
   */
  private static CharacterRunAutomaton[] convertTermsToAutomata(BytesRef[] terms, CharacterRunAutomaton[] automata) {
    CharacterRunAutomaton[] newAutomata = new CharacterRunAutomaton[terms.length + automata.length];
    for (int i = 0; i < terms.length; i++) {
      String termString = terms[i].utf8ToString();
      newAutomata[i] = new CharacterRunAutomaton(Automata.makeString(termString)) {
        // getTerm() in the enum uses toString() as the match description, so report the plain term.
        @Override
        public String toString() {
          return termString;
        }
      };
    }
    System.arraycopy(automata, 0, newAutomata, terms.length, automata.length);
    return newAutomata;
  }

  /**
   * Returns an enum over the tokens of {@code content} that are accepted by any of the combined
   * matchers.
   *
   * @param reader not used by this strategy
   * @param docId not used by this strategy
   * @param content the text passed to the inherited {@code tokenStream(String)}
   * @return a new, already reset, enum; the caller is responsible for closing it
   * @throws IOException if the token stream cannot be created or reset
   */
  @Override
  public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
    return new TokenStreamOffsetsEnum(tokenStream(content), combinedAutomata);
  }

  /** {@link OffsetsEnum} that lazily pulls tokens from a stream and stops at each matching one. */
  private static class TokenStreamOffsetsEnum extends OffsetsEnum {

    /** The stream being consumed; set to {@code null} once it has been closed. */
    private TokenStream stream;

    private final CharacterRunAutomaton[] matchers;
    private final CharTermAttribute charTermAtt;
    private final OffsetAttribute offsetAtt;

    /** Index into {@link #matchers} of the matcher that accepted the current token; -1 initially. */
    private int currentMatch = -1;

    /** Lazily filled cache of {@link #getTerm()} results, parallel to {@link #matchers}. */
    private final BytesRef[] matchDescriptions;

    /**
     * Registers the needed attributes on {@code ts} and resets it so it is ready for consumption.
     *
     * @param ts the stream to consume; owned (and eventually closed) by this enum
     * @param matchers automata tested, in order, against every token
     * @throws IOException if resetting the stream fails; the stream is closed before rethrowing
     */
    TokenStreamOffsetsEnum(TokenStream ts, CharacterRunAutomaton[] matchers) throws IOException {
      this.stream = ts;
      this.matchers = matchers;
      this.matchDescriptions = new BytesRef[matchers.length];
      this.charTermAtt = ts.addAttribute(CharTermAttribute.class);
      this.offsetAtt = ts.addAttribute(OffsetAttribute.class);
      try {
        ts.reset();
      } catch (Throwable t) {
        // If construction fails no caller ever receives this enum, so nobody else could close
        // the stream; release it here and keep the original failure as the primary exception.
        try {
          ts.close();
        } catch (Throwable closeFailure) {
          t.addSuppressed(closeFailure);
        }
        throw t;
      }
    }

    /**
     * Advances to the next token accepted by any matcher. When several matchers accept the same
     * token, the one with the lowest index wins.
     *
     * @return {@code true} if positioned on a matching token; {@code false} once the stream is
     *     exhausted (at which point the stream is ended and closed) or already closed
     */
    @Override
    public boolean nextPosition() throws IOException {
      if (stream != null) {
        while (stream.incrementToken()) {
          for (int i = 0; i < matchers.length; i++) {
            if (matchers[i].run(charTermAtt.buffer(), 0, charTermAtt.length())) {
              currentMatch = i;
              return true;
            }
          }
        }
        stream.end();
        close();
      }
      return false;
    }

    /**
     * Always {@link Integer#MAX_VALUE}: this enum consumes the stream lazily and never counts
     * matches, so no real frequency is available.
     */
    @Override
    public int freq() throws IOException {
      return Integer.MAX_VALUE;
    }

    /** Start offset currently held by the stream's offset attribute. */
    @Override
    public int startOffset() throws IOException {
      return offsetAtt.startOffset();
    }

    /** End offset currently held by the stream's offset attribute. */
    @Override
    public int endOffset() throws IOException {
      return offsetAtt.endOffset();
    }

    /**
     * Describes the matcher that accepted the current token, using that matcher's
     * {@code toString()}. The result is created on first use and cached per matcher.
     *
     * <p>Only meaningful after {@link #nextPosition()} has returned {@code true}; before that
     * {@code currentMatch} is still -1 and is not a valid index.
     */
    @Override
    public BytesRef getTerm() throws IOException {
      if (matchDescriptions[currentMatch] == null) {
        matchDescriptions[currentMatch] = new BytesRef(matchers[currentMatch].toString());
      }
      return matchDescriptions[currentMatch];
    }

    /** Closes the underlying stream if it is still open; later calls do nothing. */
    @Override
    public void close() throws IOException {
      if (stream != null) {
        stream.close();
        stream = null;
      }
    }
  }
}