package org.apache.lucene.search.uhighlight;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.Operations;
final class MultiTermHighlighting {
private MultiTermHighlighting() {
}
static CharacterRunAutomaton[] (Query query, Predicate<String> fieldMatcher, boolean lookInSpan) {
AutomataCollector collector = new AutomataCollector(lookInSpan, fieldMatcher);
query.visit(collector);
return collector.runAutomata.toArray(new CharacterRunAutomaton[0]);
}
public static boolean (Query query) {
return query instanceof AutomatonQuery || query instanceof FuzzyQuery;
}
private static class AutomataCollector extends QueryVisitor {
List<CharacterRunAutomaton> runAutomata = new ArrayList<>();
final boolean lookInSpan;
final Predicate<String> fieldMatcher;
private AutomataCollector(boolean lookInSpan, Predicate<String> fieldMatcher) {
this.lookInSpan = lookInSpan;
this.fieldMatcher = fieldMatcher;
}
@Override
public boolean acceptField(String field) {
return fieldMatcher.test(field);
}
@Override
public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
if (lookInSpan == false && parent instanceof SpanQuery) {
return QueryVisitor.EMPTY_VISITOR;
}
return super.getSubVisitor(occur, parent);
}
@Override
public void visitLeaf(Query query) {
if (query instanceof AutomatonQuery) {
AutomatonQuery aq = (AutomatonQuery) query;
if (aq.isAutomatonBinary() == false) {
runAutomata.add(new CharacterRunAutomaton(aq.getAutomaton()) {
@Override
public String toString() {
return query.toString();
}
});
}
else {
runAutomata.add(binaryToCharRunAutomaton(aq.getAutomaton(), query.toString()));
}
}
else if (query instanceof FuzzyQuery) {
FuzzyQuery fq = (FuzzyQuery) query;
if (fq.getMaxEdits() == 0 || fq.getPrefixLength() >= fq.getTerm().text().length()) {
consumeTerms(query, fq.getTerm());
}
else {
runAutomata.add(new CharacterRunAutomaton(fq.toAutomaton()){
@Override
public String toString() {
return query.toString();
}
});
}
}
}
}
private static CharacterRunAutomaton binaryToCharRunAutomaton(Automaton binaryAutomaton, String description) {
return new CharacterRunAutomaton(Automata.makeEmpty()) {
ByteRunAutomaton byteRunAutomaton =
new ByteRunAutomaton(binaryAutomaton, true, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
@Override
public String toString() {
return description;
}
@Override
public boolean run(char[] chars, int offset, int length) {
int state = 0;
final int maxIdx = offset + length;
for (int i = offset; i < maxIdx; i++) {
final int code = chars[i];
int b;
if (code < 0x80) {
state = byteRunAutomaton.step(state, code);
if (state == -1) return false;
} else if (code < 0x800) {
b = (0xC0 | (code >> 6));
state = byteRunAutomaton.step(state, b);
if (state == -1) return false;
b = (0x80 | (code & 0x3F));
state = byteRunAutomaton.step(state, b);
if (state == -1) return false;
} else {
byte[] utf8Bytes = new byte[4 * (maxIdx - i)];
int utf8Len = UnicodeUtil.UTF16toUTF8(chars, i, maxIdx - i, utf8Bytes);
for (int utfIdx = 0; utfIdx < utf8Len; utfIdx++) {
state = byteRunAutomaton.step(state, utf8Bytes[utfIdx] & 0xFF);
if (state == -1) return false;
}
break;
}
}
return byteRunAutomaton.isAccept(state);
}
};
}
}