package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.BytesRef;
/**
 * Highlights the content of a single field for one document at a time.
 *
 * <p>Match offsets are obtained from a {@link FieldOffsetStrategy}, grouped into {@link Passage}s
 * delimited by a {@link BreakIterator}, scored by a {@link PassageScorer}, and the best
 * {@code maxPassages} of them are handed to a {@link PassageFormatter}.
 *
 * <p>The configured {@link BreakIterator} is mutated on every call to
 * {@link #highlightFieldForDoc(LeafReader, int, String)}.
 */
public class FieldHighlighter {

  /** Name of the field being highlighted; returned by {@link #getField()} and used in error messages. */
  protected final String field;
  /** Supplies the {@link OffsetsEnum} of matches for a document's content. */
  protected final FieldOffsetStrategy fieldOffsetStrategy;
  /** Determines passage boundaries; re-targeted at the content on every highlight call. */
  protected final BreakIterator breakIterator;
  /** Assigns a score to each candidate passage. */
  protected final PassageScorer passageScorer;
  /** Maximum number of highlighted passages kept per document. */
  protected final int maxPassages;
  /** Maximum number of un-highlighted summary passages; {@code -1} means "use {@link #maxPassages}". */
  protected final int maxNoHighlightPassages;
  /** Turns the selected passages plus the content into the final result object. */
  protected final PassageFormatter passageFormatter;

  /**
   * Creates a highlighter for one field. All arguments are stored as-is.
   *
   * @param field name of the field
   * @param fieldOffsetStrategy source of match offsets
   * @param breakIterator passage boundary detector
   * @param passageScorer passage scoring function
   * @param maxPassages maximum number of highlighted passages to return
   * @param maxNoHighlightPassages maximum number of summary passages to return when nothing
   *        matched; {@code -1} falls back to {@code maxPassages}
   * @param passageFormatter formatter producing the final result
   */
  public FieldHighlighter(String field, FieldOffsetStrategy fieldOffsetStrategy, BreakIterator breakIterator,
                          PassageScorer passageScorer, int maxPassages, int maxNoHighlightPassages,
                          PassageFormatter passageFormatter) {
    this.field = field;
    this.fieldOffsetStrategy = fieldOffsetStrategy;
    this.breakIterator = breakIterator;
    this.passageScorer = passageScorer;
    this.maxPassages = maxPassages;
    this.maxNoHighlightPassages = maxNoHighlightPassages;
    this.passageFormatter = passageFormatter;
  }

  /** Returns the name of the field this highlighter works on. */
  public String getField() {
    return field;
  }

  /** Returns the offset source reported by the underlying {@link FieldOffsetStrategy}. */
  public UnifiedHighlighter.OffsetSource getOffsetSource() {
    return fieldOffsetStrategy.getOffsetSource();
  }

  /**
   * Highlights {@code content} for the given document.
   *
   * <p>If no highlighted passage is produced, un-highlighted summary passages are requested
   * instead (see {@link #getSummaryPassagesNoHighlight(int)}).
   *
   * @param reader leaf reader passed through to the offset strategy
   * @param docId document id passed through to the offset strategy
   * @param content the field's text; must not be {@code null}
   * @return whatever the {@link PassageFormatter} produces, or {@code null} when the content is
   *         empty or no passages (highlighted or summary) were produced
   * @throws IOException if obtaining or iterating the offsets fails
   */
  public Object highlightFieldForDoc(LeafReader reader, int docId, String content) throws IOException {
    if (content.isEmpty()) {
      return null; // nothing to highlight
    }

    breakIterator.setText(content);

    try (OffsetsEnum offsetsEnums = fieldOffsetStrategy.getOffsetsEnum(reader, docId, content)) {
      Passage[] passages = highlightOffsetsEnums(offsetsEnums);

      if (passages.length == 0) {
        // Nothing matched: ask for a default, un-highlighted summary instead.
        passages = getSummaryPassagesNoHighlight(maxNoHighlightPassages == -1 ? maxPassages : maxNoHighlightPassages);
      }

      if (passages.length > 0) {
        return passageFormatter.format(passages, content);
      } else {
        return null;
      }
    }
  }

  /**
   * Builds up to {@code maxPassages} match-less passages from the start of the text currently set
   * on the break iterator, one passage per break-iterator segment.
   *
   * <p>The break iterator is expected to already be at its first boundary (checked by assertion).
   * It is nevertheless rewound explicitly so that the result does not depend on whether
   * assertions are enabled.
   *
   * @param maxPassages maximum number of passages to return
   * @return the passages in text order; empty if the iterator yields no segment
   */
  protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) {
    // Capture the position before rewinding so the invariant check stays side-effect free.
    final int positionOnEntry = breakIterator.current();
    int pos = breakIterator.first();
    assert positionOnEntry == pos;
    assert pos == 0;

    List<Passage> passages = new ArrayList<>(Math.min(maxPassages, 10));
    while (passages.size() < maxPassages) {
      int next = breakIterator.next();
      if (next == BreakIterator.DONE) {
        break;
      }
      Passage passage = new Passage();
      passage.setStartOffset(pos);
      passage.setEndOffset(next);
      passages.add(passage);
      pos = next;
    }

    return passages.toArray(new Passage[0]);
  }

  /**
   * Walks the matches of {@code off}, groups them into passages using the break iterator, scores
   * each passage and keeps the best {@link #maxPassages} of them.
   *
   * @param off the matches to highlight; its first position has not been consumed yet
   * @return the retained passages sorted by ascending start offset; empty if {@code off} has no
   *         positions or no match could be placed into a passage
   * @throws IOException if iterating {@code off} fails
   * @throws IllegalArgumentException if a match reports a start offset of {@code -1}, i.e. the
   *         field was indexed without offsets
   */
  protected Passage[] highlightOffsetsEnums(OffsetsEnum off)
      throws IOException {

    final int contentLength = this.breakIterator.getText().getEndIndex();

    if (off.nextPosition() == false) {
      return new Passage[0];
    }

    // Min-queue: the head is the lowest scoring passage; equal scores are ordered by start offset.
    PriorityQueue<Passage> passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> {
      if (left.getScore() < right.getScore()) {
        return -1;
      } else if (left.getScore() > right.getScore()) {
        return 1;
      } else {
        return Integer.compare(left.getStartOffset(), right.getStartOffset());
      }
    });
    Passage passage = new Passage(); // the passage in progress; either queued or reset and reused

    do {
      int start = off.startOffset();
      if (start == -1) {
        throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
      }
      int end = off.endOffset();
      if (start < contentLength && end > contentLength) {
        // Match straddles the end of the content: skip it.
        continue;
      }
      // Does this match start beyond the passage in progress?
      if (start >= passage.getEndOffset()) {
        passage = maybeAddPassage(passageQueue, passageScorer, passage, contentLength);
        // A match starting at or past the end of the content ends the scan.
        if (start >= contentLength) {
          break;
        }
        // Position the new passage on the break-iterator segment containing 'start'.
        passage.setStartOffset(Math.max(this.breakIterator.preceding(start + 1), 0));
        passage.setEndOffset(Math.min(this.breakIterator.following(start), contentLength));
      }
      // Record the match in the current passage.
      BytesRef term = off.getTerm();
      assert term != null;
      passage.addMatch(start, end, term, off.freq());
    } while (off.nextPosition());
    maybeAddPassage(passageQueue, passageScorer, passage, contentLength);

    Passage[] passages = passageQueue.toArray(new Passage[0]);
    // Present the passages in text order rather than score order.
    Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset));
    return passages;
  }

  /**
   * Scores {@code passage} and offers it to the bounded queue, returning the passage instance the
   * caller should fill next.
   *
   * @param passageQueue queue holding at most {@link #maxPassages} passages, lowest score first
   * @param scorer scorer used to score {@code passage}
   * @param passage the passage just completed; ignored if its start offset is still {@code -1}
   * @param contentLength length of the content, forwarded to the scorer
   * @return a passage ready for reuse: the same instance (untouched or reset), an evicted and
   *         reset instance, or a new one
   */
  private Passage maybeAddPassage(PriorityQueue<Passage> passageQueue, PassageScorer scorer, Passage passage, int contentLength) {
    if (passage.getStartOffset() == -1) {
      // Start offset was never set, so there is nothing to score or queue.
      return passage;
    }
    passage.setScore(scorer.score(passage, contentLength));

    // peek() returns null on an empty queue; that can coincide with size() == maxPassages only
    // when maxPassages == 0, which previously caused a NullPointerException here.
    final Passage lowestScoring = passageQueue.peek();
    if (lowestScoring != null && passageQueue.size() == maxPassages
        && passage.getScore() < lowestScoring.getScore()) {
      passage.reset(); // can't compete with what is already queued; recycle it
    } else {
      passageQueue.offer(passage);
      if (passageQueue.size() > maxPassages) {
        // Over capacity: evict the lowest scoring passage and recycle that instance.
        passage = passageQueue.poll();
        passage.reset();
      } else {
        passage = new Passage();
      }
    }
    return passage;
  }
}