package org.apache.lucene.search.vectorhighlight;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
import org.apache.lucene.util.MergedIterator;
public class FieldPhraseList {
LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<>();
public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery){
this (fieldTermStack, fieldQuery, Integer.MAX_VALUE);
}
public List<WeightedPhraseInfo> getPhraseList() {
return phraseList;
}
public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit ){
final String field = fieldTermStack.getFieldName();
LinkedList<TermInfo> phraseCandidate = new LinkedList<>();
QueryPhraseMap currMap = null;
QueryPhraseMap nextMap = null;
while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) )
{
phraseCandidate.clear();
TermInfo ti = null;
TermInfo first = null;
first = ti = fieldTermStack.pop();
currMap = fieldQuery.getFieldTermMap( field, ti.getText() );
while (currMap == null && ti.getNext() != first) {
ti = ti.getNext();
currMap = fieldQuery.getFieldTermMap( field, ti.getText() );
}
if( currMap == null ) continue;
phraseCandidate.add( ti );
while( true ){
first = ti = fieldTermStack.pop();
nextMap = null;
if( ti != null ) {
nextMap = currMap.getTermMap( ti.getText() );
while (nextMap == null && ti.getNext() != first) {
ti = ti.getNext();
nextMap = currMap.getTermMap( ti.getText() );
}
}
if( ti == null || nextMap == null ){
if( ti != null )
fieldTermStack.push( ti );
if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
}
else{
while( phraseCandidate.size() > 1 ){
fieldTermStack.push( phraseCandidate.removeLast() );
currMap = fieldQuery.searchPhrase( field, phraseCandidate );
if( currMap != null ){
addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
break;
}
}
}
break;
}
else{
phraseCandidate.add( ti );
currMap = nextMap;
}
}
}
}
public FieldPhraseList( FieldPhraseList[] toMerge ) {
@SuppressWarnings( { "rawtypes", "unchecked" } )
Iterator< WeightedPhraseInfo >[] allInfos = new Iterator[ toMerge.length ];
int index = 0;
for ( FieldPhraseList fplToMerge : toMerge ) {
allInfos[ index++ ] = fplToMerge.phraseList.iterator();
}
MergedIterator< WeightedPhraseInfo > itr = new MergedIterator<>( false, allInfos );
phraseList = new LinkedList<>();
if ( !itr.hasNext() ) {
return;
}
List< WeightedPhraseInfo > work = new ArrayList<>();
WeightedPhraseInfo first = itr.next();
work.add( first );
int workEndOffset = first.getEndOffset();
while ( itr.hasNext() ) {
WeightedPhraseInfo current = itr.next();
if ( current.getStartOffset() <= workEndOffset ) {
workEndOffset = Math.max( workEndOffset, current.getEndOffset() );
work.add( current );
} else {
if ( work.size() == 1 ) {
phraseList.add( work.get( 0 ) );
work.set( 0, current );
} else {
phraseList.add( new WeightedPhraseInfo( work ) );
work.clear();
work.add( current );
}
workEndOffset = current.getEndOffset();
}
}
if ( work.size() == 1 ) {
phraseList.add( work.get( 0 ) );
} else {
phraseList.add( new WeightedPhraseInfo( work ) );
work.clear();
}
}
public void addIfNoOverlap( WeightedPhraseInfo wpi ){
for( WeightedPhraseInfo existWpi : getPhraseList() ){
if( existWpi.isOffsetOverlap( wpi ) ) {
existWpi.getTermsInfos().addAll( wpi.getTermsInfos() );
return;
}
}
getPhraseList().add( wpi );
}
public static class WeightedPhraseInfo implements Comparable< WeightedPhraseInfo > {
private List<Toffs> termsOffsets;
private float boost;
private int seqnum;
private ArrayList<TermInfo> termsInfos;
public String getText() {
StringBuilder text = new StringBuilder();
for ( TermInfo ti: termsInfos ) {
text.append( ti.getText() );
}
return text.toString();
}
public List<Toffs> getTermsOffsets() {
return termsOffsets;
}
public float getBoost() {
return boost;
}
public List<TermInfo> getTermsInfos() {
return termsInfos;
}
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
this( terms, boost, 0 );
}
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
this.boost = boost;
this.seqnum = seqnum;
termsInfos = new ArrayList<>( terms );
termsOffsets = new ArrayList<>( terms.size() );
TermInfo ti = terms.get( 0 );
termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
if( terms.size() == 1 ){
return;
}
int pos = ti.getPosition();
for( int i = 1; i < terms.size(); i++ ){
ti = terms.get( i );
if( ti.getPosition() - pos == 1 ){
Toffs to = termsOffsets.get( termsOffsets.size() - 1 );
to.setEndOffset( ti.getEndOffset() );
}
else{
termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
}
pos = ti.getPosition();
}
}
public WeightedPhraseInfo( Collection< WeightedPhraseInfo > toMerge ) {
Iterator< WeightedPhraseInfo > toMergeItr = toMerge.iterator();
if ( !toMergeItr.hasNext() ) {
throw new IllegalArgumentException( "toMerge must contain at least one WeightedPhraseInfo." );
}
WeightedPhraseInfo first = toMergeItr.next();
@SuppressWarnings( { "rawtypes", "unchecked" } )
Iterator< Toffs >[] allToffs = new Iterator[ toMerge.size() ];
termsInfos = new ArrayList<>();
seqnum = first.seqnum;
boost = first.boost;
allToffs[ 0 ] = first.termsOffsets.iterator();
int index = 1;
while ( toMergeItr.hasNext() ) {
WeightedPhraseInfo info = toMergeItr.next();
boost += info.boost;
termsInfos.addAll( info.termsInfos );
allToffs[ index++ ] = info.termsOffsets.iterator();
}
MergedIterator< Toffs > itr = new MergedIterator<>( false, allToffs );
termsOffsets = new ArrayList<>();
if ( !itr.hasNext() ) {
return;
}
Toffs work = itr.next();
while ( itr.hasNext() ) {
Toffs current = itr.next();
if ( current.startOffset <= work.endOffset ) {
work.endOffset = Math.max( work.endOffset, current.endOffset );
} else {
termsOffsets.add( work );
work = current;
}
}
termsOffsets.add( work );
}
public int getStartOffset(){
return termsOffsets.get( 0 ).startOffset;
}
public int getEndOffset(){
return termsOffsets.get( termsOffsets.size() - 1 ).endOffset;
}
public boolean isOffsetOverlap( WeightedPhraseInfo other ){
int so = getStartOffset();
int eo = getEndOffset();
int oso = other.getStartOffset();
int oeo = other.getEndOffset();
if( so <= oso && oso < eo ) return true;
if( so < oeo && oeo <= eo ) return true;
if( oso <= so && so < oeo ) return true;
if( oso < eo && eo <= oeo ) return true;
return false;
}
@Override
public String toString(){
StringBuilder sb = new StringBuilder();
sb.append( getText() ).append( '(' ).append( boost ).append( ")(" );
for( Toffs to : termsOffsets ){
sb.append( to );
}
sb.append( ')' );
return sb.toString();
}
public int getSeqnum() {
return seqnum;
}
@Override
public int compareTo( WeightedPhraseInfo other ) {
int diff = getStartOffset() - other.getStartOffset();
if ( diff != 0 ) {
return diff;
}
diff = getEndOffset() - other.getEndOffset();
if ( diff != 0 ) {
return diff;
}
return (int) Math.signum( getBoost() - other.getBoost() );
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + getStartOffset();
result = prime * result + getEndOffset();
long b = Double.doubleToLongBits( getBoost() );
result = prime * result + ( int )( b ^ ( b >>> 32 ) );
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
WeightedPhraseInfo other = (WeightedPhraseInfo) obj;
if (getStartOffset() != other.getStartOffset()) {
return false;
}
if (getEndOffset() != other.getEndOffset()) {
return false;
}
if (getBoost() != other.getBoost()) {
return false;
}
return true;
}
public static class Toffs implements Comparable< Toffs > {
private int startOffset;
private int endOffset;
public Toffs( int startOffset, int endOffset ){
this.startOffset = startOffset;
this.endOffset = endOffset;
}
public void setEndOffset( int endOffset ){
this.endOffset = endOffset;
}
public int getStartOffset(){
return startOffset;
}
public int getEndOffset(){
return endOffset;
}
@Override
public int compareTo( Toffs other ) {
int diff = getStartOffset() - other.getStartOffset();
if ( diff != 0 ) {
return diff;
}
return getEndOffset() - other.getEndOffset();
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + getStartOffset();
result = prime * result + getEndOffset();
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
Toffs other = (Toffs) obj;
if (getStartOffset() != other.getStartOffset()) {
return false;
}
if (getEndOffset() != other.getEndOffset()) {
return false;
}
return true;
}
@Override
public String toString(){
StringBuilder sb = new StringBuilder();
sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
return sb.toString();
}
}
}
}