/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.vectorhighlight;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;

FieldTermStack is a stack that keeps query terms in the specified field of the document to be highlighted.
/** * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field * of the document to be highlighted. */
public class FieldTermStack { private final String fieldName; LinkedList<TermInfo> termList = new LinkedList<>(); //public static void main( String[] args ) throws Exception { // Analyzer analyzer = new WhitespaceAnalyzer(Version.LATEST); // QueryParser parser = new QueryParser(Version.LATEST, "f", analyzer ); // Query query = parser.parse( "a x:b" ); // FieldQuery fieldQuery = new FieldQuery( query, true, false ); // Directory dir = new RAMDirectory(); // IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LATEST, analyzer)); // Document doc = new Document(); // FieldType ft = new FieldType(TextField.TYPE_STORED); // ft.setStoreTermVectors(true); // ft.setStoreTermVectorOffsets(true); // ft.setStoreTermVectorPositions(true); // doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) ); // doc.add( new Field( "f", ft, "b a b a f" ) ); // writer.addDocument( doc ); // writer.close(); // IndexReader reader = IndexReader.open(dir1); // new FieldTermStack( reader, 0, "f", fieldQuery ); // reader.close(); //}
a constructor.
Params:
  • reader – IndexReader of the index
  • docId – document id to be highlighted
  • fieldName – field of the document to be highlighted
  • fieldQuery – FieldQuery object
Throws:
/** * a constructor. * * @param reader IndexReader of the index * @param docId document id to be highlighted * @param fieldName field of the document to be highlighted * @param fieldQuery FieldQuery object * @throws IOException If there is a low-level I/O error */
public FieldTermStack( IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery ) throws IOException { this.fieldName = fieldName; Set<String> termSet = fieldQuery.getTermSet( fieldName ); // just return to make null snippet if un-matched fieldName specified when fieldMatch == true if( termSet == null ) return; final Fields vectors = reader.getTermVectors(docId); if (vectors == null) { // null snippet return; } final Terms vector = vectors.terms(fieldName); if (vector == null || vector.hasPositions() == false) { // null snippet return; } final CharsRefBuilder spare = new CharsRefBuilder(); final TermsEnum termsEnum = vector.iterator(); PostingsEnum dpEnum = null; BytesRef text; int numDocs = reader.maxDoc(); while ((text = termsEnum.next()) != null) { spare.copyUTF8Bytes(text); final String term = spare.toString(); if (!termSet.contains(term)) { continue; } dpEnum = termsEnum.postings(dpEnum, PostingsEnum.POSITIONS); dpEnum.nextDoc(); // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html final float weight = ( float ) ( Math.log( numDocs / ( double ) ( reader.docFreq( new Term(fieldName, text) ) + 1 ) ) + 1.0 ); final int freq = dpEnum.freq(); for(int i = 0;i < freq;i++) { int pos = dpEnum.nextPosition(); if (dpEnum.startOffset() < 0) { return; // no offsets, null snippet } termList.add( new TermInfo( term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight ) ); } } // sort by position Collections.sort(termList); // now look for dups at the same position, linking them together int currentPos = -1; TermInfo previous = null; TermInfo first = null; Iterator<TermInfo> iterator = termList.iterator(); while (iterator.hasNext()) { TermInfo current = iterator.next(); if (current.position == currentPos) { assert previous != null; previous.setNext(current); previous = current; iterator.remove(); } else { if (previous != null) { previous.setNext(first); } previous = first = current; currentPos = current.position; } } if (previous != null) { previous.setNext(first); } }
Returns:field name
/** * @return field name */
public String getFieldName(){ return fieldName; }
Returns:the top TermInfo object of the stack
/** * @return the top TermInfo object of the stack */
public TermInfo pop(){ return termList.poll(); }
Params:
  • termInfo – the TermInfo object to be put on the top of the stack
/** * @param termInfo the TermInfo object to be put on the top of the stack */
public void push( TermInfo termInfo ){ termList.push( termInfo ); }
to know whether the stack is empty
Returns:true if the stack is empty, false if not
/** * to know whether the stack is empty * * @return true if the stack is empty, false if not */
public boolean isEmpty(){ return termList == null || termList.size() == 0; }
Single term with its position/offsets in the document and IDF weight. It is Comparable but considers only position.
/** * Single term with its position/offsets in the document and IDF weight. * It is Comparable but considers only position. */
public static class TermInfo implements Comparable<TermInfo>{ private final String text; private final int startOffset; private final int endOffset; private final int position; // IDF-weight of this term private final float weight; // pointer to other TermInfo's at the same position. // this is a circular list, so with no syns, just points to itself private TermInfo next; TermInfo(String text, int startOffset, int endOffset, int position, float weight){ this.text = text; this.startOffset = startOffset; this.endOffset = endOffset; this.position = position; this.weight = weight; this.next = this; } void setNext(TermInfo next) { this.next = next; }
Returns the next TermInfo at this same position. This is a circular list!
/** * Returns the next TermInfo at this same position. * This is a circular list! */
public TermInfo getNext() { return next; } public String getText(){ return text; } public int getStartOffset(){ return startOffset; } public int getEndOffset(){ return endOffset; } public int getPosition(){ return position; } public float getWeight(){ return weight; } @Override public String toString(){ return text + '(' + startOffset + ',' + endOffset + ',' + position + ')'; } @Override public int compareTo( TermInfo o ){ return ( this.position - o.position ); } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + position; return result; } @Override public boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null) { return false; } if (getClass() != obj.getClass()) { return false; } TermInfo other = (TermInfo) obj; return position == other.position; } } }