/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.highlight;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;

Scorer implementation which scores text fragments by the number of unique query terms found. This class converts appropriate Querys to SpanQuerys and attempts to score only those terms that participated in generating the 'hit' on the document.
/**
 * {@link Scorer} implementation which scores text fragments by the number of
 * unique query terms found. This class converts appropriate {@link Query}s to
 * {@link SpanQuery}s and attempts to score only those terms that participated in
 * generating the 'hit' on the document.
 */
public class QueryScorer implements Scorer {
  // Sum of the weights of the distinct query terms counted in the current fragment.
  private float totalScore;
  // Term texts already counted towards totalScore; replaced on every startFragment call.
  private Set<String> foundTerms;
  // Term text -> weighted term; either supplied up front or produced by the term extractor.
  private Map<String,WeightedSpanTerm> fieldWeightedSpanTerms;
  // Highest weight among the pre-created terms. NOTE(review): within this file it is only
  // assigned by the WeightedSpanTerm[] constructor.
  private float maxTermWeight;
  // Position of the current token; reset to -1 by init(TokenStream) and advanced by each
  // token's position increment in getTokenScore().
  private int position = -1;
  // Handed to newTermExtractor(String) when the extractor is created.
  private String defaultField;
  // Attributes obtained from the token stream in init(TokenStream).
  private CharTermAttribute termAtt;
  private PositionIncrementAttribute posIncAtt;
  // Forwarded to the term extractor; see setExpandMultiTermQuery(boolean).
  private boolean expandMultiTermQuery = true;
  // Query, field and reader captured by the query-based constructors and passed to the extractor.
  private Query query;
  private String field;
  private IndexReader reader;
  // True when the terms were supplied directly, so init(TokenStream) skips term extraction.
  private boolean skipInitExtractor;
  // Forwarded to the term extractor; see setWrapIfNotCachingTokenFilter(boolean).
  private boolean wrapToCaching = true;
  // Forwarded to the term extractor; see setMaxDocCharsToAnalyze(int).
  private int maxCharsToAnalyze;
  // Forwarded to the term extractor; see setUsePayloads(boolean).
  private boolean usePayloads = false;
Params:
  • query – Query to use for highlighting
/**
 * Creates a scorer for the given query that is not restricted to a field
 * and uses no {@link IndexReader}.
 *
 * @param query Query to use for highlighting
 */
public QueryScorer(Query query) {
  // Same effect as init(query, null, null, true): no field, no reader.
  this(query, (String) null);
}
Params:
  • query – Query to use for highlighting
  • field – Field to highlight - pass null to ignore fields
/**
 * Creates a scorer for the given query and field without an
 * {@link IndexReader}.
 *
 * @param query Query to use for highlighting
 * @param field Field to highlight - pass null to ignore fields
 */
public QueryScorer(Query query, String field) {
  // The cast picks the (Query, IndexReader, String) overload; a bare null would be ambiguous.
  this(query, (IndexReader) null, field);
}
Params:
  • query – Query to use for highlighting
  • field – Field to highlight - pass null to ignore fields
  • reader – IndexReader to use for quasi tf/idf scoring
/**
 * Creates a scorer for the given query and field, using a reader for
 * weighting and leaving the default field unset.
 *
 * @param query Query to use for highlighting
 * @param reader {@link IndexReader} to use for quasi tf/idf scoring
 * @param field Field to highlight - pass null to ignore fields
 */
public QueryScorer(Query query, IndexReader reader, String field) {
  // A null default field matches the field's initial value, so nothing else changes.
  this(query, reader, field, null);
}
Params:
  • query – to use for highlighting
  • reader – IndexReader to use for quasi tf/idf scoring
  • field – to highlight - pass null to ignore fields
/**
 * Creates a scorer for the given query, reader and field, with an explicit
 * default field.
 *
 * @param query to use for highlighting
 * @param reader {@link IndexReader} to use for quasi tf/idf scoring
 * @param field to highlight - pass null to ignore fields
 * @param defaultField The default field for queries with the field name unspecified
 */
public QueryScorer(Query query, IndexReader reader, String field, String defaultField) {
  // Multi-term query expansion is switched on for every query-based constructor.
  init(query, field, reader, true);
  this.defaultField = defaultField;
}
Params:
  • defaultField – The default field for queries with the field name unspecified
/**
 * Creates a scorer for the given query and field with an explicit default
 * field and no {@link IndexReader}.
 *
 * @param query Query to use for highlighting
 * @param field Field to highlight - pass null to ignore fields
 * @param defaultField The default field for queries with the field name unspecified
 */
public QueryScorer(Query query, String field, String defaultField) {
  // Only one four-argument constructor exists, so the null is taken as the reader.
  this(query, null, field, defaultField);
}
Params:
  • weightedTerms – an array of pre-created WeightedSpanTerms
/** * @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s */
public QueryScorer(WeightedSpanTerm[] weightedTerms) { this.fieldWeightedSpanTerms = new HashMap<>(weightedTerms.length); for (int i = 0; i < weightedTerms.length; i++) { WeightedSpanTerm existingTerm = fieldWeightedSpanTerms.get(weightedTerms[i].term); if ((existingTerm == null) || (existingTerm.weight < weightedTerms[i].weight)) { // if a term is defined more than once, always use the highest // scoring weight fieldWeightedSpanTerms.put(weightedTerms[i].term, weightedTerms[i]); maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight()); } } skipInitExtractor = true; } /* * (non-Javadoc) * * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore() */ @Override public float getFragmentScore() { return totalScore; }
Returns:The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale).
/** * * @return The highest weighted term (useful for passing to * GradientFormatter to set top end of coloring scale). */
public float getMaxTermWeight() { return maxTermWeight; } /* * (non-Javadoc) * * @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token, * int) */ @Override public float getTokenScore() { position += posIncAtt.getPositionIncrement(); String termText = termAtt.toString(); WeightedSpanTerm weightedSpanTerm; if ((weightedSpanTerm = fieldWeightedSpanTerms.get( termText)) == null) { return 0; } if (weightedSpanTerm.positionSensitive && !weightedSpanTerm.checkPosition(position)) { return 0; } float score = weightedSpanTerm.getWeight(); // found a query term - is it unique in this doc? if (!foundTerms.contains(termText)) { totalScore += score; foundTerms.add(termText); } return score; } /* (non-Javadoc) * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream) */ @Override public TokenStream init(TokenStream tokenStream) throws IOException { position = -1; termAtt = tokenStream.addAttribute(CharTermAttribute.class); posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); if(!skipInitExtractor) { if(fieldWeightedSpanTerms != null) { fieldWeightedSpanTerms.clear(); } return initExtractor(tokenStream); } return null; }
Retrieve the WeightedSpanTerm for the specified token. Useful for passing Span information to a Fragmenter.
Params:
  • token – to get WeightedSpanTerm for
Returns:WeightedSpanTerm for token
/**
 * Looks up the {@link WeightedSpanTerm} registered for a token. Useful for
 * passing Span information to a {@link Fragmenter}.
 *
 * @param token to get {@link WeightedSpanTerm} for
 * @return WeightedSpanTerm for token
 */
public WeightedSpanTerm getWeightedSpanTerm(String token) {
  return fieldWeightedSpanTerms.get(token);
}

/**
 * Records the settings shared by the query-based constructors. Nothing is
 * extracted here; extraction happens in {@link #init(TokenStream)}.
 */
private void init(Query query, String field, IndexReader reader, boolean expandMultiTermQuery) {
  this.query = query;
  this.field = field;
  this.reader = reader;
  this.expandMultiTermQuery = expandMultiTermQuery;
}

/**
 * Builds a term extractor, configures it from this scorer's settings and
 * replaces the weighted-term map with the terms extracted from the query.
 * The score-aware extraction is used only when a reader is available.
 *
 * @return the extractor's token stream if it reports a cached stream, otherwise null
 */
private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
  final WeightedSpanTermExtractor extractor = newTermExtractor(defaultField);
  extractor.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
  extractor.setExpandMultiTermQuery(expandMultiTermQuery);
  extractor.setWrapIfNotCachingTokenFilter(wrapToCaching);
  extractor.setUsePayloads(usePayloads);

  this.fieldWeightedSpanTerms = (reader == null)
      ? extractor.getWeightedSpanTerms(query, 1f, tokenStream, field)
      : extractor.getWeightedSpanTermsWithScores(query, 1f, tokenStream, field, reader);

  return extractor.isCachedTokenStream() ? extractor.getTokenStream() : null;
}

/**
 * Factory hook for the term extractor; subclasses may override it to supply
 * their own {@link WeightedSpanTermExtractor}.
 *
 * @param defaultField passed to the extractor's constructor
 */
protected WeightedSpanTermExtractor newTermExtractor(String defaultField) {
  return new WeightedSpanTermExtractor(defaultField);
}

/**
 * Resets the per-fragment state: the running score and the set of terms
 * already counted.
 *
 * @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
 */
@Override
public void startFragment(TextFragment newFragment) {
  totalScore = 0;
  foundTerms = new HashSet<>();
}
Returns:true if multi-term queries should be expanded
/**
 * Reports the current multi-term expansion setting.
 *
 * @return true if multi-term queries should be expanded
 */
public boolean isExpandMultiTermQuery() {
  return this.expandMultiTermQuery;
}
Controls whether or not multi-term queries are expanded against a MemoryIndex IndexReader.
Params:
  • expandMultiTermQuery – true if multi-term queries should be expanded
/**
 * Controls whether or not multi-term queries are expanded
 * against a {@link MemoryIndex} {@link IndexReader}. The value is handed to
 * the term extractor the next time terms are extracted.
 *
 * @param expandMultiTermQuery true if multi-term queries should be expanded
 */
public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
  this.expandMultiTermQuery = expandMultiTermQuery;
}
Whether or not we should capture payloads in MemoryIndex at each position so that queries can access them. This does not apply to term vector based TokenStreams, which support payloads only when the term vector has them.
/**
 * Whether or not we should capture payloads in {@link MemoryIndex} at each
 * position so that queries can access them. This does not apply to term
 * vector based TokenStreams, which support payloads only when the term
 * vector has them.
 *
 * @return true if payloads are captured
 */
public boolean isUsePayloads() {
  return this.usePayloads;
}

/**
 * Sets whether payloads are captured; see {@link #isUsePayloads()}. The
 * value is handed to the term extractor the next time terms are extracted.
 *
 * @param usePayloads true to capture payloads
 */
public void setUsePayloads(boolean usePayloads) {
  this.usePayloads = usePayloads;
}
By default, TokenStreams that are not of the type CachingTokenFilter are wrapped in a CachingTokenFilter to ensure an efficient reset - if you are already using a different caching TokenStream impl and you don't want it to be wrapped, set this to false. Note that term-vector based tokenstreams are detected and won't be wrapped either.
/**
 * By default, {@link TokenStream}s that are not of the type
 * {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to
 * ensure an efficient reset - if you are already using a different caching
 * {@link TokenStream} impl and you don't want it to be wrapped, set this to
 * false. Note that term-vector based tokenstreams are detected and won't be
 * wrapped either.
 *
 * @param wrap false to keep the supplied stream unwrapped
 */
public void setWrapIfNotCachingTokenFilter(boolean wrap) {
  wrapToCaching = wrap;
}

/**
 * Sets the character limit that is handed to the term extractor the next
 * time terms are extracted.
 *
 * @param maxDocCharsToAnalyze value forwarded to the extractor's
 *        {@code setMaxDocCharsToAnalyze}
 */
public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) {
  maxCharsToAnalyze = maxDocCharsToAnalyze;
}
}