/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.payloads;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;

Experimental class to get set of payloads for most standard Lucene queries. Operates like Highlighter - IndexReader should only contain doc of interest, best to use MemoryIndex.
@lucene.experimental
/** * Experimental class to get set of payloads for most standard Lucene queries. * Operates like Highlighter - IndexReader should only contain doc of interest, * best to use MemoryIndex. * * @lucene.experimental * */
public class PayloadSpanUtil { private IndexReaderContext context;
Params:
  • context – that contains doc with payloads to extract
See Also:
/** * @param context * that contains doc with payloads to extract * * @see IndexReader#getContext() */
public PayloadSpanUtil(IndexReaderContext context) { this.context = context; }
Query should be rewritten for wild/fuzzy support.
Params:
  • query – rewritten query
Throws:
Returns:payloads Collection
/** * Query should be rewritten for wild/fuzzy support. * * @param query rewritten query * @return payloads Collection * @throws IOException if there is a low-level I/O error */
public Collection<byte[]> getPayloadsForQuery(Query query) throws IOException { Collection<byte[]> payloads = new ArrayList<>(); queryToSpanQuery(query, payloads); return payloads; } private void queryToSpanQuery(Query query, Collection<byte[]> payloads) throws IOException { if (query instanceof BooleanQuery) { for (BooleanClause clause : (BooleanQuery) query) { if (!clause.isProhibited()) { queryToSpanQuery(clause.getQuery(), payloads); } } } else if (query instanceof PhraseQuery) { Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = ((PhraseQuery) query).getSlop(); boolean inorder = false; if (slop == 0) { inorder = true; } SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); getPayloads(payloads, sp); } else if (query instanceof TermQuery) { SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).getTerm()); getPayloads(payloads, stq); } else if (query instanceof SpanQuery) { getPayloads(payloads, (SpanQuery) query); } else if (query instanceof DisjunctionMaxQuery) { for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator .hasNext();) { queryToSpanQuery(iterator.next(), payloads); } } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; final Term[][] termArrays = mpq.getTermArrays(); final int[] positions = mpq.getPositions(); if (positions.length > 0) { int maxPosition = positions[positions.length - 1]; for (int i = 0; i < positions.length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } @SuppressWarnings({"rawtypes","unchecked"}) final List<Query>[] disjunctLists = new List[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.length; ++i) { final Term[] termArray = termArrays[i]; List<Query> disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new ArrayList<>( termArray.length)); ++distinctPositions; } for (final Term term : termArray) { disjuncts.add(new SpanTermQuery(term)); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.length; ++i) { List<Query> disjuncts = disjunctLists[i]; if (disjuncts != null) { clauses[position++] = new SpanOrQuery(disjuncts .toArray(new SpanQuery[disjuncts.size()])); } else { ++positionGaps; } } final int slop = mpq.getSlop(); final boolean inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); getPayloads(payloads, sp); } } } private void getPayloads(Collection<byte []> payloads, SpanQuery query) throws IOException { final IndexSearcher searcher = new IndexSearcher(context); searcher.setQueryCache(null); query = (SpanQuery) searcher.rewrite(query); SpanWeight w = (SpanWeight) searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1); PayloadSpanCollector collector = new PayloadSpanCollector(); for (LeafReaderContext leafReaderContext : context.leaves()) { final Spans spans = w.getSpans(leafReaderContext, SpanWeight.Postings.PAYLOADS); if (spans != null) { while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { collector.reset(); spans.collect(collector); payloads.addAll(collector.getPayloads()); } } } } } }