/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.facet.taxonomy;

import java.io.IOException;
import java.util.Map;

import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig.DimConfig;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.TopOrdAndIntQueue;

import com.carrotsearch.hppc.IntIntScatterMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;

Base class for all taxonomy-based facets that aggregate to a per-ords int[].
/** Base class for all taxonomy-based facets that aggregate * to a per-ords int[]. */
public abstract class IntTaxonomyFacets extends TaxonomyFacets {
Per-ordinal value.
/** Per-ordinal value. */
private final int[] values; private final IntIntScatterMap sparseValues;
Sole constructor.
/** Sole constructor. */
protected IntTaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { super(indexFieldName, taxoReader, config); if (useHashTable(fc, taxoReader)) { sparseValues = new IntIntScatterMap(); values = null; } else { sparseValues = null; values = new int[taxoReader.getSize()]; } }
Return true if a sparse hash table should be used for counting, instead of a dense int[].
/** Return true if a sparse hash table should be used for counting, instead of a dense int[]. */
protected boolean useHashTable(FacetsCollector fc, TaxonomyReader taxoReader) { if (taxoReader.getSize() < 1024) { // small number of unique values: use an array return false; } if (fc == null) { // counting all docs: use an array return false; } int maxDoc = 0; int sumTotalHits = 0; for (MatchingDocs docs : fc.getMatchingDocs()) { sumTotalHits += docs.totalHits; maxDoc += docs.context.reader().maxDoc(); } // if our result set is < 10% of the index, we collect sparsely (use hash map): return sumTotalHits < maxDoc/10; }
Increment the count for this ordinal by 1.
/** Increment the count for this ordinal by 1. */
protected void increment(int ordinal) { increment(ordinal, 1); }
Increment the count for this ordinal by amount..
/** Increment the count for this ordinal by {@code amount}.. */
protected void increment(int ordinal, int amount) { if (sparseValues != null) { sparseValues.addTo(ordinal, amount); } else { values[ordinal] += amount; } } private int getValue(int ordinal) { if (sparseValues != null) { return sparseValues.get(ordinal); } else { return values[ordinal]; } }
Rolls up any single-valued hierarchical dimensions.
/** Rolls up any single-valued hierarchical dimensions. */
protected void rollup() throws IOException { // Rollup any necessary dims: int[] children = null; for(Map.Entry<String,DimConfig> ent : config.getDimConfigs().entrySet()) { String dim = ent.getKey(); DimConfig ft = ent.getValue(); if (ft.hierarchical && ft.multiValued == false) { int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); // It can be -1 if this field was declared in the // config but never indexed: if (dimRootOrd > 0) { if (children == null) { // lazy init children = getChildren(); } increment(dimRootOrd, rollup(children[dimRootOrd])); } } } } private int rollup(int ord) throws IOException { int[] children = getChildren(); int[] siblings = getSiblings(); int sum = 0; while (ord != TaxonomyReader.INVALID_ORDINAL) { increment(ord, rollup(children[ord])); sum += getValue(ord); ord = siblings[ord]; } return sum; } @Override public Number getSpecificValue(String dim, String... path) throws IOException { DimConfig dimConfig = verifyDim(dim); if (path.length == 0) { if (dimConfig.hierarchical && dimConfig.multiValued == false) { // ok: rolled up at search time } else if (dimConfig.requireDimCount && dimConfig.multiValued) { // ok: we indexed all ords at index time } else { throw new IllegalArgumentException("cannot return dimension-level value alone; use getTopChildren instead"); } } int ord = taxoReader.getOrdinal(new FacetLabel(dim, path)); if (ord < 0) { return -1; } return getValue(ord); } @Override public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { if (topN <= 0) { throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); } DimConfig dimConfig = verifyDim(dim); FacetLabel cp = new FacetLabel(dim, path); int dimOrd = taxoReader.getOrdinal(cp); if (dimOrd == -1) { return null; } TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); int bottomValue = 0; int totValue = 0; int childCount = 0; TopOrdAndIntQueue.OrdAndValue reuse = null; // TODO: would be faster if we had a "get the following children" API? then we // can make a single pass over the hashmap if (sparseValues != null) { for (IntIntCursor c : sparseValues) { int count = c.value; int ord = c.key; if (parents[ord] == dimOrd && count > 0) { totValue += count; childCount++; if (count > bottomValue) { if (reuse == null) { reuse = new TopOrdAndIntQueue.OrdAndValue(); } reuse.ord = ord; reuse.value = count; reuse = q.insertWithOverflow(reuse); if (q.size() == topN) { bottomValue = q.top().value; } } } } } else { int[] children = getChildren(); int[] siblings = getSiblings(); int ord = children[dimOrd]; while(ord != TaxonomyReader.INVALID_ORDINAL) { int value = values[ord]; if (value > 0) { totValue += value; childCount++; if (value > bottomValue) { if (reuse == null) { reuse = new TopOrdAndIntQueue.OrdAndValue(); } reuse.ord = ord; reuse.value = value; reuse = q.insertWithOverflow(reuse); if (q.size() == topN) { bottomValue = q.top().value; } } } ord = siblings[ord]; } } if (totValue == 0) { return null; } if (dimConfig.multiValued) { if (dimConfig.requireDimCount) { totValue = getValue(dimOrd); } else { // Our sum'd value is not correct, in general: totValue = -1; } } else { // Our sum'd dim value is accurate, so we keep it } LabelAndValue[] labelValues = new LabelAndValue[q.size()]; for(int i=labelValues.length-1;i>=0;i--) { TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); FacetLabel child = taxoReader.getPath(ordAndValue.ord); labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); } return new FacetResult(dim, path, totValue, labelValues, childCount); } }