/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;

Holds statistics for a DocValues field.
/** Holds statistics for a DocValues field. */
public abstract class DocValuesStats<T> { private int missing = 0; private int count = 0; protected final String field; protected T min; protected T max; protected DocValuesStats(String field, T initialMin, T initialMax) { this.field = field; this.min = initialMin; this.max = initialMax; }
Called after accumulate(int) was processed and verified that the document has a value for the field. Implementations should update the statistics based on the value of the current document.
Params:
  • count – the updated number of documents with value for this field.
/** * Called after {@link #accumulate(int)} was processed and verified that the document has a value for * the field. Implementations should update the statistics based on the value of the current document. * * @param count * the updated number of documents with value for this field. */
protected abstract void doAccumulate(int count) throws IOException;
Initializes this object with the given reader context. Returns whether stats can be computed for this segment (i.e. it does have the requested DocValues field).
/** * Initializes this object with the given reader context. Returns whether stats can be computed for this segment (i.e. * it does have the requested DocValues field). */
protected abstract boolean init(LeafReaderContext context) throws IOException;
Returns whether the given document has a value for the requested DocValues field.
/** Returns whether the given document has a value for the requested DocValues field. */
protected abstract boolean hasValue(int doc) throws IOException; final void accumulate(int doc) throws IOException { if (hasValue(doc)) { ++count; doAccumulate(count); } else { ++missing; } } final void addMissing() { ++missing; }
The field for which these stats were computed.
/** The field for which these stats were computed. */
public final String field() { return field; }
The number of documents which have a value of the field.
/** The number of documents which have a value of the field. */
public final int count() { return count; }
The number of documents which do not have a value of the field.
/** The number of documents which do not have a value of the field. */
public final int missing() { return missing; }
The minimum value of the field. Undefined when count() is zero.
/** The minimum value of the field. Undefined when {@link #count()} is zero. */
public final T min() { return min; }
The maximum value of the field. Undefined when count() is zero.
/** The maximum value of the field. Undefined when {@link #count()} is zero. */
public final T max() { return max; }
Holds statistics for a numeric DocValues field.
/** Holds statistics for a numeric DocValues field. */
public static abstract class NumericDocValuesStats<T extends Number> extends DocValuesStats<T> { protected double mean = 0.0; protected double variance = 0.0; protected NumericDocValues ndv; protected NumericDocValuesStats(String field, T initialMin, T initialMax) { super(field, initialMin, initialMax); } @Override protected final boolean init(LeafReaderContext context) throws IOException { ndv = context.reader().getNumericDocValues(field); return ndv != null; } @Override protected final boolean hasValue(int doc) throws IOException { return ndv.advanceExact(doc); }
The mean of all values of the field.
/** The mean of all values of the field. */
public final double mean() { return mean; }
Returns the variance of all values of the field.
/** Returns the variance of all values of the field. */
public final double variance() { int count = count(); return count > 0 ? variance / count : 0; }
Returns the stdev of all values of the field.
/** Returns the stdev of all values of the field. */
public final double stdev() { return Math.sqrt(variance()); }
Returns the sum of values of the field. Note that if the values are large, the sum might overflow.
/** Returns the sum of values of the field. Note that if the values are large, the {@code sum} might overflow. */
public abstract T sum(); }
Holds DocValues statistics for a numeric field storing long values.
/** Holds DocValues statistics for a numeric field storing {@code long} values. */
public static final class LongDocValuesStats extends NumericDocValuesStats<Long> { // To avoid boxing 'long' to 'Long' while the sum is computed, declare it as private variable. private long sum = 0; public LongDocValuesStats(String field) { super(field, Long.MAX_VALUE, Long.MIN_VALUE); } @Override protected void doAccumulate(int count) throws IOException { long val = ndv.longValue(); if (val > max) { max = val; } if (val < min) { min = val; } sum += val; double oldMean = mean; mean += (val - mean) / count; variance += (val - mean) * (val - oldMean); } @Override public Long sum() { return sum; } }
Holds DocValues statistics for a numeric field storing double values.
/** Holds DocValues statistics for a numeric field storing {@code double} values. */
public static final class DoubleDocValuesStats extends NumericDocValuesStats<Double> { // To avoid boxing 'double' to 'Double' while the sum is computed, declare it as private variable. private double sum = 0; public DoubleDocValuesStats(String field) { super(field, Double.MAX_VALUE, Double.MIN_VALUE); } @Override protected void doAccumulate(int count) throws IOException { double val = Double.longBitsToDouble(ndv.longValue()); if (Double.compare(val, max) > 0) { max = val; } if (Double.compare(val, min) < 0) { min = val; } sum += val; double oldMean = mean; mean += (val - mean) / count; variance += (val - mean) * (val - oldMean); } @Override public Double sum() { return sum; } }
Holds statistics for a sorted-numeric DocValues field.
/** Holds statistics for a sorted-numeric DocValues field. */
public static abstract class SortedNumericDocValuesStats<T extends Number> extends DocValuesStats<T> { protected long valuesCount = 0; protected double mean = 0.0; protected double variance = 0.0; protected SortedNumericDocValues sndv; protected SortedNumericDocValuesStats(String field, T initialMin, T initialMax) { super(field, initialMin, initialMax); } @Override protected final boolean init(LeafReaderContext context) throws IOException { sndv = context.reader().getSortedNumericDocValues(field); return sndv != null; } @Override protected final boolean hasValue(int doc) throws IOException { return sndv.advanceExact(doc); }
The mean of all values of the field.
/** The mean of all values of the field. */
public final double mean() { return mean; }
Returns the variance of all values of the field.
/** Returns the variance of all values of the field. */
public final double variance() { int count = count(); return count > 0 ? variance / count : 0; }
Returns the stdev of all values of the field.
/** Returns the stdev of all values of the field. */
public final double stdev() { return Math.sqrt(variance()); }
Returns the total number of values for this field.
/** Returns the total number of values for this field. */
public final long valuesCount() { return valuesCount; }
Returns the sum of values of the field. Note that if the values are large, the sum might overflow.
/** Returns the sum of values of the field. Note that if the values are large, the {@code sum} might overflow. */
public abstract T sum(); }
Holds DocValues statistics for a sorted-numeric field storing long values.
/** Holds DocValues statistics for a sorted-numeric field storing {@code long} values. */
public static final class SortedLongDocValuesStats extends SortedNumericDocValuesStats<Long> { // To avoid boxing 'long' to 'Long' while the sum is computed, declare it as private variable. private long sum = 0; public SortedLongDocValuesStats(String field) { super(field, Long.MAX_VALUE, Long.MIN_VALUE); } @Override protected void doAccumulate(int count) throws IOException { int numValues = sndv.docValueCount(); while (numValues-- > 0) { long val = sndv.nextValue(); if (val > max) { max = val; } if (val < min) { min = val; } sum += val; double oldMean = mean; // for correct "running average computation", increase valuesCount with each value, rather than once before the // loop stats. ++valuesCount; mean += (val - mean) / valuesCount; variance += (val - mean) * (val - oldMean); } } @Override public Long sum() { return sum; } }
Holds DocValues statistics for a sorted-numeric field storing double values.
/** Holds DocValues statistics for a sorted-numeric field storing {@code double} values. */
public static final class SortedDoubleDocValuesStats extends SortedNumericDocValuesStats<Double> { // To avoid boxing 'double' to 'Double' while the sum is computed, declare it as private variable. private double sum = 0; public SortedDoubleDocValuesStats(String field) { super(field, Double.MAX_VALUE, Double.MIN_VALUE); } @Override protected void doAccumulate(int count) throws IOException { int numValues = sndv.docValueCount(); while (numValues-- > 0) { double val = Double.longBitsToDouble(sndv.nextValue()); if (Double.compare(val, max) > 0) { max = val; } if (Double.compare(val, min) < 0) { min = val; } sum += val; double oldMean = mean; // for correct "running average computation", increase valuesCount with each value, rather than once before the // loop stats. ++valuesCount; mean += (val - mean) / valuesCount; variance += (val - mean) * (val - oldMean); } } @Override public Double sum() { return sum; } } private static BytesRef copyFrom(BytesRef src, BytesRef dest) { if (dest == null) { return BytesRef.deepCopyOf(src); } dest.bytes = ArrayUtil.grow(dest.bytes, src.length); System.arraycopy(src.bytes, src.offset, dest.bytes, 0, src.length); dest.offset = 0; dest.length = src.length; return dest; }
Holds statistics for a sorted DocValues field.
/** Holds statistics for a sorted DocValues field. */
public static class SortedDocValuesStats extends DocValuesStats<BytesRef> { protected SortedDocValues sdv; protected SortedDocValuesStats(String field) { super(field, null, null); } @Override protected final boolean init(LeafReaderContext context) throws IOException { sdv = context.reader().getSortedDocValues(field); return sdv != null; } @Override protected final boolean hasValue(int doc) throws IOException { return sdv.advanceExact(doc); } @Override protected void doAccumulate(int count) throws IOException { BytesRef val = sdv.binaryValue(); if (max == null || val.compareTo(max) > 0) { max = copyFrom(val, max); } if (min == null || val.compareTo(min) < 0) { min = copyFrom(val, min); } } }
Holds statistics for a sorted-set DocValues field.
/** Holds statistics for a sorted-set DocValues field. */
public static class SortedSetDocValuesStats extends DocValuesStats<BytesRef> { protected SortedSetDocValues ssdv; protected SortedSetDocValuesStats(String field) { super(field, null, null); } @Override protected final boolean init(LeafReaderContext context) throws IOException { ssdv = context.reader().getSortedSetDocValues(field); return ssdv != null; } @Override protected final boolean hasValue(int doc) throws IOException { return ssdv.advanceExact(doc); } @Override protected void doAccumulate(int count) throws IOException { long ord; while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { BytesRef val = ssdv.lookupOrd(ord); if (max == null || val.compareTo(max) > 0) { max = copyFrom(val, max); } if (min == null || val.compareTo(min) < 0) { min = copyFrom(val, min); } } } } }