org.apache.cassandra/cassandra-all/3.11.5 : org/apache/cassandra/utils/EstimatedHistogram.java

EstimatedHistogram
https://cassandra.apache.org: The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
The Apache Software License, Version 2.0
Andres de la Peña
Avinash Lakshman
Aleksey Yeschenko
Aaron Morton
Ariel Weisberg
Blake Eggleston
Benedict Elliott Smith
Benjamin Lerer
Branimir Lambov
Brandon Williams
Carl Yeksigian
David Brosiusd
Dikang Gu
Eric Evans
Gary Dusbabek
Chris Goffinet
Alex Petrov
Laine Jaakko Olavi
T Jake Luciani
Jason Brown
Jonathan Ellis
Jake Farrell
Jeff Jirsa
Joel Knighton
Josh McKenzie
Johan Oskarsson
Jun Rao
Jay Zhuang
Sankalp Kohli
Marcus Eriksson
Michael Semb Wever
Mikhail Stepura
Michael Shuler
Paulo Motta
Prashant Malik
Robert Stupp
Peter Schuller
Sam Tunnicliffe
Sylvain Lebresne
Stefania Alborghetti
Tyler Hobbs
Vijay Parthasarathy
Pavel Yaskevich
Yuki Morishita
Nate McCall
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.utils;

import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLongArray;

import com.google.common.base.Objects;

import org.apache.cassandra.db.TypeSizes;
import org.apache.cassandra.io.ISerializer;
import org.apache.cassandra.io.util.DataInputPlus;
import org.apache.cassandra.io.util.DataOutputPlus;
import org.slf4j.Logger;

public class EstimatedHistogram
{
    public static final EstimatedHistogramSerializer serializer = new EstimatedHistogramSerializer();

    The series of values to which the counts in `buckets` correspond:
1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc.
Thus, a `buckets` of [0, 0, 1, 10] would mean we had seen one value of 3 and 10 values of 4.
The series starts at 1 and grows by 1.2 each time (rounding and removing duplicates). It goes from 1
to around 36M by default (creating 90+1 buckets), which will give us timing resolution from microseconds to
36 seconds, with less precision as the numbers get larger.
Each bucket represents values from (previous bucket offset, current offset].
/**
     * The series of values to which the counts in `buckets` correspond:
     * 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc.
     * Thus, a `buckets` of [0, 0, 1, 10] would mean we had seen one value of 3 and 10 values of 4.
     *
     * The series starts at 1 and grows by 1.2 each time (rounding and removing duplicates). It goes from 1
     * to around 36M by default (creating 90+1 buckets), which will give us timing resolution from microseconds to
     * 36 seconds, with less precision as the numbers get larger.
     *
     * Each bucket represents values from (previous bucket offset, current offset].
     */
    private final long[] bucketOffsets;

    // buckets is one element longer than bucketOffsets -- the last element is values greater than the last offset
    final AtomicLongArray buckets;

    public EstimatedHistogram()
    {
        this(90);
    }

    public EstimatedHistogram(int bucketCount)
    {
        this(bucketCount, false);
    }

    public EstimatedHistogram(int bucketCount, boolean considerZeroes)
    {
        bucketOffsets = newOffsets(bucketCount, considerZeroes);
        buckets = new AtomicLongArray(bucketOffsets.length + 1);
    }

    Create EstimatedHistogram from only bucket data.
Params: bucketData – bucket data/**
     * Create EstimatedHistogram from only bucket data.
     *
     * @param bucketData bucket data
     */
    public EstimatedHistogram(long[] bucketData)
    {
        assert bucketData != null && bucketData.length > 0 : "Bucket data must be an array of size more than 0";
        bucketOffsets = newOffsets(bucketData.length - 1, false);
        buckets = new AtomicLongArray(bucketData);
    }

    public EstimatedHistogram(long[] offsets, long[] bucketData)
    {
        assert bucketData.length == offsets.length +1;
        bucketOffsets = offsets;
        buckets = new AtomicLongArray(bucketData);
    }

    public static long[] newOffsets(int size, boolean considerZeroes)
    {
        long[] result = new long[size + (considerZeroes ? 1 : 0)];
        int i = 0;
        if (considerZeroes)
            result[i++] = 0;
        long last = 1;
        result[i++] = last;
        for (; i < result.length; i++)
        {
            long next = Math.round(last * 1.2);
            if (next == last)
                next++;
            result[i] = next;
            last = next;
        }

        return result;
    }

    Returns: the histogram values corresponding to each bucket index/**
     * @return the histogram values corresponding to each bucket index
     */
    public long[] getBucketOffsets()
    {
        return bucketOffsets;
    }

    Increments the count of the bucket closest to n, rounding UP.
Params: n – /**
     * Increments the count of the bucket closest to n, rounding UP.
     * @param n
     */
    public void add(long n)
    {
        int index = Arrays.binarySearch(bucketOffsets, n);
        if (index < 0)
        {
            // inexact match, take the first bucket higher than n
            index = -index - 1;
        }
        // else exact match; we're good
        buckets.incrementAndGet(index);
    }

    Returns: the count in the given bucket/**
     * @return the count in the given bucket
     */
    long get(int bucket)
    {
        return buckets.get(bucket);
    }

    Params: reset – zero out buckets afterwards if true
Returns: a long[] containing the current histogram buckets/**
     * @param reset zero out buckets afterwards if true
     * @return a long[] containing the current histogram buckets
     */
    public long[] getBuckets(boolean reset)
    {
        final int len = buckets.length();
        long[] rv = new long[len];

        if (reset)
            for (int i = 0; i < len; i++)
                rv[i] = buckets.getAndSet(i, 0L);
        else
            for (int i = 0; i < len; i++)
                rv[i] = buckets.get(i);

        return rv;
    }

    Returns: the smallest value that could have been added to this histogram/**
     * @return the smallest value that could have been added to this histogram
     */
    public long min()
    {
        for (int i = 0; i < buckets.length(); i++)
        {
            if (buckets.get(i) > 0)
                return i == 0 ? 0 : 1 + bucketOffsets[i - 1];
        }
        return 0;
    }

    Returns: the largest value that could have been added to this histogram.  If the histogram
overflowed, returns Long.MAX_VALUE./**
     * @return the largest value that could have been added to this histogram.  If the histogram
     * overflowed, returns Long.MAX_VALUE.
     */
    public long max()
    {
        int lastBucket = buckets.length() - 1;
        if (buckets.get(lastBucket) > 0)
            return Long.MAX_VALUE;

        for (int i = lastBucket - 1; i >= 0; i--)
        {
            if (buckets.get(i) > 0)
                return bucketOffsets[i];
        }
        return 0;
    }

    Params: percentile – 
Returns: estimated value at given percentile/**
     * @param percentile
     * @return estimated value at given percentile
     */
    public long percentile(double percentile)
    {
        assert percentile >= 0 && percentile <= 1.0;
        int lastBucket = buckets.length() - 1;
        if (buckets.get(lastBucket) > 0)
            throw new IllegalStateException("Unable to compute when histogram overflowed");

        long pcount = (long) Math.ceil(count() * percentile);
        if (pcount == 0)
            return 0;

        long elements = 0;
        for (int i = 0; i < lastBucket; i++)
        {
            elements += buckets.get(i);
            if (elements >= pcount)
                return bucketOffsets[i];
        }
        return 0;
    }

    Throws: IllegalStateException – if any values were greater than the largest bucket threshold
Returns: the ceil of mean histogram value (average of bucket offsets, weighted by count)/**
     * @return the ceil of mean histogram value (average of bucket offsets, weighted by count)
     * @throws IllegalStateException if any values were greater than the largest bucket threshold
     */
    public long mean()
    {
        return (long) Math.ceil(rawMean());
    }

    Throws: IllegalStateException – if any values were greater than the largest bucket threshold
Returns: the mean histogram value (average of bucket offsets, weighted by count)/**
     * @return the mean histogram value (average of bucket offsets, weighted by count)
     * @throws IllegalStateException if any values were greater than the largest bucket threshold
     */
    public double rawMean()
    {
        int lastBucket = buckets.length() - 1;
        if (buckets.get(lastBucket) > 0)
            throw new IllegalStateException("Unable to compute ceiling for max when histogram overflowed");

        long elements = 0;
        long sum = 0;
        for (int i = 0; i < lastBucket; i++)
        {
            long bCount = buckets.get(i);
            elements += bCount;
            sum += bCount * bucketOffsets[i];
        }

        return (double) sum / elements;
    }

    Returns: the total number of non-zero values/**
     * @return the total number of non-zero values
     */
    public long count()
    {
       long sum = 0L;
       for (int i = 0; i < buckets.length(); i++)
           sum += buckets.get(i);
       return sum;
    }

    Returns: the largest bucket offset/**
     * @return the largest bucket offset
     */
    public long getLargestBucketOffset()
    {
        return bucketOffsets[bucketOffsets.length - 1];
    }

    Returns: true if this histogram has overflowed -- that is, a value larger than our largest bucket could bound was added/**
     * @return true if this histogram has overflowed -- that is, a value larger than our largest bucket could bound was added
     */
    public boolean isOverflowed()
    {
        return buckets.get(buckets.length() - 1) > 0;
    }

    log.debug() every record in the histogram
Params: log – /**
     * log.debug() every record in the histogram
     *
     * @param log
     */
    public void log(Logger log)
    {
        // only print overflow if there is any
        int nameCount;
        if (buckets.get(buckets.length() - 1) == 0)
            nameCount = buckets.length() - 1;
        else
            nameCount = buckets.length();
        String[] names = new String[nameCount];

        int maxNameLength = 0;
        for (int i = 0; i < nameCount; i++)
        {
            names[i] = nameOfRange(bucketOffsets, i);
            maxNameLength = Math.max(maxNameLength, names[i].length());
        }

        // emit log records
        String formatstr = "%" + maxNameLength + "s: %d";
        for (int i = 0; i < nameCount; i++)
        {
            long count = buckets.get(i);
            // sort-of-hack to not print empty ranges at the start that are only used to demarcate the
            // first populated range. for code clarity we don't omit this record from the maxNameLength
            // calculation, and accept the unnecessary whitespace prefixes that will occasionally occur
            if (i == 0 && count == 0)
                continue;
            log.debug(String.format(formatstr, names[i], count));
        }
    }

    private static String nameOfRange(long[] bucketOffsets, int index)
    {
        StringBuilder sb = new StringBuilder();
        appendRange(sb, bucketOffsets, index);
        return sb.toString();
    }

    private static void appendRange(StringBuilder sb, long[] bucketOffsets, int index)
    {
        sb.append("[");
        if (index == 0)
            if (bucketOffsets[0] > 0)
                // by original definition, this histogram is for values greater than zero only;
                // if values of 0 or less are required, an entry of lb-1 must be inserted at the start
                sb.append("1");
            else
                sb.append("-Inf");
        else
            sb.append(bucketOffsets[index - 1] + 1);
        sb.append("..");
        if (index == bucketOffsets.length)
            sb.append("Inf");
        else
            sb.append(bucketOffsets[index]);
        sb.append("]");
    }

    @Override
    public boolean equals(Object o)
    {
        if (this == o)
            return true;

        if (!(o instanceof EstimatedHistogram))
            return false;

        EstimatedHistogram that = (EstimatedHistogram) o;
        return Arrays.equals(getBucketOffsets(), that.getBucketOffsets()) &&
               Arrays.equals(getBuckets(false), that.getBuckets(false));
    }

    @Override
    public int hashCode()
    {
        return Objects.hashCode(getBucketOffsets(), getBuckets(false));
    }

    public static class EstimatedHistogramSerializer implements ISerializer<EstimatedHistogram>
    {
        public void serialize(EstimatedHistogram eh, DataOutputPlus out) throws IOException
        {
            long[] offsets = eh.getBucketOffsets();
            long[] buckets = eh.getBuckets(false);
            out.writeInt(buckets.length);
            for (int i = 0; i < buckets.length; i++)
            {
                out.writeLong(offsets[i == 0 ? 0 : i - 1]);
                out.writeLong(buckets[i]);
            }
        }

        public EstimatedHistogram deserialize(DataInputPlus in) throws IOException
        {
            int size = in.readInt();
            long[] offsets = new long[size - 1];
            long[] buckets = new long[size];

            for (int i = 0; i < size; i++)
            {
                offsets[i == 0 ? 0 : i - 1] = in.readLong();
                buckets[i] = in.readLong();
            }
            return new EstimatedHistogram(offsets, buckets);
        }

        public long serializedSize(EstimatedHistogram eh)
        {
            int size = 0;

            long[] offsets = eh.getBucketOffsets();
            long[] buckets = eh.getBuckets(false);
            size += TypeSizes.sizeof(buckets.length);
            for (int i = 0; i < buckets.length; i++)
            {
                size += TypeSizes.sizeof(offsets[i == 0 ? 0 : i - 1]);
                size += TypeSizes.sizeof(buckets[i]);
            }
            return size;
        }
    }
}
Params:	reset – zero out buckets afterwards if true
Returns:	a long[] containing the current histogram buckets
Params:	percentile –
Returns:	estimated value at given percentile
Throws:	IllegalStateException – if any values were greater than the largest bucket threshold
Returns:	the ceil of mean histogram value (average of bucket offsets, weighted by count)
/

org.apache.cassandra/ cassandra-all/ 3.11.5/ org/apache/cassandra/utils/EstimatedHistogram.java