/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db.columniterator;

import java.io.IOException;
import java.util.NoSuchElementException;

import org.apache.cassandra.db.*;
import org.apache.cassandra.db.filter.ColumnFilter;
import org.apache.cassandra.db.rows.*;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.io.util.FileDataInput;
import org.apache.cassandra.io.util.FileHandle;

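/*
 * A minimal usage sketch, assuming a Cassandra 3.x SSTableReader ("sstable"), a
 * partition key ("key") and a caller-provided process() method; these names are
 * illustrative. An SSTableIterator is normally obtained through SSTableReader
 * rather than constructed directly:
 *
 *     try (UnfilteredRowIterator iter =
 *              sstable.iterator(key, Slices.ALL, ColumnFilter.all(sstable.metadata), false, false))
 *     {
 *         while (iter.hasNext())
 *             process(iter.next()); // rows and range tombstone markers, in clustering order
 *     }
 */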
/**
 *  A Cell Iterator over SSTable
 */
public class SSTableIterator extends AbstractSSTableIterator
{
    /**
     * The index of the slice being processed.
     */
    private int slice;

    public SSTableIterator(SSTableReader sstable,
                           FileDataInput file,
                           DecoratedKey key,
                           RowIndexEntry indexEntry,
                           Slices slices,
                           ColumnFilter columns,
                           boolean isForThrift,
                           FileHandle ifile)
    {
        super(sstable, file, key, indexEntry, slices, columns, isForThrift, ifile);
    }

    protected Reader createReaderInternal(RowIndexEntry indexEntry, FileDataInput file, boolean shouldCloseFile)
    {
        return indexEntry.isIndexed()
             ? new ForwardIndexedReader(indexEntry, file, shouldCloseFile)
             : new ForwardReader(file, shouldCloseFile);
    }

    protected int nextSliceIndex()
    {
        int next = slice;
        slice++;
        return next;
    }

    protected boolean hasMoreSlices()
    {
        return slice < slices.size();
    }

    public boolean isReverseOrder()
    {
        return false;
    }
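    /*
     * Reader life cycle, as a sketch (the driving loop lives in AbstractSSTableIterator,
     * so this describes the expected contract rather than code in this file): for each
     * slice index handed out by nextSliceIndex(), the parent iterator calls
     * setForSlice(slice) on the current Reader, then pulls results through
     * hasNextInternal()/nextInternal() until the slice is exhausted, and moves on to
     * the next slice while hasMoreSlices() returns true.
     */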
    private class ForwardReader extends Reader
    {
        // The start of the current slice. This will be null as soon as we know we've passed that bound.
        protected ClusteringBound start;
        // The end of the current slice. Will never be null.
        protected ClusteringBound end = ClusteringBound.TOP;

        // The next element to return: this is computed by hasNextInternal().
        protected Unfiltered next;
        // Set to true once we know we have no more results for the slice. This is in particular
        // used by the indexed reader when we know we can't have results based on the index.
        protected boolean sliceDone;

        private ForwardReader(FileDataInput file, boolean shouldCloseFile)
        {
            super(file, shouldCloseFile);
        }

        public void setForSlice(Slice slice) throws IOException
        {
            start = slice.start() == ClusteringBound.BOTTOM ? null : slice.start();
            end = slice.end();

            sliceDone = false;
            next = null;
        }

        // Skip all data that comes before the currently set slice.
        // Return what should be returned at the end of this, or null if nothing should.
        private Unfiltered handlePreSliceData() throws IOException
        {
            assert deserializer != null;

            // Note that the following comparison is not strict. The reason is that the only case
            // where it can be == is if the "next" is a RT start marker (either a '[' or a ')[' boundary),
            // and if we had a strict inequality and an open RT marker before this, we would issue
            // the open marker first, and then return the next marker later, which would send both
            // '[' (or '(') and then ')[' in the stream for the same clustering value, which is wrong.
            // By using a non-strict inequality, we avoid that problem (if we do get ')[' for the same
            // clustering value as the slice start, we'll simply record it in 'openMarker').
            while (deserializer.hasNext() && deserializer.compareNextTo(start) <= 0)
            {
                if (deserializer.nextIsRow())
                    deserializer.skipNext();
                else
                    updateOpenMarker((RangeTombstoneMarker) deserializer.readNext());
            }

            ClusteringBound sliceStart = start;
            start = null;

            // We've reached the beginning of our queried slice. If we have an open marker
            // we should return that first.
            if (openMarker != null)
                return new RangeTombstoneBoundMarker(sliceStart, openMarker);

            return null;
        }
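        // A worked example of the bound comparisons above and below (the clustering
        // values here are hypothetical): for a slice (INCL_START(2), EXCL_END(5)) over
        // an int clustering column, handlePreSliceData skips every unfiltered comparing
        // <= INCL_START(2), so a row at clustering 2 is kept (it sorts after the
        // inclusive start), while computeNext stops at the first unfiltered comparing
        // >= EXCL_END(5), so a row at clustering 5 is excluded (it sorts after the
        // exclusive end).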
        // Compute the next element to return, assuming we're in the middle of the slice
        // and the next element is either in the slice, or just after it. Returns null
        // if we're done with the slice.
        protected Unfiltered computeNext() throws IOException
        {
            assert deserializer != null;

            while (true)
            {
                // We use the same reasoning as in handlePreSliceData regarding the strictness of the inequality below.
                // We want to exclude deserialized unfiltereds equal to end, because 1) we won't miss any rows since those
                // wouldn't be equal to a slice bound and 2) an end bound can be equal to a start bound
                // (EXCL_END(x) == INCL_START(x) for instance), and in that case we don't want to return the start bound
                // because it's fundamentally excluded. And if the bound is an end (for a range tombstone), it means it's
                // exactly our slice end, but in that case we will properly close the range tombstone anyway as part of
                // our "close an open marker" code in hasNextInternal.
                if (!deserializer.hasNext() || deserializer.compareNextTo(end) >= 0)
                    return null;

                Unfiltered next = deserializer.readNext();
                // We may get an empty row for the same reason expressed in UnfilteredSerializer.deserializeOne.
                if (next.isEmpty())
                    continue;

                if (next.kind() == Unfiltered.Kind.RANGE_TOMBSTONE_MARKER)
                    updateOpenMarker((RangeTombstoneMarker) next);
                return next;
            }
        }

        protected boolean hasNextInternal() throws IOException
        {
            if (next != null)
                return true;

            if (sliceDone)
                return false;

            if (start != null)
            {
                Unfiltered unfiltered = handlePreSliceData();
                if (unfiltered != null)
                {
                    next = unfiltered;
                    return true;
                }
            }

            next = computeNext();
            if (next != null)
                return true;

            // No more data for the current slice will come from deserialization.
            sliceDone = true;
            // If we have an open marker, emit its close at the slice end but keep 'openMarker'
            // set: the range tombstone may cover later slices too.
            if (openMarker != null)
            {
                next = new RangeTombstoneBoundMarker(end, openMarker);
                return true;
            }
            return false;
        }

        protected Unfiltered nextInternal() throws IOException
        {
            if (!hasNextInternal())
                throw new NoSuchElementException();

            Unfiltered toReturn = next;
            next = null;
            return toReturn;
        }
    }
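    /*
     * A sketch of the indexed variant below (IndexState is a helper defined in
     * AbstractSSTableIterator): the partition's row index is searched for the first
     * and last index blocks that can intersect the requested slice, so blocks lying
     * entirely before the slice are skipped via a seek instead of being deserialized,
     * and iteration can stop as soon as the last relevant block has been read.
     */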
    private class ForwardIndexedReader extends ForwardReader
    {
        private final IndexState indexState;

        private int lastBlockIdx; // the last index block that has data for the current query

        private ForwardIndexedReader(RowIndexEntry indexEntry, FileDataInput file, boolean shouldCloseFile)
        {
            super(file, shouldCloseFile);
            this.indexState = new IndexState(this, sstable.metadata.comparator, indexEntry, false, ifile);
            this.lastBlockIdx = indexState.blocksCount(); // if we never call setForSlice, that's where we want to stop
        }

        @Override
        public void close() throws IOException
        {
            super.close();
            this.indexState.close();
        }

        @Override
        public void setForSlice(Slice slice) throws IOException
        {
            super.setForSlice(slice);

            // If our previous slicing already got us the biggest row in the sstable, we're done.
            if (indexState.isDone())
            {
                sliceDone = true;
                return;
            }

            // Find the first index block we'll need to read for the slice.
            int startIdx = indexState.findBlockIndex(slice.start(), indexState.currentBlockIdx());
            if (startIdx >= indexState.blocksCount())
            {
                sliceDone = true;
                return;
            }

            // Find the last index block we'll need to read for the slice.
            lastBlockIdx = indexState.findBlockIndex(slice.end(), startIdx);

            // If the slice end is before the very first block, we have nothing for that slice.
            if (lastBlockIdx < 0)
            {
                assert startIdx < 0;
                sliceDone = true;
                return;
            }

            // If we start before the very first block, just read from the first one.
            if (startIdx < 0)
                startIdx = 0;

            // If that's the last block we were reading, we're already where we want to be.
            // Otherwise, seek to that first block.
            if (startIdx != indexState.currentBlockIdx())
                indexState.setToBlock(startIdx);

            // The index search is based on the last name of the index blocks, so at that point we have that:
            //   1) indexes[currentIdx - 1].lastName < slice.start <= indexes[currentIdx].lastName
            //   2) indexes[lastBlockIdx - 1].lastName < slice.end <= indexes[lastBlockIdx].lastName
            // so if currentIdx == lastBlockIdx and slice.end < indexes[currentIdx].firstName, we're guaranteed that the
            // whole slice is between the previous block end and this block start, and thus has no corresponding
            // data. One exception is if the previous block ends with an openMarker, as it will cover our slice
            // and we need to return it (we also don't skip the slice for the old format, because we didn't have the
            // openMarker info in that case and can't rely on this optimization).
            if (indexState.currentBlockIdx() == lastBlockIdx
                && metadata().comparator.compare(slice.end(), indexState.currentIndex().firstName) < 0
                && openMarker == null
                && sstable.descriptor.version.storeRows())
            {
                sliceDone = true;
            }
        }

        @Override
        protected Unfiltered computeNext() throws IOException
        {
            while (true)
            {
                // Our previous read might have made us cross an index block boundary. If so, update our information.
                // If we read from the beginning of the partition, this is also what will initialize the index state.
                indexState.updateBlock();

                // Return the next unfiltered unless we've reached the end, or we're beyond our slice
                // end (note that unless we're on the last block for the slice, there is no point
                // in checking the slice end).
                if (indexState.isDone()
                    || indexState.currentBlockIdx() > lastBlockIdx
                    || !deserializer.hasNext()
                    || (indexState.currentBlockIdx() == lastBlockIdx && deserializer.compareNextTo(end) >= 0))
                    return null;

                Unfiltered next = deserializer.readNext();
                // We may get an empty row for the same reason expressed in UnfilteredSerializer.deserializeOne.
                if (next.isEmpty())
                    continue;

                if (next.kind() == Unfiltered.Kind.RANGE_TOMBSTONE_MARKER)
                    updateOpenMarker((RangeTombstoneMarker) next);
                return next;
            }
        }
    }
}