org.apache.lucene/lucene-core/8.2.0 : org/apache/lucene/index/Sorter.java

Sorter
http://lucene.apache.org/lucene-parent/lucene-core: Apache Lucene Java Core (The Apache Software Foundation)
Apache 2
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;

import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

Sorts documents of a given index by returning a permutation on the document
IDs.
@lucene.experimental /**
 * Sorts documents of a given index by returning a permutation on the document
 * IDs.
 * @lucene.experimental
 */
final class Sorter {
  final Sort sort;
  
  Creates a new Sorter to sort the index with sort /** Creates a new Sorter to sort the index with {@code sort} */
  Sorter(Sort sort) {
    if (sort.needsScores()) {
      throw new IllegalArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
    }
    this.sort = sort;
  }

  A permutation of doc IDs. For every document ID between 0 and IndexReader.maxDoc(), oldToNew(newToOld(docID)) must
return docID.
/**
   * A permutation of doc IDs. For every document ID between <tt>0</tt> and
   * {@link IndexReader#maxDoc()}, <code>oldToNew(newToOld(docID))</code> must
   * return <code>docID</code>.
   */
  static abstract class DocMap {

    Given a doc ID from the original index, return its ordinal in the
 sorted index. /** Given a doc ID from the original index, return its ordinal in the
     *  sorted index. */
    abstract int oldToNew(int docID);

    Given the ordinal of a doc ID, return its doc ID in the original index. /** Given the ordinal of a doc ID, return its doc ID in the original index. */
    abstract int newToOld(int docID);

    Return the number of documents in this map. This must be equal to the number of documents of the LeafReader which is sorted. /** Return the number of documents in this map. This must be equal to the
     *  {@link org.apache.lucene.index.LeafReader#maxDoc() number of documents} of the
     *  {@link org.apache.lucene.index.LeafReader} which is sorted. */
    abstract int size();
  }

  Check consistency of a DocMap, useful for assertions. /** Check consistency of a {@link DocMap}, useful for assertions. */
  static boolean isConsistent(DocMap docMap) {
    final int maxDoc = docMap.size();
    for (int i = 0; i < maxDoc; ++i) {
      final int newID = docMap.oldToNew(i);
      final int oldID = docMap.newToOld(newID);
      assert newID >= 0 && newID < maxDoc : "doc IDs must be in [0-" + maxDoc + "[, got " + newID;
      assert i == oldID : "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID;
      if (i != oldID || newID < 0 || newID >= maxDoc) {
        return false;
      }
    }
    return true;
  }

  A comparator of doc IDs. /** A comparator of doc IDs. */
  static abstract class DocComparator {

    Compare docID1 against docID2. The contract for the return value is the same as Comparator.compare(Object, Object). /** Compare docID1 against docID2. The contract for the return value is the
     *  same as {@link Comparator#compare(Object, Object)}. */
    public abstract int compare(int docID1, int docID2);
  }

  private static final class DocValueSorter extends TimSorter {
    
    private final int[] docs;
    private final Sorter.DocComparator comparator;
    private final int[] tmp;
    
    DocValueSorter(int[] docs, Sorter.DocComparator comparator) {
      super(docs.length / 64);
      this.docs = docs;
      this.comparator = comparator;
      tmp = new int[docs.length / 64];
    }
    
    @Override
    protected int compare(int i, int j) {
      return comparator.compare(docs[i], docs[j]);
    }
    
    @Override
    protected void swap(int i, int j) {
      int tmpDoc = docs[i];
      docs[i] = docs[j];
      docs[j] = tmpDoc;
    }

    @Override
    protected void copy(int src, int dest) {
      docs[dest] = docs[src];
    }

    @Override
    protected void save(int i, int len) {
      System.arraycopy(docs, i, tmp, 0, len);
    }

    @Override
    protected void restore(int i, int j) {
      docs[j] = tmp[i];
    }

    @Override
    protected int compareSaved(int i, int j) {
      return comparator.compare(tmp[i], docs[j]);
    }
  }

  Computes the old-to-new permutation over the given comparator. /** Computes the old-to-new permutation over the given comparator. */
  private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) {
    // check if the index is sorted
    boolean sorted = true;
    for (int i = 1; i < maxDoc; ++i) {
      if (comparator.compare(i-1, i) > 0) {
        sorted = false;
        break;
      }
    }
    if (sorted) {
      return null;
    }

    // sort doc IDs
    final int[] docs = new int[maxDoc];
    for (int i = 0; i < maxDoc; i++) {
      docs[i] = i;
    }
    
    DocValueSorter sorter = new DocValueSorter(docs, comparator);
    // It can be common to sort a reader, add docs, sort it again, ... and in
    // that case timSort can save a lot of time
    sorter.sort(0, docs.length); // docs is now the newToOld mapping

    // The reason why we use MonotonicAppendingLongBuffer here is that it
    // wastes very little memory if the index is in random order but can save
    // a lot of memory if the index is already "almost" sorted
    final PackedLongValues.Builder newToOldBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    for (int i = 0; i < maxDoc; ++i) {
      newToOldBuilder.add(docs[i]);
    }
    final PackedLongValues newToOld = newToOldBuilder.build();

    // invert the docs mapping:
    for (int i = 0; i < maxDoc; ++i) {
      docs[(int) newToOld.get(i)] = i;
    } // docs is now the oldToNew mapping

    final PackedLongValues.Builder oldToNewBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    for (int i = 0; i < maxDoc; ++i) {
      oldToNewBuilder.add(docs[i]);
    }
    final PackedLongValues oldToNew = oldToNewBuilder.build();
    
    return new Sorter.DocMap() {

      @Override
      public int oldToNew(int docID) {
        return (int) oldToNew.get(docID);
      }

      @Override
      public int newToOld(int docID) {
        return (int) newToOld.get(docID);
      }

      @Override
      public int size() {
        return maxDoc;
      }
    };
  }

  Returns the native sort type for SortedSetSortField and SortedNumericSortField, SortField.getType() otherwise /** Returns the native sort type for {@link SortedSetSortField} and {@link SortedNumericSortField},
   * {@link SortField#getType()} otherwise */
  static SortField.Type getSortFieldType(SortField sortField) {
    if (sortField instanceof SortedSetSortField) {
      return SortField.Type.STRING;
    } else if (sortField instanceof SortedNumericSortField) {
      return ((SortedNumericSortField) sortField).getNumericType();
    } else {
      return sortField.getType();
    }
  }

  Wraps a SortedNumericDocValues as a single-valued view if the field is an instance of SortedNumericSortField, returns NumericDocValues for the field otherwise. /** Wraps a {@link SortedNumericDocValues} as a single-valued view if the field is an instance of {@link SortedNumericSortField},
   * returns {@link NumericDocValues} for the field otherwise. */
  static NumericDocValues getOrWrapNumeric(LeafReader reader, SortField sortField) throws IOException {
    if (sortField instanceof SortedNumericSortField) {
      SortedNumericSortField sf = (SortedNumericSortField) sortField;
      return SortedNumericSelector.wrap(DocValues.getSortedNumeric(reader, sf.getField()), sf.getSelector(), sf.getNumericType());
    } else {
      return DocValues.getNumeric(reader, sortField.getField());
    }
  }

  Wraps a SortedSetDocValues as a single-valued view if the field is an instance of SortedSetSortField, returns SortedDocValues for the field otherwise. /** Wraps a {@link SortedSetDocValues} as a single-valued view if the field is an instance of {@link SortedSetSortField},
   * returns {@link SortedDocValues} for the field otherwise. */
  static SortedDocValues getOrWrapSorted(LeafReader reader, SortField sortField) throws IOException {
    if (sortField instanceof SortedSetSortField) {
      SortedSetSortField sf = (SortedSetSortField) sortField;
      return SortedSetSelector.wrap(DocValues.getSortedSet(reader, sf.getField()), sf.getSelector());
    } else {
      return DocValues.getSorted(reader, sortField.getField());
    }
  }

  static DocComparator getDocComparator(LeafReader reader, SortField sortField) throws IOException {
    return getDocComparator(reader.maxDoc(), sortField,
        () -> getOrWrapSorted(reader, sortField),
        () -> getOrWrapNumeric(reader, sortField));
  }

  interface NumericDocValuesSupplier {
    NumericDocValues get() throws IOException;
  }

  interface SortedDocValuesSupplier {
    SortedDocValues get() throws IOException;
  }

  We cannot use the FieldComparator API because that API requires that you send it docIDs in order. Note that this API allocates arrays[maxDoc] to hold the native values needed for comparison, but 1) they are transient (only alive while sorting this one segment), and 2) in the typical index sorting case, they are only used to sort newly flushed segments, which will be smaller than merged segments. /** We cannot use the {@link FieldComparator} API because that API requires that you send it docIDs in order.  Note that this API
   *  allocates arrays[maxDoc] to hold the native values needed for comparison, but 1) they are transient (only alive while sorting this one
   *  segment), and 2) in the typical index sorting case, they are only used to sort newly flushed segments, which will be smaller than
   *  merged segments.  */
  static DocComparator getDocComparator(int maxDoc,
                                        SortField sortField,
                                        SortedDocValuesSupplier sortedProvider,
                                        NumericDocValuesSupplier numericProvider) throws IOException {

    final int reverseMul = sortField.getReverse() ? -1 : 1;
    final SortField.Type sortType = getSortFieldType(sortField);

    switch(sortType) {

      case STRING:
      {
        final SortedDocValues sorted = sortedProvider.get();
        final int missingOrd;
        if (sortField.getMissingValue() == SortField.STRING_LAST) {
          missingOrd = Integer.MAX_VALUE;
        } else {
          missingOrd = Integer.MIN_VALUE;
        }

        final int[] ords = new int[maxDoc];
        Arrays.fill(ords, missingOrd);
        int docID;
        while ((docID = sorted.nextDoc()) != NO_MORE_DOCS) {
          ords[docID] = sorted.ordValue();
        }

        return new DocComparator() {
          @Override
          public int compare(int docID1, int docID2) {
            return reverseMul * Integer.compare(ords[docID1], ords[docID2]);
          }
        };
      }

      case LONG:
      {
        final NumericDocValues dvs = numericProvider.get();
        long[] values = new long[maxDoc];
        if (sortField.getMissingValue() != null) {
          Arrays.fill(values, (Long) sortField.getMissingValue());
        }
        while (true) {
          int docID = dvs.nextDoc();
          if (docID == NO_MORE_DOCS) {
            break;
          }
          values[docID] = dvs.longValue();
        }

        return new DocComparator() {
          @Override
          public int compare(int docID1, int docID2) {
            return reverseMul * Long.compare(values[docID1], values[docID2]);
          }
        };
      }

      case INT:
      {
        final NumericDocValues dvs = numericProvider.get();
        int[] values = new int[maxDoc];
        if (sortField.getMissingValue() != null) {
          Arrays.fill(values, (Integer) sortField.getMissingValue());
        }

        while (true) {
          int docID = dvs.nextDoc();
          if (docID == NO_MORE_DOCS) {
            break;
          }
          values[docID] = (int) dvs.longValue();
        }

        return new DocComparator() {
          @Override
          public int compare(int docID1, int docID2) {
            return reverseMul * Integer.compare(values[docID1], values[docID2]);
          }
        };
      }

      case DOUBLE:
      {
        final NumericDocValues dvs = numericProvider.get();
        double[] values = new double[maxDoc];
        if (sortField.getMissingValue() != null) {
          Arrays.fill(values, (Double) sortField.getMissingValue());
        }
        while (true) {
          int docID = dvs.nextDoc();
          if (docID == NO_MORE_DOCS) {
            break;
          }
          values[docID] = Double.longBitsToDouble(dvs.longValue());
        }

        return new DocComparator() {
          @Override
          public int compare(int docID1, int docID2) {
            return reverseMul * Double.compare(values[docID1], values[docID2]);
          }
        };
      }

      case FLOAT:
      {
        final NumericDocValues dvs = numericProvider.get();
        float[] values = new float[maxDoc];
        if (sortField.getMissingValue() != null) {
          Arrays.fill(values, (Float) sortField.getMissingValue());
        }
        while (true) {
          int docID = dvs.nextDoc();
          if (docID == NO_MORE_DOCS) {
            break;
          }
          values[docID] = Float.intBitsToFloat((int) dvs.longValue());
        }

        return new DocComparator() {
          @Override
          public int compare(int docID1, int docID2) {
            return reverseMul * Float.compare(values[docID1], values[docID2]);
          }
        };
      }

      default:
        throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());
    }
  }


  Returns a mapping from the old document ID to its new location in the sorted index. Implementations can use the auxiliary sort(int, DocComparator) to compute the old-to-new permutation given a list of documents and their corresponding values. 
A return value of null is allowed and means that
reader is already sorted.

NOTE: deleted documents are expected to appear in the mapping as
well, they will however be marked as deleted in the sorted view.
/**
   * Returns a mapping from the old document ID to its new location in the
   * sorted index. Implementations can use the auxiliary
   * {@link #sort(int, DocComparator)} to compute the old-to-new permutation
   * given a list of documents and their corresponding values.
   * <p>
   * A return value of <tt>null</tt> is allowed and means that
   * <code>reader</code> is already sorted.
   * <p>
   * <b>NOTE:</b> deleted documents are expected to appear in the mapping as
   * well, they will however be marked as deleted in the sorted view.
   */
  DocMap sort(LeafReader reader) throws IOException {
    SortField fields[] = sort.getSort();
    final DocComparator comparators[] = new DocComparator[fields.length];

    for (int i = 0; i < fields.length; i++) {
      comparators[i] = getDocComparator(reader, fields[i]);
    }
    return sort(reader.maxDoc(), comparators);
  }


  DocMap sort(int maxDoc, DocComparator[] comparators) throws IOException {
    final DocComparator comparator = new DocComparator() {
      @Override
      public int compare(int docID1, int docID2) {
        for (int i = 0; i < comparators.length; i++) {
          int comp = comparators[i].compare(docID1, docID2);
          if (comp != 0) {
            return comp;
          }
        }
        return Integer.compare(docID1, docID2); // docid order tiebreak
      }
    };

    return sort(maxDoc, comparator);
  }

  Returns the identifier of this Sorter. This identifier is similar to Object.hashCode() and should be chosen so that two instances of this class that sort documents likewise will have the same identifier. On the contrary, this identifier should be different on different sorts. /**
   * Returns the identifier of this {@link Sorter}.
   * <p>This identifier is similar to {@link Object#hashCode()} and should be
   * chosen so that two instances of this class that sort documents likewise
   * will have the same identifier. On the contrary, this identifier should be
   * different on different {@link Sort sorts}.
   */
  public String getID() {
    return sort.toString();
  }

  @Override
  public String toString() {
    return getID();
  }
  
}
/

org.apache.lucene/ lucene-core/ 8.2.0/ org/apache/lucene/index/Sorter.java