/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;


import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

BitSet of fixed length (numBits), backed by accessible (getBits) long[], accessed with an int index, implementing Bits and DocIdSet. If you need to manage more than 2.1B bits, use LongBitSet.
@lucene.internal
/** * BitSet of fixed length (numBits), backed by accessible ({@link #getBits}) * long[], accessed with an int index, implementing {@link Bits} and * {@link DocIdSet}. If you need to manage more than 2.1B bits, use * {@link LongBitSet}. * * @lucene.internal */
public final class FixedBitSet extends BitSet implements Bits, Accountable { private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class); private final long[] bits; // Array of longs holding the bits private final int numBits; // The number of bits in use private final int numWords; // The exact number of longs needed to hold numBits (<= bits.length)
If the given FixedBitSet is large enough to hold numBits+1, returns the given bits, otherwise returns a new FixedBitSet which can hold the requested number of bits.

NOTE: the returned bitset reuses the underlying long[] of the given bits if possible. Also, calling length() on the returned bits may return a value greater than numBits.

/** * If the given {@link FixedBitSet} is large enough to hold {@code numBits+1}, * returns the given bits, otherwise returns a new {@link FixedBitSet} which * can hold the requested number of bits. * <p> * <b>NOTE:</b> the returned bitset reuses the underlying {@code long[]} of * the given {@code bits} if possible. Also, calling {@link #length()} on the * returned bits may return a value greater than {@code numBits}. */
public static FixedBitSet ensureCapacity(FixedBitSet bits, int numBits) { if (numBits < bits.numBits) { return bits; } else { // Depends on the ghost bits being clear! // (Otherwise, they may become visible in the new instance) int numWords = bits2words(numBits); long[] arr = bits.getBits(); if (numWords >= arr.length) { arr = ArrayUtil.grow(arr, numWords + 1); } return new FixedBitSet(arr, arr.length << 6); } }
returns the number of 64 bit words it would take to hold numBits
/** returns the number of 64 bit words it would take to hold numBits */
public static int bits2words(int numBits) { return ((numBits - 1) >> 6) + 1; // I.e.: get the word-offset of the last bit and add one (make sure to use >> so 0 returns 0!) }
Returns the popcount or cardinality of the intersection of the two sets. Neither set is modified.
/** * Returns the popcount or cardinality of the intersection of the two sets. * Neither set is modified. */
public static long intersectionCount(FixedBitSet a, FixedBitSet b) { // Depends on the ghost bits being clear! return BitUtil.pop_intersect(a.bits, b.bits, 0, Math.min(a.numWords, b.numWords)); }
Returns the popcount or cardinality of the union of the two sets. Neither set is modified.
/** * Returns the popcount or cardinality of the union of the two sets. Neither * set is modified. */
public static long unionCount(FixedBitSet a, FixedBitSet b) { // Depends on the ghost bits being clear! long tot = BitUtil.pop_union(a.bits, b.bits, 0, Math.min(a.numWords, b.numWords)); if (a.numWords < b.numWords) { tot += BitUtil.pop_array(b.bits, a.numWords, b.numWords - a.numWords); } else if (a.numWords > b.numWords) { tot += BitUtil.pop_array(a.bits, b.numWords, a.numWords - b.numWords); } return tot; }
Returns the popcount or cardinality of "a and not b" or "intersection(a, not(b))". Neither set is modified.
/** * Returns the popcount or cardinality of "a and not b" or * "intersection(a, not(b))". Neither set is modified. */
public static long andNotCount(FixedBitSet a, FixedBitSet b) { // Depends on the ghost bits being clear! long tot = BitUtil.pop_andnot(a.bits, b.bits, 0, Math.min(a.numWords, b.numWords)); if (a.numWords > b.numWords) { tot += BitUtil.pop_array(a.bits, b.numWords, a.numWords - b.numWords); } return tot; }
Creates a new LongBitSet. The internally allocated long array will be exactly the size needed to accommodate the numBits specified.
Params:
  • numBits – the number of bits needed
/** * Creates a new LongBitSet. * The internally allocated long array will be exactly the size needed to accommodate the numBits specified. * @param numBits the number of bits needed */
public FixedBitSet(int numBits) { this.numBits = numBits; bits = new long[bits2words(numBits)]; numWords = bits.length; }
Creates a new LongBitSet using the provided long[] array as backing store. The storedBits array must be large enough to accommodate the numBits specified, but may be larger. In that case the 'extra' or 'ghost' bits must be clear (or they may provoke spurious side-effects)
Params:
  • storedBits – the array to use as backing store
  • numBits – the number of bits actually needed
/** * Creates a new LongBitSet using the provided long[] array as backing store. * The storedBits array must be large enough to accommodate the numBits specified, but may be larger. * In that case the 'extra' or 'ghost' bits must be clear (or they may provoke spurious side-effects) * @param storedBits the array to use as backing store * @param numBits the number of bits actually needed */
public FixedBitSet(long[] storedBits, int numBits) { this.numWords = bits2words(numBits); if (numWords > storedBits.length) { throw new IllegalArgumentException("The given long array is too small to hold " + numBits + " bits"); } this.numBits = numBits; this.bits = storedBits; assert verifyGhostBitsClear(); }
Checks if the bits past numBits are clear. Some methods rely on this implicit assumption: search for "Depends on the ghost bits being clear!"
Returns:true if the bits past numBits are clear.
/** * Checks if the bits past numBits are clear. * Some methods rely on this implicit assumption: search for "Depends on the ghost bits being clear!" * @return true if the bits past numBits are clear. */
private boolean verifyGhostBitsClear() { for (int i = numWords; i < bits.length; i++) { if (bits[i] != 0) return false; } if ((numBits & 0x3f) == 0) return true; long mask = -1L << numBits; return (bits[numWords - 1] & mask) == 0; } @Override public int length() { return numBits; } @Override public long ramBytesUsed() { return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits); }
Expert.
/** Expert. */
public long[] getBits() { return bits; }
Returns number of set bits. NOTE: this visits every long in the backing bits array, and the result is not internally cached!
/** Returns number of set bits. NOTE: this visits every * long in the backing bits array, and the result is not * internally cached! */
@Override public int cardinality() { // Depends on the ghost bits being clear! return (int) BitUtil.pop_array(bits, 0, numWords); } @Override public boolean get(int index) { assert index >= 0 && index < numBits: "index=" + index + ", numBits=" + numBits; int i = index >> 6; // div 64 // signed shift will keep a negative index and force an // array-index-out-of-bounds-exception, removing the need for an explicit check. long bitmask = 1L << index; return (bits[i] & bitmask) != 0; } public void set(int index) { assert index >= 0 && index < numBits: "index=" + index + ", numBits=" + numBits; int wordNum = index >> 6; // div 64 long bitmask = 1L << index; bits[wordNum] |= bitmask; } public boolean getAndSet(int index) { assert index >= 0 && index < numBits: "index=" + index + ", numBits=" + numBits; int wordNum = index >> 6; // div 64 long bitmask = 1L << index; boolean val = (bits[wordNum] & bitmask) != 0; bits[wordNum] |= bitmask; return val; } @Override public void clear(int index) { assert index >= 0 && index < numBits: "index=" + index + ", numBits=" + numBits; int wordNum = index >> 6; long bitmask = 1L << index; bits[wordNum] &= ~bitmask; } public boolean getAndClear(int index) { assert index >= 0 && index < numBits: "index=" + index + ", numBits=" + numBits; int wordNum = index >> 6; // div 64 long bitmask = 1L << index; boolean val = (bits[wordNum] & bitmask) != 0; bits[wordNum] &= ~bitmask; return val; } @Override public int nextSetBit(int index) { // Depends on the ghost bits being clear! assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits; int i = index >> 6; long word = bits[i] >> index; // skip all the bits to the right of index if (word!=0) { return index + Long.numberOfTrailingZeros(word); } while(++i < numWords) { word = bits[i]; if (word != 0) { return (i<<6) + Long.numberOfTrailingZeros(word); } } return DocIdSetIterator.NO_MORE_DOCS; } @Override public int prevSetBit(int index) { assert index >= 0 && index < numBits: "index=" + index + " numBits=" + numBits; int i = index >> 6; final int subIndex = index & 0x3f; // index within the word long word = (bits[i] << (63-subIndex)); // skip all the bits to the left of index if (word != 0) { return (i << 6) + subIndex - Long.numberOfLeadingZeros(word); // See LUCENE-3197 } while (--i >= 0) { word = bits[i]; if (word !=0 ) { return (i << 6) + 63 - Long.numberOfLeadingZeros(word); } } return -1; } @Override public void or(DocIdSetIterator iter) throws IOException { if (BitSetIterator.getFixedBitSetOrNull(iter) != null) { checkUnpositioned(iter); final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter); or(bits); } else { super.or(iter); } }
this = this OR other
/** this = this OR other */
public void or(FixedBitSet other) { or(other.bits, other.numWords); } private void or(final long[] otherArr, final int otherNumWords) { assert otherNumWords <= numWords : "numWords=" + numWords + ", otherNumWords=" + otherNumWords; final long[] thisArr = this.bits; int pos = Math.min(numWords, otherNumWords); while (--pos >= 0) { thisArr[pos] |= otherArr[pos]; } }
this = this XOR other
/** this = this XOR other */
public void xor(FixedBitSet other) { xor(other.bits, other.numWords); }
Does in-place XOR of the bits provided by the iterator.
/** Does in-place XOR of the bits provided by the iterator. */
public void xor(DocIdSetIterator iter) throws IOException { checkUnpositioned(iter); if (BitSetIterator.getFixedBitSetOrNull(iter) != null) { final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter); xor(bits); } else { int doc; while ((doc = iter.nextDoc()) < numBits) { flip(doc); } } } private void xor(long[] otherBits, int otherNumWords) { assert otherNumWords <= numWords : "numWords=" + numWords + ", other.numWords=" + otherNumWords; final long[] thisBits = this.bits; int pos = Math.min(numWords, otherNumWords); while (--pos >= 0) { thisBits[pos] ^= otherBits[pos]; } }
returns true if the sets have any elements in common
/** returns true if the sets have any elements in common */
public boolean intersects(FixedBitSet other) { // Depends on the ghost bits being clear! int pos = Math.min(numWords, other.numWords); while (--pos>=0) { if ((bits[pos] & other.bits[pos]) != 0) return true; } return false; }
this = this AND other
/** this = this AND other */
public void and(FixedBitSet other) { and(other.bits, other.numWords); } private void and(final long[] otherArr, final int otherNumWords) { final long[] thisArr = this.bits; int pos = Math.min(this.numWords, otherNumWords); while(--pos >= 0) { thisArr[pos] &= otherArr[pos]; } if (this.numWords > otherNumWords) { Arrays.fill(thisArr, otherNumWords, this.numWords, 0L); } }
this = this AND NOT other
/** this = this AND NOT other */
public void andNot(FixedBitSet other) { andNot(other.bits, other.numWords); } private void andNot(final long[] otherArr, final int otherNumWords) { final long[] thisArr = this.bits; int pos = Math.min(this.numWords, otherNumWords); while(--pos >= 0) { thisArr[pos] &= ~otherArr[pos]; } }
Scans the backing store to check if all bits are clear. The method is deliberately not called "isEmpty" to emphasize it is not low cost (as isEmpty usually is).
Returns:true if all bits are clear.
/** * Scans the backing store to check if all bits are clear. * The method is deliberately not called "isEmpty" to emphasize it is not low cost (as isEmpty usually is). * @return true if all bits are clear. */
public boolean scanIsEmpty() { // This 'slow' implementation is still faster than any external one could be // (e.g.: (bitSet.length() == 0 || bitSet.nextSetBit(0) == -1)) // especially for small BitSets // Depends on the ghost bits being clear! final int count = numWords; for (int i = 0; i < count; i++) { if (bits[i] != 0) return false; } return true; }
Flips a range of bits
Params:
  • startIndex – lower index
  • endIndex – one-past the last bit to flip
/** Flips a range of bits * * @param startIndex lower index * @param endIndex one-past the last bit to flip */
public void flip(int startIndex, int endIndex) { assert startIndex >= 0 && startIndex < numBits; assert endIndex >= 0 && endIndex <= numBits; if (endIndex <= startIndex) { return; } int startWord = startIndex >> 6; int endWord = (endIndex-1) >> 6; /*** Grrr, java shifting uses only the lower 6 bits of the count so -1L>>>64 == -1 * for that reason, make sure not to use endmask if the bits to flip will * be zero in the last word (redefine endWord to be the last changed...) long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000 long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111 ***/ long startmask = -1L << startIndex; long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex since only the lowest 6 bits are used if (startWord == endWord) { bits[startWord] ^= (startmask & endmask); return; } bits[startWord] ^= startmask; for (int i=startWord+1; i<endWord; i++) { bits[i] = ~bits[i]; } bits[endWord] ^= endmask; }
Flip the bit at the provided index.
/** Flip the bit at the provided index. */
public void flip(int index) { assert index >= 0 && index < numBits: "index=" + index + " numBits=" + numBits; int wordNum = index >> 6; // div 64 long bitmask = 1L << index; // mod 64 is implicit bits[wordNum] ^= bitmask; }
Sets a range of bits
Params:
  • startIndex – lower index
  • endIndex – one-past the last bit to set
/** Sets a range of bits * * @param startIndex lower index * @param endIndex one-past the last bit to set */
public void set(int startIndex, int endIndex) { assert startIndex >= 0 && startIndex < numBits : "startIndex=" + startIndex + ", numBits=" + numBits; assert endIndex >= 0 && endIndex <= numBits : "endIndex=" + endIndex + ", numBits=" + numBits; if (endIndex <= startIndex) { return; } int startWord = startIndex >> 6; int endWord = (endIndex-1) >> 6; long startmask = -1L << startIndex; long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex since only the lowest 6 bits are used if (startWord == endWord) { bits[startWord] |= (startmask & endmask); return; } bits[startWord] |= startmask; Arrays.fill(bits, startWord+1, endWord, -1L); bits[endWord] |= endmask; } @Override public void clear(int startIndex, int endIndex) { assert startIndex >= 0 && startIndex < numBits : "startIndex=" + startIndex + ", numBits=" + numBits; assert endIndex >= 0 && endIndex <= numBits : "endIndex=" + endIndex + ", numBits=" + numBits; if (endIndex <= startIndex) { return; } int startWord = startIndex >> 6; int endWord = (endIndex-1) >> 6; long startmask = -1L << startIndex; long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex since only the lowest 6 bits are used // invert masks since we are clearing startmask = ~startmask; endmask = ~endmask; if (startWord == endWord) { bits[startWord] &= (startmask | endmask); return; } bits[startWord] &= startmask; Arrays.fill(bits, startWord+1, endWord, 0L); bits[endWord] &= endmask; } @Override public FixedBitSet clone() { long[] bits = new long[this.bits.length]; System.arraycopy(this.bits, 0, bits, 0, numWords); return new FixedBitSet(bits, numBits); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (!(o instanceof FixedBitSet)) { return false; } FixedBitSet other = (FixedBitSet) o; if (numBits != other.numBits) { return false; } // Depends on the ghost bits being clear! return Arrays.equals(bits, other.bits); } @Override public int hashCode() { // Depends on the ghost bits being clear! long h = 0; for (int i = numWords; --i>=0;) { h ^= bits[i]; h = (h << 1) | (h >>> 63); // rotate left } // fold leftmost bits into right and add a constant to prevent // empty sets from returning 0, which is too common. return (int) ((h>>32) ^ h) + 0x98761234; }
Make a copy of the given bits.
/** * Make a copy of the given bits. */
public static FixedBitSet copyOf(Bits bits) { if (bits instanceof FixedBits) { // restore the original FixedBitSet FixedBits fixedBits = (FixedBits) bits; bits = new FixedBitSet(fixedBits.bits, fixedBits.length); } if (bits instanceof FixedBitSet) { return ((FixedBitSet)bits).clone(); } else { int length = bits.length(); FixedBitSet bitSet = new FixedBitSet(length); bitSet.set(0, length); for (int i = 0; i < length; ++i) { if (bits.get(i) == false) { bitSet.clear(i); } } return bitSet; } }
Convert this instance to read-only Bits. This is useful in the case that this FixedBitSet is returned as a Bits instance, to make sure that consumers may not get write access back by casting to a FixedBitSet. NOTE: Changes to this FixedBitSet will be reflected on the returned Bits.
/** * Convert this instance to read-only {@link Bits}. * This is useful in the case that this {@link FixedBitSet} is returned as a * {@link Bits} instance, to make sure that consumers may not get write access * back by casting to a {@link FixedBitSet}. * NOTE: Changes to this {@link FixedBitSet} will be reflected on the returned * {@link Bits}. */
public Bits asReadOnlyBits() { return new FixedBits(bits, numBits); } }