package org.apache.lucene.codecs.simpletext;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.bkd.BKDReader;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_COUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_DOC_ID;
import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_VALUE;
final class SimpleTextBKDReader extends PointValues implements Accountable {
final private byte[] splitPackedValues;
final long[] leafBlockFPs;
final private int leafNodeOffset;
final int numDataDims;
final int numIndexDims;
final int bytesPerDim;
final int bytesPerIndexEntry;
final IndexInput in;
final int maxPointsInLeafNode;
final byte[] minPackedValue;
final byte[] maxPackedValue;
final long pointCount;
final int docCount;
final int version;
protected final int packedBytesLength;
protected final int packedIndexBytesLength;
public SimpleTextBKDReader(IndexInput in, int numDataDims, int numIndexDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
byte[] minPackedValue, byte[] maxPackedValue, long pointCount, int docCount) throws IOException {
this.in = in;
this.numDataDims = numDataDims;
this.numIndexDims = numIndexDims;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.bytesPerDim = bytesPerDim;
bytesPerIndexEntry = numIndexDims == 1 ? bytesPerDim : bytesPerDim + 1;
packedBytesLength = numDataDims * bytesPerDim;
packedIndexBytesLength = numIndexDims * bytesPerDim;
this.leafNodeOffset = leafBlockFPs.length;
this.leafBlockFPs = leafBlockFPs;
this.splitPackedValues = splitPackedValues;
this.minPackedValue = minPackedValue;
this.maxPackedValue = maxPackedValue;
this.pointCount = pointCount;
this.docCount = docCount;
this.version = SimpleTextBKDWriter.VERSION_CURRENT;
assert minPackedValue.length == packedIndexBytesLength;
assert maxPackedValue.length == packedIndexBytesLength;
}
public static final class IntersectState {
final IndexInput in;
final int[] scratchDocIDs;
final byte[] scratchPackedValue;
final int[] commonPrefixLengths;
final IntersectVisitor visitor;
public IntersectState(IndexInput in, int numDims,
int packedBytesLength,
int maxPointsInLeafNode,
IntersectVisitor visitor) {
this.in = in;
this.visitor = visitor;
this.commonPrefixLengths = new int[numDims];
this.scratchDocIDs = new int[maxPointsInLeafNode];
this.scratchPackedValue = new byte[packedBytesLength];
}
}
public void intersect(IntersectVisitor visitor) throws IOException {
intersect(getIntersectState(visitor), 1, minPackedValue, maxPackedValue);
}
private void addAll(IntersectState state, int nodeID) throws IOException {
if (nodeID >= leafNodeOffset) {
visitDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.visitor);
} else {
addAll(state, 2*nodeID);
addAll(state, 2*nodeID+1);
}
}
public IntersectState getIntersectState(IntersectVisitor visitor) {
return new IntersectState(in.clone(), numDataDims,
packedBytesLength,
maxPointsInLeafNode,
visitor);
}
public void visitLeafBlockValues(int nodeID, IntersectState state) throws IOException {
int leafID = nodeID - leafNodeOffset;
int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
}
void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException {
BytesRefBuilder scratch = new BytesRefBuilder();
in.seek(blockFP);
readLine(in, scratch);
int count = parseInt(scratch, BLOCK_COUNT);
visitor.grow(count);
for(int i=0;i<count;i++) {
readLine(in, scratch);
visitor.visit(parseInt(scratch, BLOCK_DOC_ID));
}
}
int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
BytesRefBuilder scratch = new BytesRefBuilder();
in.seek(blockFP);
readLine(in, scratch);
int count = parseInt(scratch, BLOCK_COUNT);
for(int i=0;i<count;i++) {
readLine(in, scratch);
docIDs[i] = parseInt(scratch, BLOCK_DOC_ID);
}
return count;
}
void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
visitor.grow(count);
assert scratchPackedValue.length == packedBytesLength;
BytesRefBuilder scratch = new BytesRefBuilder();
for(int i=0;i<count;i++) {
readLine(in, scratch);
assert startsWith(scratch, BLOCK_VALUE);
BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(scratch, BLOCK_VALUE));
assert br.length == packedBytesLength;
System.arraycopy(br.bytes, br.offset, scratchPackedValue, 0, packedBytesLength);
visitor.visit(docIDs[i], scratchPackedValue);
}
}
private void visitCompressedDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor, int compressedDim) throws IOException {
final int compressedByteOffset = compressedDim * bytesPerDim + commonPrefixLengths[compressedDim];
commonPrefixLengths[compressedDim]++;
int i;
for (i = 0; i < count; ) {
scratchPackedValue[compressedByteOffset] = in.readByte();
final int runLen = Byte.toUnsignedInt(in.readByte());
for (int j = 0; j < runLen; ++j) {
for(int dim=0;dim<numDataDims;dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(scratchPackedValue, dim*bytesPerDim + prefix, bytesPerDim - prefix);
}
visitor.visit(docIDs[i+j], scratchPackedValue);
}
i += runLen;
}
if (i != count) {
throw new CorruptIndexException("Sub blocks do not add up to the expected count: " + count + " != " + i, in);
}
}
private int readCompressedDim(IndexInput in) throws IOException {
int compressedDim = in.readByte();
if (compressedDim < -1 || compressedDim >= numIndexDims) {
throw new CorruptIndexException("Got compressedDim="+compressedDim, in);
}
return compressedDim;
}
private void readCommonPrefixes(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in) throws IOException {
for(int dim=0;dim<numDataDims;dim++) {
int prefix = in.readVInt();
commonPrefixLengths[dim] = prefix;
if (prefix > 0) {
in.readBytes(scratchPackedValue, dim*bytesPerDim, prefix);
}
}
}
private void intersect(IntersectState state,
int nodeID,
byte[] cellMinPacked, byte[] cellMaxPacked)
throws IOException {
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return;
} else if (r == Relation.CELL_INSIDE_QUERY) {
addAll(state, nodeID);
return;
} else {
}
if (nodeID >= leafNodeOffset) {
int leafID = nodeID - leafNodeOffset;
if (leafID < leafBlockFPs.length) {
int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
}
} else {
int address = nodeID * bytesPerIndexEntry;
int splitDim;
if (numIndexDims == 1) {
splitDim = 0;
} else {
splitDim = splitPackedValues[address++] & 0xff;
}
assert splitDim < numIndexDims;
byte[] splitPackedValue = new byte[packedIndexBytesLength];
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedIndexBytesLength);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
intersect(state,
2*nodeID,
cellMinPacked, splitPackedValue);
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedIndexBytesLength);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
intersect(state,
2*nodeID+1,
splitPackedValue, cellMaxPacked);
}
}
@Override
public long estimatePointCount(IntersectVisitor visitor) {
return estimatePointCount(getIntersectState(visitor), 1, minPackedValue, maxPackedValue);
}
private long estimatePointCount(IntersectState state,
int nodeID, byte[] cellMinPacked, byte[] cellMaxPacked) {
Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
if (r == Relation.CELL_OUTSIDE_QUERY) {
return 0L;
} else if (nodeID >= leafNodeOffset) {
return maxPointsInLeafNode;
} else {
int address = nodeID * bytesPerIndexEntry;
int splitDim;
if (numIndexDims == 1) {
splitDim = 0;
} else {
splitDim = splitPackedValues[address++] & 0xff;
}
assert splitDim < numIndexDims;
byte[] splitPackedValue = new byte[packedIndexBytesLength];
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedIndexBytesLength);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
final long leftCost = estimatePointCount(state,
2*nodeID,
cellMinPacked, splitPackedValue);
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedIndexBytesLength);
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
final long rightCost = estimatePointCount(state,
2*nodeID+1,
splitPackedValue, cellMaxPacked);
return leftCost + rightCost;
}
}
public void copySplitValue(int nodeID, byte[] splitPackedValue) {
int address = nodeID * bytesPerIndexEntry;
int splitDim;
if (numIndexDims == 1) {
splitDim = 0;
} else {
splitDim = splitPackedValues[address++] & 0xff;
}
assert splitDim < numIndexDims;
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.sizeOf(splitPackedValues) +
RamUsageEstimator.sizeOf(leafBlockFPs);
}
@Override
public byte[] getMinPackedValue() {
return minPackedValue.clone();
}
@Override
public byte[] getMaxPackedValue() {
return maxPackedValue.clone();
}
@Override
public int getNumDataDimensions() {
return numDataDims;
}
@Override
public int getNumIndexDimensions() {
return numIndexDims;
}
@Override
public int getBytesPerDimension() {
return bytesPerDim;
}
@Override
public long size() {
return pointCount;
}
@Override
public int getDocCount() {
return docCount;
}
public boolean isLeafNode(int nodeID) {
return nodeID >= leafNodeOffset;
}
private int parseInt(BytesRefBuilder scratch, BytesRef prefix) {
assert startsWith(scratch, prefix);
return Integer.parseInt(stripPrefix(scratch, prefix));
}
private String stripPrefix(BytesRefBuilder scratch, BytesRef prefix) {
return new String(scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8);
}
private boolean startsWith(BytesRefBuilder scratch, BytesRef prefix) {
return StringHelper.startsWith(scratch.get(), prefix);
}
private void readLine(IndexInput in, BytesRefBuilder scratch) throws IOException {
SimpleTextUtil.readLine(in, scratch);
}
}