/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util.fst;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.IntsRef; // javadocs
import org.apache.lucene.util.RamUsageEstimator;

Wraps another Outputs implementation and encodes one or more of its output values. You can use this when a single input may need to map to more than one output, maintaining order: pass the same input with a different output by calling Builder.add(IntsRef, Object) multiple times. The builder will then combine the outputs using the Outputs.merge(Object, Object) method.

The resulting FST may not be minimal when an input has more than one output, as this requires pushing all multi-output values to a final state.

NOTE: the only way to create multiple outputs is to add the same input to the FST multiple times in a row. This is how the FST maps a single input to multiple outputs (e.g. you cannot pass a List<Object> to Builder.add). If your outputs are longs, and you need at most 2, then use UpToTwoPositiveIntOutputs instead since it stores the outputs more compactly (by stealing a bit from each long value).

NOTE: this cannot wrap itself (ie you cannot make an FST with List<List<Object>> outputs using this).

@lucene.experimental
/** * Wraps another Outputs implementation and encodes one or * more of its output values. You can use this when a single * input may need to map to more than one output, * maintaining order: pass the same input with a different * output by calling {@link Builder#add(IntsRef,Object)} multiple * times. The builder will then combine the outputs using * the {@link Outputs#merge(Object,Object)} method. * * <p>The resulting FST may not be minimal when an input has * more than one output, as this requires pushing all * multi-output values to a final state. * * <p>NOTE: the only way to create multiple outputs is to * add the same input to the FST multiple times in a row. This is * how the FST maps a single input to multiple outputs (e.g. you * cannot pass a List&lt;Object&gt; to {@link Builder#add}). If * your outputs are longs, and you need at most 2, then use * {@link UpToTwoPositiveIntOutputs} instead since it stores * the outputs more compactly (by stealing a bit from each * long value). * * <p>NOTE: this cannot wrap itself (ie you cannot make an * FST with List&lt;List&lt;Object&gt;&gt; outputs using this). * * @lucene.experimental */
// NOTE: i think we could get a more compact FST if, instead // of adding the same input multiple times with a different // output each time, we added it only once with a // pre-constructed List<T> output. This way the "multiple // values" is fully opaque to the Builder/FST. It would // require implementing the full algebra using set // arithmetic (I think?); maybe SetOfOutputs is a good name. @SuppressWarnings("unchecked") public final class ListOfOutputs<T> extends Outputs<Object> { private final Outputs<T> outputs; public ListOfOutputs(Outputs<T> outputs) { this.outputs = outputs; } @Override public Object common(Object output1, Object output2) { // These will never be a list: return outputs.common((T) output1, (T) output2); } @Override public Object subtract(Object object, Object inc) { // These will never be a list: return outputs.subtract((T) object, (T) inc); } @Override public Object add(Object prefix, Object output) { assert !(prefix instanceof List); if (!(output instanceof List)) { return outputs.add((T) prefix, (T) output); } else { List<T> outputList = (List<T>) output; List<T> addedList = new ArrayList<>(outputList.size()); for(T _output : outputList) { addedList.add(outputs.add((T) prefix, _output)); } return addedList; } } @Override public void write(Object output, DataOutput out) throws IOException { assert !(output instanceof List); outputs.write((T) output, out); } @Override public void writeFinalOutput(Object output, DataOutput out) throws IOException { if (!(output instanceof List)) { out.writeVInt(1); outputs.write((T) output, out); } else { List<T> outputList = (List<T>) output; out.writeVInt(outputList.size()); for(T eachOutput : outputList) { outputs.write(eachOutput, out); } } } @Override public Object read(DataInput in) throws IOException { return outputs.read(in); } @Override public void skipOutput(DataInput in) throws IOException { outputs.skipOutput(in); } @Override public Object readFinalOutput(DataInput in) throws IOException { int count = in.readVInt(); if (count == 1) { return outputs.read(in); } else { List<T> outputList = new ArrayList<>(count); for(int i=0;i<count;i++) { outputList.add(outputs.read(in)); } return outputList; } } @Override public void skipFinalOutput(DataInput in) throws IOException { int count = in.readVInt(); for(int i=0;i<count;i++) { outputs.skipOutput(in); } } @Override public Object getNoOutput() { return outputs.getNoOutput(); } @Override public String outputToString(Object output) { if (!(output instanceof List)) { return outputs.outputToString((T) output); } else { List<T> outputList = (List<T>) output; StringBuilder b = new StringBuilder(); b.append('['); for(int i=0;i<outputList.size();i++) { if (i > 0) { b.append(", "); } b.append(outputs.outputToString(outputList.get(i))); } b.append(']'); return b.toString(); } } @Override public Object merge(Object first, Object second) { List<T> outputList = new ArrayList<>(); if (!(first instanceof List)) { outputList.add((T) first); } else { outputList.addAll((List<T>) first); } if (!(second instanceof List)) { outputList.add((T) second); } else { outputList.addAll((List<T>) second); } //System.out.println("MERGE: now " + outputList.size() + " first=" + outputToString(first) + " second=" + outputToString(second)); //System.out.println(" return " + outputToString(outputList)); return outputList; } @Override public String toString() { return "OneOrMoreOutputs(" + outputs + ")"; } public List<T> asList(Object output) { if (!(output instanceof List)) { List<T> result = new ArrayList<>(1); result.add((T) output); return result; } else { return (List<T>) output; } } private static final long BASE_LIST_NUM_BYTES = RamUsageEstimator.shallowSizeOf(new ArrayList<Object>()); @Override public long ramBytesUsed(Object output) { long bytes = 0; if (output instanceof List) { bytes += BASE_LIST_NUM_BYTES; List<T> outputList = (List<T>) output; for(T _output : outputList) { bytes += outputs.ramBytesUsed(_output); } // 2 * to allow for ArrayList's oversizing: bytes += 2 * outputList.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF; } else { bytes += outputs.ramBytesUsed((T) output); } return bytes; } }