/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.document;


import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.util.BytesRef;

/**
 * Expert: directly create a field for a document. Most
 * users should use one of the sugar subclasses:
 * <ul>
 *   <li>{@link TextField}: {@link Reader} or {@link String} indexed for full-text search
 *   <li>{@link StringField}: {@link String} indexed verbatim as a single token
 *   <li>{@link IntPoint}: {@code int} indexed for exact/range queries.
 *   <li>{@link LongPoint}: {@code long} indexed for exact/range queries.
 *   <li>{@link FloatPoint}: {@code float} indexed for exact/range queries.
 *   <li>{@link DoublePoint}: {@code double} indexed for exact/range queries.
 *   <li>{@link SortedDocValuesField}: {@code byte[]} indexed column-wise for sorting/faceting
 *   <li>{@link SortedSetDocValuesField}: {@code SortedSet<byte[]>} indexed column-wise for sorting/faceting
 *   <li>{@link NumericDocValuesField}: {@code long} indexed column-wise for sorting/faceting
 *   <li>{@link SortedNumericDocValuesField}: {@code SortedSet<long>} indexed column-wise for sorting/faceting
 *   <li>{@link StoredField}: Stored-only value for retrieving in summary results
 * </ul>
 *
 * <p>A field is a section of a Document. Each field has three
 * parts: name, type and value. Values may be text
 * (String, Reader or pre-analyzed TokenStream), binary
 * (byte[]), or numeric (a Number). Fields are optionally stored in the
 * index, so that they may be returned with hits on the document.
 *
 * <p>NOTE: the field type is an {@link IndexableFieldType}. Making changes
 * to the state of the IndexableFieldType will impact any
 * Field it is used in. It is strongly recommended that no
 * changes be made after Field instantiation.
 */
public class Field implements IndexableField {
Field's type
/** * Field's type */
protected final IndexableFieldType type;
Field's name
/** * Field's name */
protected final String name;
Field's value
/** Field's value */
protected Object fieldsData;
Pre-analyzed tokenStream for indexed fields; this is separate from fieldsData because you are allowed to have both; eg maybe field has a String value but you customize how it's tokenized
/** Pre-analyzed tokenStream for indexed fields; this is * separate from fieldsData because you are allowed to * have both; eg maybe field has a String value but you * customize how it's tokenized */
protected TokenStream tokenStream;
/**
 * Expert: creates a field that has no value yet.
 * Intended only for custom Field subclasses.
 *
 * @param name field name
 * @param type field type
 * @throws IllegalArgumentException if either the name or type is null.
 */
protected Field(String name, IndexableFieldType type) {
  // The name is validated before the type, so a call with both null reports the name.
  if (name == null) {
    throw new IllegalArgumentException("name must not be null");
  }
  if (type == null) {
    throw new IllegalArgumentException("type must not be null");
  }
  this.type = type;
  this.name = name;
}
Create field with Reader value.
Params:
  • name – field name
  • reader – reader value
  • type – field type
Throws:
/** * Create field with Reader value. * @param name field name * @param reader reader value * @param type field type * @throws IllegalArgumentException if either the name or type * is null, or if the field's type is stored(), or * if tokenized() is false. * @throws NullPointerException if the reader is null */
public Field(String name, Reader reader, IndexableFieldType type) { if (name == null) { throw new IllegalArgumentException("name must not be null"); } if (type == null) { throw new IllegalArgumentException("type must not be null"); } if (reader == null) { throw new NullPointerException("reader must not be null"); } if (type.stored()) { throw new IllegalArgumentException("fields with a Reader value cannot be stored"); } if (type.indexOptions() != IndexOptions.NONE && !type.tokenized()) { throw new IllegalArgumentException("non-tokenized fields must use String values"); } this.name = name; this.fieldsData = reader; this.type = type; }
/**
 * Create field with TokenStream value.
 *
 * @param name field name
 * @param tokenStream TokenStream value
 * @param type field type
 * @throws IllegalArgumentException if either the name or type is null,
 *         if the field's type is not indexed or not tokenized, or if
 *         the field's type is stored().
 * @throws NullPointerException if the tokenStream is null
 */
public Field(String name, TokenStream tokenStream, IndexableFieldType type) {
  if (name == null) {
    throw new IllegalArgumentException("name must not be null");
  }
  if (tokenStream == null) {
    throw new NullPointerException("tokenStream must not be null");
  }
  // Reject a null type explicitly; otherwise type.indexOptions() below would
  // fail with a NullPointerException instead of the documented exception.
  if (type == null) {
    throw new IllegalArgumentException("type must not be null");
  }
  if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) {
    throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized");
  }
  if (type.stored()) {
    throw new IllegalArgumentException("TokenStream fields cannot be stored");
  }

  this.name = name;
  this.fieldsData = null; // the value lives in tokenStream only
  this.tokenStream = tokenStream;
  this.type = type;
}
/**
 * Create field with binary value.
 *
 * <p>NOTE: the provided byte[] is not copied so be sure
 * not to change it until you're done with this field.
 *
 * @param name field name
 * @param value byte array pointing to binary content (not copied)
 * @param type field type
 * @throws IllegalArgumentException if the field name, value or type
 *         is null.
 */
public Field(String name, byte[] value, IndexableFieldType type) {
  // Do not dereference a null array here: pass it on so the delegate
  // constructors report it as an IllegalArgumentException.
  this(name, value, 0, value == null ? 0 : value.length, type);
}
/**
 * Create field with binary value taken from a slice of a byte array.
 *
 * <p>NOTE: the provided byte[] is not copied so be sure
 * not to change it until you're done with this field.
 *
 * @param name field name
 * @param value byte array pointing to binary content (not copied)
 * @param offset starting position of the byte array
 * @param length valid length of the byte array
 * @param type field type
 * @throws IllegalArgumentException if the field name, value or type
 *         is null.
 */
public Field(String name, byte[] value, int offset, int length, IndexableFieldType type) {
  // A null array is forwarded as a null BytesRef; the BytesRef constructor
  // of this class is the one that rejects it.
  this(name, value == null ? null : new BytesRef(value, offset, length), type);
}
/**
 * Create field with binary value.
 *
 * <p>NOTE: the provided BytesRef is not copied so be sure
 * not to change it until you're done with this field.
 *
 * @param name field name
 * @param bytes BytesRef pointing to binary content (not copied)
 * @param type field type
 * @throws IllegalArgumentException if the field name, bytes or type
 *         is null.
 */
public Field(String name, BytesRef bytes, IndexableFieldType type) {
  // Checks run in the order name, bytes, type; the first null found is reported.
  if (name == null) {
    throw new IllegalArgumentException("name must not be null");
  }
  if (bytes == null) {
    throw new IllegalArgumentException("bytes must not be null");
  }
  if (type == null) {
    throw new IllegalArgumentException("type must not be null");
  }

  this.type = type;
  this.name = name;
  this.fieldsData = bytes;
}

// TODO: allow direct construction of int, long, float, double value too..?
Create field with String value.
Params:
  • name – field name
  • value – string value
  • type – field type
Throws:
  • IllegalArgumentException – if either the name, value or type is null, or if the field's type is neither indexed() nor stored(), or if indexed() is false but storeTermVectors() is true.
/** * Create field with String value. * @param name field name * @param value string value * @param type field type * @throws IllegalArgumentException if either the name, value or type * is null, or if the field's type is neither indexed() nor stored(), * or if indexed() is false but storeTermVectors() is true. */
public Field(String name, CharSequence value, IndexableFieldType type) { if (name == null) { throw new IllegalArgumentException("name must not be null"); } if (value == null) { throw new IllegalArgumentException("value must not be null"); } if (type == null) { throw new IllegalArgumentException("type must not be null"); } if (!type.stored() && type.indexOptions() == IndexOptions.NONE) { throw new IllegalArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored"); } this.name = name; this.fieldsData = value; this.type = type; }
The value of the field as a String, or null. If null, the Reader value or binary value is used. Exactly one of stringValue(), readerValue(), and binaryValue() must be set.
/** * The value of the field as a String, or null. If null, the Reader value or * binary value is used. Exactly one of stringValue(), readerValue(), and * binaryValue() must be set. */
@Override public String stringValue() { if (fieldsData instanceof CharSequence || fieldsData instanceof Number) { return fieldsData.toString(); } else { return null; } } @Override public CharSequence getCharSequenceValue() { return fieldsData instanceof CharSequence ? (CharSequence) fieldsData : stringValue(); }
The value of the field as a Reader, or null. If null, the String value or binary value is used. Exactly one of stringValue(), readerValue(), and binaryValue() must be set.
/** * The value of the field as a Reader, or null. If null, the String value or * binary value is used. Exactly one of stringValue(), readerValue(), and * binaryValue() must be set. */
@Override public Reader readerValue() { return fieldsData instanceof Reader ? (Reader) fieldsData : null; }
The TokenStream for this field to be used when indexing, or null. If null, the Reader value or String value is analyzed to produce the indexed tokens.
/** * The TokenStream for this field to be used when indexing, or null. If null, * the Reader value or String value is analyzed to produce the indexed tokens. */
public TokenStream tokenStreamValue() { return tokenStream; }

Expert: change the value of this field. This can be used during indexing to re-use a single Field instance to improve indexing speed by avoiding GC cost of new'ing and reclaiming Field instances. Typically a single Document instance is re-used as well. This helps most on small documents.

Each Field instance should only be used once within a single Document instance. See ImproveIndexingSpeed for details.

/** * <p> * Expert: change the value of this field. This can be used during indexing to * re-use a single Field instance to improve indexing speed by avoiding GC * cost of new'ing and reclaiming Field instances. Typically a single * {@link Document} instance is re-used as well. This helps most on small * documents. * </p> * * <p> * Each Field instance should only be used once within a single * {@link Document} instance. See <a * href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed" * >ImproveIndexingSpeed</a> for details. * </p> */
public void setStringValue(String value) { if (!(fieldsData instanceof String)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to String"); } if (value == null) { throw new IllegalArgumentException("value must not be null"); } fieldsData = value; }
Expert: change the value of this field. See setStringValue(String).
/** * Expert: change the value of this field. See * {@link #setStringValue(String)}. */
public void setReaderValue(Reader value) { if (!(fieldsData instanceof Reader)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Reader"); } fieldsData = value; }
Expert: change the value of this field. See setStringValue(String).
/** * Expert: change the value of this field. See * {@link #setStringValue(String)}. */
public void setBytesValue(byte[] value) { setBytesValue(new BytesRef(value)); }
Expert: change the value of this field. See setStringValue(String).

NOTE: the provided BytesRef is not copied so be sure not to change it until you're done with this field.

/** * Expert: change the value of this field. See * {@link #setStringValue(String)}. * * <p>NOTE: the provided BytesRef is not copied so be sure * not to change it until you're done with this field. */
public void setBytesValue(BytesRef value) { if (!(fieldsData instanceof BytesRef)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to BytesRef"); } if (type.indexOptions() != IndexOptions.NONE) { throw new IllegalArgumentException("cannot set a BytesRef value on an indexed field"); } if (value == null) { throw new IllegalArgumentException("value must not be null"); } fieldsData = value; }
Expert: change the value of this field. See setStringValue(String).
/** * Expert: change the value of this field. See * {@link #setStringValue(String)}. */
public void setByteValue(byte value) { if (!(fieldsData instanceof Byte)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Byte"); } fieldsData = Byte.valueOf(value); }
Expert: change the value of this field. See setStringValue(String).
/** * Expert: change the value of this field. See * {@link #setStringValue(String)}. */
public void setShortValue(short value) { if (!(fieldsData instanceof Short)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Short"); } fieldsData = Short.valueOf(value); }
Expert: change the value of this field. See setStringValue(String).
/** * Expert: change the value of this field. See * {@link #setStringValue(String)}. */
public void setIntValue(int value) { if (!(fieldsData instanceof Integer)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Integer"); } fieldsData = Integer.valueOf(value); }
Expert: change the value of this field. See setStringValue(String).
/** * Expert: change the value of this field. See * {@link #setStringValue(String)}. */
public void setLongValue(long value) { if (!(fieldsData instanceof Long)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Long"); } fieldsData = Long.valueOf(value); }
Expert: change the value of this field. See setStringValue(String).
/** * Expert: change the value of this field. See * {@link #setStringValue(String)}. */
public void setFloatValue(float value) { if (!(fieldsData instanceof Float)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Float"); } fieldsData = Float.valueOf(value); }
Expert: change the value of this field. See setStringValue(String).
/** * Expert: change the value of this field. See * {@link #setStringValue(String)}. */
public void setDoubleValue(double value) { if (!(fieldsData instanceof Double)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Double"); } fieldsData = Double.valueOf(value); }
Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true. May be combined with stored values from stringValue() or binaryValue()
/** * Expert: sets the token stream to be used for indexing and causes * isIndexed() and isTokenized() to return true. May be combined with stored * values from stringValue() or binaryValue() */
public void setTokenStream(TokenStream tokenStream) { if (type.indexOptions() == IndexOptions.NONE || !type.tokenized()) { throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized"); } this.tokenStream = tokenStream; } @Override public String name() { return name; } @Override public Number numericValue() { if (fieldsData instanceof Number) { return (Number) fieldsData; } else { return null; } } @Override public BytesRef binaryValue() { if (fieldsData instanceof BytesRef) { return (BytesRef) fieldsData; } else { return null; } }
Prints a Field for human consumption.
/** Prints a Field for human consumption. */
@Override public String toString() { StringBuilder result = new StringBuilder(); result.append(type.toString()); result.append('<'); result.append(name); result.append(':'); if (fieldsData != null) { result.append(fieldsData); } result.append('>'); return result.toString(); }
Returns the FieldType for this field.
/** Returns the {@link FieldType} for this field. */
@Override public IndexableFieldType fieldType() { return type; } @Override public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { if (fieldType().indexOptions() == IndexOptions.NONE) { // Not indexed return null; } if (!fieldType().tokenized()) { if (stringValue() != null) { if (!(reuse instanceof StringTokenStream)) { // lazy init the TokenStream as it is heavy to instantiate // (attributes,...) if not needed reuse = new StringTokenStream(); } ((StringTokenStream) reuse).setValue(stringValue()); return reuse; } else if (binaryValue() != null) { if (!(reuse instanceof BinaryTokenStream)) { // lazy init the TokenStream as it is heavy to instantiate // (attributes,...) if not needed reuse = new BinaryTokenStream(); } ((BinaryTokenStream) reuse).setValue(binaryValue()); return reuse; } else { throw new IllegalArgumentException("Non-Tokenized Fields must have a String value"); } } if (tokenStream != null) { return tokenStream; } else if (readerValue() != null) { return analyzer.tokenStream(name(), readerValue()); } else if (stringValue() != null) { return analyzer.tokenStream(name(), stringValue()); } throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value; got " + this); } private static final class BinaryTokenStream extends TokenStream { private final BytesTermAttribute bytesAtt = addAttribute(BytesTermAttribute.class); private boolean used = true; private BytesRef value;
Creates a new TokenStream that returns a BytesRef as single token.

Warning: Does not initialize the value, you must call setValue(BytesRef) afterwards!

/** Creates a new TokenStream that returns a BytesRef as single token. * <p>Warning: Does not initialize the value, you must call * {@link #setValue(BytesRef)} afterwards! */
BinaryTokenStream() { } public void setValue(BytesRef value) { this.value = value; } @Override public boolean incrementToken() { if (used) { return false; } clearAttributes(); bytesAtt.setBytesRef(value); used = true; return true; } @Override public void reset() { used = false; } @Override public void close() { value = null; } } private static final class StringTokenStream extends TokenStream { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); private boolean used = true; private String value = null;
Creates a new TokenStream that returns a String as single token.

Warning: Does not initialize the value, you must call setValue(String) afterwards!

/** Creates a new TokenStream that returns a String as single token. * <p>Warning: Does not initialize the value, you must call * {@link #setValue(String)} afterwards! */
StringTokenStream() { }
Sets the string value.
/** Sets the string value. */
void setValue(String value) { this.value = value; } @Override public boolean incrementToken() { if (used) { return false; } clearAttributes(); termAttribute.append(value); offsetAttribute.setOffset(0, value.length()); used = true; return true; } @Override public void end() throws IOException { super.end(); final int finalOffset = value.length(); offsetAttribute.setOffset(finalOffset, finalOffset); } @Override public void reset() { used = false; } @Override public void close() { value = null; } }
Specifies whether and how a field should be stored.
/** Specifies whether and how a field should be stored. */
public static enum Store {
Store the original field value in the index. This is useful for short texts like a document's title which should be displayed with the results. The value is stored in its original form, i.e. no analyzer is used before it is stored.
/** Store the original field value in the index. This is useful for short texts * like a document's title which should be displayed with the results. The * value is stored in its original form, i.e. no analyzer is used before it is * stored. */
YES,
Do not store the field value in the index.
/** Do not store the field value in the index. */
NO } }