/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;

Information about a segment such as its name, directory, and files related to the segment.
@lucene.experimental
/** * Information about a segment such as its name, directory, and files related * to the segment. * * @lucene.experimental */
public final class SegmentInfo { // TODO: remove these from this class, for now this is the representation
Used by some member fields to mean not present (e.g., norms, deletions).
/** Used by some member fields to mean not present (e.g., * norms, deletions). */
public static final int NO = -1; // e.g. no norms; no deletes;
Used by some member fields to mean present (e.g., norms, deletions).
/** Used by some member fields to mean present (e.g., * norms, deletions). */
public static final int YES = 1; // e.g. have norms; have deletes;
Unique segment name in the directory.
/** Unique segment name in the directory. */
public final String name; private int maxDoc; // number of docs in seg
Where this segment resides.
/** Where this segment resides. */
public final Directory dir; private boolean isCompoundFile;
Id that uniquely identifies this segment.
/** Id that uniquely identifies this segment. */
private final byte[] id; private Codec codec; private Map<String,String> diagnostics; private Map<String,String> attributes; private final Sort indexSort; // Tracks the Lucene version this segment was created with, since 3.1. Null // indicates an older than 3.0 index, and it's used to detect a too old index. // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and // specific versions afterwards ("3.0.0", "3.1.0" etc.). // see o.a.l.util.Version. private final Version version; // Tracks the minimum version that contributed documents to a segment. For // flush segments, that is the version that wrote it. For merged segments, // this is the minimum minVersion of all the segments that have been merged // into this segment Version minVersion; void setDiagnostics(Map<String, String> diagnostics) { this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics))); }
Returns diagnostics saved into the segment when it was written. The map is immutable.
/** Returns diagnostics saved into the segment when it was * written. The map is immutable. */
public Map<String, String> getDiagnostics() { return diagnostics; }
Construct a new complete SegmentInfo instance from input.

Note: this is public only to allow access from the codecs package.

/** * Construct a new complete SegmentInfo instance from input. * <p>Note: this is public only to allow access from * the codecs package.</p> */
public SegmentInfo(Directory dir, Version version, Version minVersion, String name, int maxDoc, boolean isCompoundFile, Codec codec, Map<String,String> diagnostics, byte[] id, Map<String,String> attributes, Sort indexSort) { assert !(dir instanceof TrackingDirectoryWrapper); this.dir = Objects.requireNonNull(dir); this.version = Objects.requireNonNull(version); this.minVersion = minVersion; this.name = Objects.requireNonNull(name); this.maxDoc = maxDoc; this.isCompoundFile = isCompoundFile; this.codec = codec; this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics))); this.id = id; if (id.length != StringHelper.ID_LENGTH) { throw new IllegalArgumentException("invalid id: " + Arrays.toString(id)); } this.attributes = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(attributes))); this.indexSort = indexSort; }
Mark whether this segment is stored as a compound file.
Params:
  • isCompoundFile – true if this is a compound file; else, false
/** * Mark whether this segment is stored as a compound file. * * @param isCompoundFile true if this is a compound file; * else, false */
void setUseCompoundFile(boolean isCompoundFile) { this.isCompoundFile = isCompoundFile; }
Returns true if this segment is stored as a compound file; else, false.
/** * Returns true if this segment is stored as a compound * file; else, false. */
public boolean getUseCompoundFile() { return isCompoundFile; }
Can only be called once.
/** Can only be called once. */
public void setCodec(Codec codec) { assert this.codec == null; if (codec == null) { throw new IllegalArgumentException("codec must be non-null"); } this.codec = codec; }
Return Codec that wrote this segment.
/** Return {@link Codec} that wrote this segment. */
public Codec getCodec() { return codec; }
Returns number of documents in this segment (deletions are not taken into account).
/** Returns number of documents in this segment (deletions * are not taken into account). */
public int maxDoc() { if (this.maxDoc == -1) { throw new IllegalStateException("maxDoc isn't set yet"); } return maxDoc; } // NOTE: leave package private void setMaxDoc(int maxDoc) { if (this.maxDoc != -1) { throw new IllegalStateException("maxDoc was already set: this.maxDoc=" + this.maxDoc + " vs maxDoc=" + maxDoc); } this.maxDoc = maxDoc; }
Return all files referenced by this SegmentInfo.
/** Return all files referenced by this SegmentInfo. */
public Set<String> files() { if (setFiles == null) { throw new IllegalStateException("files were not computed yet; segment=" + name + " maxDoc=" + maxDoc); } return Collections.unmodifiableSet(setFiles); } @Override public String toString() { return toString(0); }
Used for debugging. Format may suddenly change.

Current format looks like _a(3.1):c45/4:[sorter=<long: "timestamp">!], which means the segment's name is _a; it was created with Lucene 3.1 (or '?' if it's unknown); it's using compound file format (would be C if not compound); it has 45 documents; it has 4 deletions (this part is left off when there are no deletions); it is sorted by the timestamp field in descending order (this part is omitted for unsorted segments).

/** Used for debugging. Format may suddenly change. * * <p>Current format looks like * <code>_a(3.1):c45/4:[sorter=&lt;long: "timestamp"&gt;!]</code>, which means * the segment's name is <code>_a</code>; it was created with Lucene 3.1 (or * '?' if it's unknown); it's using compound file * format (would be <code>C</code> if not compound); it * has 45 documents; it has 4 deletions (this part is * left off when there are no deletions); it is sorted by the timestamp field * in descending order (this part is omitted for unsorted segments).</p> */
public String toString(int delCount) { StringBuilder s = new StringBuilder(); s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':'); char cfs = getUseCompoundFile() ? 'c' : 'C'; s.append(cfs); s.append(maxDoc); if (delCount != 0) { s.append('/').append(delCount); } if (indexSort != null) { s.append(":[indexSort="); s.append(indexSort); s.append(']'); } if (!diagnostics.isEmpty()) { s.append(":[diagnostics="); s.append(diagnostics.toString()); s.append(']'); } if (!attributes.isEmpty()) { s.append(":[attributes="); s.append(attributes.toString()); s.append(']'); } return s.toString(); }
We consider another SegmentInfo instance equal if it has the same dir and same name.
/** We consider another SegmentInfo instance equal if it * has the same dir and same name. */
@Override public boolean equals(Object obj) { if (this == obj) return true; if (obj instanceof SegmentInfo) { final SegmentInfo other = (SegmentInfo) obj; return other.dir == dir && other.name.equals(name); } else { return false; } } @Override public int hashCode() { return dir.hashCode() + name.hashCode(); }
Returns the version of the code which wrote the segment.
/** Returns the version of the code which wrote the segment. */
public Version getVersion() { return version; }
Return the minimum Lucene version that contributed documents to this segment, or null if it is unknown.
/** * Return the minimum Lucene version that contributed documents to this * segment, or {@code null} if it is unknown. */
public Version getMinVersion() { return minVersion; }
Return the id that uniquely identifies this segment.
/** Return the id that uniquely identifies this segment. */
public byte[] getId() { return id.clone(); } private Set<String> setFiles;
Sets the files written for this segment.
/** Sets the files written for this segment. */
public void setFiles(Collection<String> files) { setFiles = new HashSet<>(); addFiles(files); }
Add these files to the set of files written for this segment.
/** Add these files to the set of files written for this * segment. */
public void addFiles(Collection<String> files) { checkFileNames(files); for (String f : files) { setFiles.add(namedForThisSegment(f)); } }
Add this file to the set of files written for this segment.
/** Add this file to the set of files written for this * segment. */
public void addFile(String file) { checkFileNames(Collections.singleton(file)); setFiles.add(namedForThisSegment(file)); } private void checkFileNames(Collection<String> files) { Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher(""); for (String file : files) { m.reset(file); if (!m.matches()) { throw new IllegalArgumentException("invalid codec filename '" + file + "', must match: " + IndexFileNames.CODEC_FILE_PATTERN.pattern()); } if (file.toLowerCase(Locale.ROOT).endsWith(".tmp")) { throw new IllegalArgumentException("invalid codec filename '" + file + "', cannot end with .tmp extension"); } } }
strips any segment name from the file, naming it with this segment this is because "segment names" can change, e.g. by addIndexes(Dir)
/** * strips any segment name from the file, naming it with this segment * this is because "segment names" can change, e.g. by addIndexes(Dir) */
String namedForThisSegment(String file) { return name + IndexFileNames.stripSegmentName(file); }
Get a codec attribute value, or null if it does not exist
/** * Get a codec attribute value, or null if it does not exist */
public String getAttribute(String key) { return attributes.get(key); }
Puts a codec attribute value.

This is a key-value mapping for the field that the codec can use to store additional metadata, and will be available to the codec when reading the segment via getAttribute(String)

If a value already exists for the field, it will be replaced with the new value. This method make a copy on write for every attribute change.

/** * Puts a codec attribute value. * <p> * This is a key-value mapping for the field that the codec can use to store * additional metadata, and will be available to the codec when reading the * segment via {@link #getAttribute(String)} * <p> * If a value already exists for the field, it will be replaced with the new * value. * This method make a copy on write for every attribute change. */
public String putAttribute(String key, String value) { HashMap<String, String> newMap = new HashMap<>(attributes); String oldValue = newMap.put(key, value); // we make a full copy of this to prevent concurrent modifications to this in the toString method // this method is only called when a segment is written but the SegmentInfo might be exposed // in running merges which can cause ConcurrentModificationExceptions if we modify / share // the same instance. Technically that's an unsafe publication but IW design would require // significant changes to prevent this. On the other hand, since we expose the map in getAttributes() // it's a good design to make it unmodifiable anyway. attributes = Collections.unmodifiableMap(newMap); return oldValue; }
Returns the internal codec attributes map.
Returns:internal codec attributes map.
/** * Returns the internal codec attributes map. * @return internal codec attributes map. */
public Map<String,String> getAttributes() { return attributes; }
Return the sort order of this segment, or null if the index has no sort.
/** Return the sort order of this segment, or null if the index has no sort. */
public Sort getIndexSort() { return indexSort; } }