/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.uhighlight;

import java.text.BreakIterator;
import java.text.CharacterIterator;

// A BreakIterator that breaks the text whenever a certain separator, provided as a constructor argument, is found.
/**
 * A {@link BreakIterator} that breaks the text whenever a certain separator, provided as a
 * constructor argument, is found.
 *
 * <p>The boundaries reported are: the begin index of the text, the index immediately after every
 * occurrence of the separator, and the end index of the text.
 *
 * <p>After every positioning call the index of the underlying {@link CharacterIterator} is kept
 * equal to {@link #current()}. {@link #setText(CharacterIterator)} (or the inherited {@code
 * setText(String)}) must be called before any navigation method; otherwise those methods fail
 * with a {@link NullPointerException}.
 */
public final class CustomSeparatorBreakIterator extends BreakIterator {

  /** Character after which a boundary is reported. */
  private final char separator;

  /** Text being scanned; {@code null} until {@code setText} is called. */
  private CharacterIterator text;

  /** Most recently returned boundary (or the begin index right after {@code setText}). */
  private int current;

  /**
   * Creates an iterator that reports a boundary right after each occurrence of the separator.
   *
   * @param separator the character that terminates a segment
   */
  public CustomSeparatorBreakIterator(char separator) {
    this.separator = separator;
  }

  /** Returns the boundary most recently returned by a positioning method. */
  @Override
  public int current() {
    return current;
  }

  /** Moves to the begin index of the text and returns it. */
  @Override
  public int first() {
    text.setIndex(text.getBeginIndex());
    return current = text.getIndex();
  }

  /** Moves to the end index of the text and returns it. */
  @Override
  public int last() {
    text.setIndex(text.getEndIndex());
    return current = text.getIndex();
  }

  /**
   * Moves to the boundary after the current one.
   *
   * @return the new boundary, or {@link #DONE} if the current boundary is already the end of the
   *     text (the position is left unchanged in that case)
   */
  @Override
  public int next() {
    if (current == text.getEndIndex()) {
      return DONE;
    }
    text.setIndex(current);
    return advanceForward();
  }

  /**
   * Scans forward starting with the character at the text's current index and stops right after
   * the first separator found, or at the end index when there is none. Updates {@code current}
   * and leaves the text index on the returned boundary.
   */
  private int advanceForward() {
    final int end = text.getEndIndex();
    char c = text.current();
    // Terminate on the index rather than on CharacterIterator.DONE so that a literal U+FFFF in
    // the text (or used as separator) does not cut the scan short.
    while (text.getIndex() < end) {
      if (c == separator) {
        text.next(); // step past the separator; the iterator clamps at the end index
        return current = text.getIndex();
      }
      c = text.next();
    }
    return current = text.getIndex();
  }

  /**
   * Moves to the first boundary strictly greater than {@code pos}.
   *
   * @param pos an offset between the begin and end index of the text, inclusive
   * @return the boundary found, or {@link #DONE} if {@code pos} is the end index
   * @throws IllegalArgumentException if {@code pos} is outside the text
   */
  @Override
  public int following(int pos) {
    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
      throw new IllegalArgumentException("offset out of bounds");
    } else if (pos == text.getEndIndex()) {
      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in
      // something)
      // https://bugs.openjdk.java.net/browse/JDK-8015110
      text.setIndex(text.getEndIndex());
      current = text.getIndex();
      return DONE;
    } else {
      // Start the scan AT pos: a separator sitting exactly at pos yields the boundary pos + 1.
      text.setIndex(pos);
      current = text.getIndex();
      return advanceForward();
    }
  }

  /**
   * Moves to the boundary before the current one.
   *
   * @return the new boundary, or {@link #DONE} if the current boundary is already the begin of
   *     the text (the position is left unchanged in that case)
   */
  @Override
  public int previous() {
    if (current == text.getBeginIndex()) {
      return DONE;
    }
    text.setIndex(current);
    return advanceBackward();
  }

  /**
   * Scans backward for the closest boundary strictly before the text's current index, which must
   * be greater than the begin index. Updates {@code current} and leaves the text index on the
   * returned boundary.
   */
  private int advanceBackward() {
    final int begin = text.getBeginIndex();
    // The character right before the starting index could only produce the starting index itself
    // as a boundary, which is not strictly before it, so skip it without inspecting it.
    text.previous();
    while (text.getIndex() > begin) {
      if (text.previous() == separator) {
        text.next(); // the boundary is the position right after the separator
        return current = text.getIndex();
      }
    }
    return current = text.getIndex();
  }

  /**
   * Moves to the last boundary strictly less than {@code pos}.
   *
   * @param pos an offset between the begin and end index of the text, inclusive
   * @return the boundary found, or {@link #DONE} if {@code pos} is the begin index
   * @throws IllegalArgumentException if {@code pos} is outside the text
   */
  @Override
  public int preceding(int pos) {
    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
      throw new IllegalArgumentException("offset out of bounds");
    } else if (pos == text.getBeginIndex()) {
      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in
      // something)
      // https://bugs.openjdk.java.net/browse/JDK-8015110
      text.setIndex(text.getBeginIndex());
      current = text.getIndex();
      return DONE;
    } else {
      text.setIndex(pos);
      current = text.getIndex();
      return advanceBackward();
    }
  }

  /**
   * Moves {@code n} boundaries forward ({@code n > 0}) or backward ({@code n < 0}), stopping
   * early at either end of the text.
   *
   * @return the resulting {@link #current()} position; note that this implementation returns the
   *     position reached rather than {@link #DONE} when an end of the text is hit
   */
  @Override
  public int next(int n) {
    if (n < 0) {
      for (int i = 0; i < -n; i++) {
        previous();
      }
    } else {
      for (int i = 0; i < n; i++) {
        next();
      }
    }
    return current();
  }

  /** Returns the text being scanned (the same instance that was passed to {@code setText}). */
  @Override
  public CharacterIterator getText() {
    return text;
  }

  /**
   * Sets the text to scan and resets the position to its begin index.
   *
   * @param newText the text to scan; must not be {@code null}
   */
  @Override
  public void setText(CharacterIterator newText) {
    text = newText;
    current = text.getBeginIndex();
    // Keep the text index in step with the reported position.
    text.setIndex(current);
  }
}