/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.ja;


import org.apache.lucene.analysis.ja.JapaneseTokenizer.Type;
import org.apache.lucene.analysis.ja.dict.Dictionary;

Analyzed token with morphological data from its dictionary.
/** * Analyzed token with morphological data from its dictionary. */
public class Token { private final Dictionary dictionary; private final int wordId; private final char[] surfaceForm; private final int offset; private final int length; private final int position; private int positionLength; private final Type type; public Token(int wordId, char[] surfaceForm, int offset, int length, Type type, int position, Dictionary dictionary) { this.wordId = wordId; this.surfaceForm = surfaceForm; this.offset = offset; this.length = length; this.type = type; this.position = position; this.dictionary = dictionary; } @Override public String toString() { return "Token(\"" + new String(surfaceForm, offset, length) + "\" pos=" + position + " length=" + length + " posLen=" + positionLength + " type=" + type + " wordId=" + wordId + " leftID=" + dictionary.getLeftId(wordId) + ")"; }
Returns:surfaceForm
/** * @return surfaceForm */
public char[] getSurfaceForm() { return surfaceForm; }
Returns:offset into surfaceForm
/** * @return offset into surfaceForm */
public int getOffset() { return offset; }
Returns:length of surfaceForm
/** * @return length of surfaceForm */
public int getLength() { return length; }
Returns:surfaceForm as a String
/** * @return surfaceForm as a String */
public String getSurfaceFormString() { return new String(surfaceForm, offset, length); }
Returns:reading. null if token doesn't have reading.
/** * @return reading. null if token doesn't have reading. */
public String getReading() { return dictionary.getReading(wordId, surfaceForm, offset, length); }
Returns:pronunciation. null if token doesn't have pronunciation.
/** * @return pronunciation. null if token doesn't have pronunciation. */
public String getPronunciation() { return dictionary.getPronunciation(wordId, surfaceForm, offset, length); }
Returns:part of speech.
/** * @return part of speech. */
public String getPartOfSpeech() { return dictionary.getPartOfSpeech(wordId); }
Returns:inflection type or null
/** * @return inflection type or null */
public String getInflectionType() { return dictionary.getInflectionType(wordId); }
Returns:inflection form or null
/** * @return inflection form or null */
public String getInflectionForm() { return dictionary.getInflectionForm(wordId); }
Returns:base form or null if token is not inflected
/** * @return base form or null if token is not inflected */
public String getBaseForm() { return dictionary.getBaseForm(wordId, surfaceForm, offset, length); }
Returns the type of this token
Returns:token type, not null
/** * Returns the type of this token * @return token type, not null */
public Type getType() { return type; }
Returns true if this token is known word
Returns:true if this token is in standard dictionary. false if not.
/** * Returns true if this token is known word * @return true if this token is in standard dictionary. false if not. */
public boolean isKnown() { return type == Type.KNOWN; }
Returns true if this token is unknown word
Returns:true if this token is unknown word. false if not.
/** * Returns true if this token is unknown word * @return true if this token is unknown word. false if not. */
public boolean isUnknown() { return type == Type.UNKNOWN; }
Returns true if this token is defined in user dictionary
Returns:true if this token is in user dictionary. false if not.
/** * Returns true if this token is defined in user dictionary * @return true if this token is in user dictionary. false if not. */
public boolean isUser() { return type == Type.USER; }
Get index of this token in input text
Returns:position of token
/** * Get index of this token in input text * @return position of token */
public int getPosition() { return position; }
Set the position length (in tokens) of this token. For normal tokens this is 1; for compound tokens it's > 1.
/** * Set the position length (in tokens) of this token. For normal * tokens this is 1; for compound tokens it's > 1. */
public void setPositionLength(int positionLength) { this.positionLength = positionLength; }
Get the length (in tokens) of this token. For normal tokens this is 1; for compound tokens it's > 1.
Returns:position length of token
/** * Get the length (in tokens) of this token. For normal * tokens this is 1; for compound tokens it's > 1. * @return position length of token */
public int getPositionLength() { return positionLength; } }