Dependencies
Declarations
-
org
-
apache
-
lucene
-
analysis
-
ko
- DecompoundToken
-
DictionaryToken
- wordId: int
- type: Type
- dictionary: Dictionary
- DictionaryToken(Type, Dictionary, int, char[], int, int, int, int): void
- toString(): String
- getType(): Type
- isKnown(): boolean
- isUnknown(): boolean
- isUser(): boolean
- getPOSType(): Type
- getLeftPOS(): Tag
- getRightPOS(): Tag
- getReading(): String
- getMorphemes(): Morpheme[]
-
GraphvizFormatter
- BOS_LABEL: String
- EOS_LABEL: String
- FONT_NAME: String
- costs: ConnectionCosts
- bestPathMap: Map<String, String>
- sb: StringBuilder
- GraphvizFormatter(ConnectionCosts): void
- finish(): String
- onBacktrace(KoreanTokenizer, WrappedPositionArray, int, Position, int, char[], boolean): void
- setBestPathMap(WrappedPositionArray, int, Position, int): void
- formatNodes(KoreanTokenizer, WrappedPositionArray, int, Position, char[]): String
- formatHeader(): String
- formatTrailer(): String
- getNodeID(int, int): String
- KoreanAnalyzer
-
KoreanNumberFilter
- termAttr: CharTermAttribute
- offsetAttr: OffsetAttribute
- keywordAttr: KeywordAttribute
- posIncrAttr: PositionIncrementAttribute
- posLengthAttr: PositionLengthAttribute
- NO_NUMERAL: char
- numerals: char[]
- exponents: char[]
- state: State
- numeral: StringBuilder
- fallThroughTokens: int
- exhausted: boolean
- static class initializer
- KoreanNumberFilter(TokenStream): void
- incrementToken(): boolean
- reset(): void
- normalizeNumber(String): String
- parseNumber(NumberBuffer): BigDecimal
- parseLargePair(NumberBuffer): BigDecimal
- parseMediumNumber(NumberBuffer): BigDecimal
- parseMediumPair(NumberBuffer): BigDecimal
- parseBasicNumber(NumberBuffer): BigDecimal
- parseLargeHangulNumeral(NumberBuffer): BigDecimal
- parseMediumHangulNumeral(NumberBuffer): BigDecimal
- isNumeral(String): boolean
- isNumeral(char): boolean
- isNumeralPunctuation(String): boolean
- isNumeralPunctuation(char): boolean
- isArabicNumeral(char): boolean
- isHalfWidthArabicNumeral(char): boolean
- isFullWidthArabicNumeral(char): boolean
- arabicNumeralValue(char): int
- isHangulNumeral(char): boolean
- HangulNumeralValue(char): int
- isDecimalPoint(char): boolean
- isThousandSeparator(char): boolean
- NumberBuffer
- KoreanNumberFilterFactory
- KoreanPartOfSpeechStopFilter
- KoreanPartOfSpeechStopFilterFactory
- KoreanReadingFormFilter
- KoreanReadingFormFilterFactory
-
KoreanTokenizer
- Type
- DecompoundMode
- DEFAULT_DECOMPOUND: DecompoundMode
- VERBOSE: boolean
- MAX_UNKNOWN_WORD_LENGTH: int
- MAX_BACKTRACE_GAP: int
- dictionaryMap: EnumMap<Type, Dictionary>
- fst: TokenInfoFST
- dictionary: TokenInfoDictionary
- unkDictionary: UnknownDictionary
- costs: ConnectionCosts
- userDictionary: UserDictionary
- characterDefinition: CharacterDefinition
- arc: Arc<Long>
- fstReader: BytesReader
- wordIdRef: IntsRef
- userFSTReader: BytesReader
- userFST: TokenInfoFST
- discardPunctuation: boolean
- mode: DecompoundMode
- outputUnknownUnigrams: boolean
- buffer: RollingCharBuffer
- positions: WrappedPositionArray
- end: boolean
- lastBackTracePos: int
- pos: int
- pending: List<Token>
- termAtt: CharTermAttribute
- offsetAtt: OffsetAttribute
- posIncAtt: PositionIncrementAttribute
- posLengthAtt: PositionLengthAttribute
- posAtt: PartOfSpeechAttribute
- readingAtt: ReadingAttribute
- KoreanTokenizer(): void
- KoreanTokenizer(AttributeFactory, UserDictionary, DecompoundMode, boolean): void
- KoreanTokenizer(AttributeFactory, UserDictionary, DecompoundMode, boolean, boolean): void
- dotOut: GraphvizFormatter
- setGraphvizFormatter(GraphvizFormatter): void
- close(): void
- reset(): void
- resetState(): void
- end(): void
- Position
- computeSpacePenalty(Tag, int): int
- add(Dictionary, Position, int, int, int, Type): void
- incrementToken(): boolean
- WrappedPositionArray
- parse(): void
- backtrace(Position, int): void
- getDict(Type): Dictionary
- shouldFilterToken(Token): boolean
- isPunctuation(char): boolean
- isPunctuation(char, int): boolean
- isCommonOrInherited(UnicodeScript): boolean
- isSameScript(UnicodeScript, UnicodeScript): boolean
-
KoreanTokenizerFactory
- USER_DICT_PATH: String
- USER_DICT_ENCODING: String
- DECOMPOUND_MODE: String
- OUTPUT_UNKNOWN_UNIGRAMS: String
- DISCARD_PUNCTUATION: String
- userDictionaryPath: String
- userDictionaryEncoding: String
- userDictionary: UserDictionary
- mode: DecompoundMode
- outputUnknownUnigrams: boolean
- discardPunctuation: boolean
- KoreanTokenizerFactory(Map<String, String>): void
- inform(ResourceLoader): void
- create(AttributeFactory): KoreanTokenizer
-
POS
- Type
-
Tag
- E: Tag
- IC: Tag
- J: Tag
- MAG: Tag
- MAJ: Tag
- MM: Tag
- NNG: Tag
- NNP: Tag
- NNB: Tag
- NNBC: Tag
- NP: Tag
- NR: Tag
- SF: Tag
- SH: Tag
- SL: Tag
- SN: Tag
- SP: Tag
- SSC: Tag
- SSO: Tag
- SC: Tag
- SY: Tag
- SE: Tag
- VA: Tag
- VCN: Tag
- VCP: Tag
- VV: Tag
- VX: Tag
- XPN: Tag
- XR: Tag
- XSA: Tag
- XSN: Tag
- XSV: Tag
- UNKNOWN: Tag
- UNA: Tag
- NA: Tag
- VSV: Tag
- code: int
- desc: String
- code(): int
- description(): String
- Tag(int, String): void
- resolveTag(String): Tag
- resolveTag(byte): Tag
- resolveType(String): Type
- resolveType(byte): Type
-
Token
- surfaceForm: char[]
- offset: int
- length: int
- startOffset: int
- endOffset: int
- posIncr: int
- posLen: int
- Token(char[], int, int, int, int): void
- getSurfaceForm(): char[]
- getOffset(): int
- getLength(): int
- getSurfaceFormString(): String
- getPOSType(): Type
- getLeftPOS(): Tag
- getRightPOS(): Tag
- getReading(): String
- getMorphemes(): Morpheme[]
- getStartOffset(): int
- getEndOffset(): int
- setPositionIncrement(int): void
- getPositionIncrement(): int
- setPositionLength(int): void
- getPositionLength(): int
-
dict
-
BinaryDictionary
- TARGETMAP_FILENAME_SUFFIX: String
- DICT_FILENAME_SUFFIX: String
- POSDICT_FILENAME_SUFFIX: String
- DICT_HEADER: String
- TARGETMAP_HEADER: String
- POSDICT_HEADER: String
- VERSION: int
- buffer: ByteBuffer
- targetMapOffsets: int[]
- targetMap: int[]
- posDict: Tag[]
- BinaryDictionary(): void
- getResource(String): InputStream
- getClassResource(Class<Object>, String): InputStream
- lookupWordIds(int, IntsRef): void
- getLeftId(int): int
- getRightId(int): int
- getWordCost(int): int
- getPOSType(int): Type
- getLeftPOS(int): Tag
- getRightPOS(int): Tag
- getReading(int): String
- getMorphemes(int, char[], int, int): Morpheme[]
- readString(int): String
- hasSinglePOS(int): boolean
- hasReadingData(int): boolean
- HAS_SINGLE_POS: int
- HAS_READING: int
-
CharacterDefinition
- HANJANUMERIC: byte
- FILENAME_SUFFIX: String
- HEADER: String
- VERSION: int
- CLASS_COUNT: int
-
CharacterClass
- NGRAM: CharacterClass
- DEFAULT: CharacterClass
- SPACE: CharacterClass
- SYMBOL: CharacterClass
- NUMERIC: CharacterClass
- ALPHA: CharacterClass
- CYRILLIC: CharacterClass
- GREEK: CharacterClass
- HIRAGANA: CharacterClass
- KATAKANA: CharacterClass
- KANJI: CharacterClass
- HANGUL: CharacterClass
- HANJA: CharacterClass
- HANJANUMERIC: CharacterClass
- characterCategoryMap: byte[]
- invokeMap: boolean[]
- groupMap: boolean[]
- NGRAM: byte
- DEFAULT: byte
- SPACE: byte
- SYMBOL: byte
- NUMERIC: byte
- ALPHA: byte
- CYRILLIC: byte
- GREEK: byte
- HIRAGANA: byte
- KATAKANA: byte
- KANJI: byte
- HANGUL: byte
- HANJA: byte
- CharacterDefinition(): void
- getCharacterClass(char): byte
- isInvoke(char): boolean
- isGroup(char): boolean
- isHanja(char): boolean
- isHangul(char): boolean
- hasCoda(char): boolean
- lookupCharacterClass(String): byte
- getInstance(): CharacterDefinition
- SingletonHolder
- ConnectionCosts
- Dictionary
- TokenInfoDictionary
- TokenInfoFST
- UnknownDictionary
-
UserDictionary
- fst: TokenInfoFST
- WORD_COST: int
- LEFT_ID: short
- RIGHT_ID: short
- RIGHT_ID_T: short
- RIGHT_ID_F: short
- segmentations: int[][]
- rightIds: short[]
- open(Reader): UserDictionary
- UserDictionary(List<String>): void
- getFST(): TokenInfoFST
- getLeftId(int): int
- getRightId(int): int
- getWordCost(int): int
- getPOSType(int): Type
- getLeftPOS(int): Tag
- getRightPOS(int): Tag
- getReading(int): String
- getMorphemes(int, char[], int, int): Morpheme[]
- lookup(char[], int, int): List<Integer>
-
BinaryDictionary
- tokenattributes
- util
-
ko
-
analysis
-
lucene
-
apache