Dependencies
Declarations
-
org
-
apache
-
lucene
-
analysis
-
cn
-
smart
- AnalyzerProfile
- CharType
-
HMMChineseTokenizer
- sentenceProto: BreakIterator
- termAtt: CharTermAttribute
- offsetAtt: OffsetAttribute
- typeAtt: TypeAttribute
- wordSegmenter: WordSegmenter
- tokens: Iterator<SegToken>
- HMMChineseTokenizer(): void
- HMMChineseTokenizer(AttributeFactory): void
- setNextSentence(int, int): void
- incrementWord(): boolean
- reset(): void
- HMMChineseTokenizerFactory
-
SmartChineseAnalyzer
- SmartChineseAnalyzer(): void
- STOPWORD_FILE_COMMENT: String
- stopWords: CharArraySet
- DEFAULT_STOPWORD_FILE: String
- getDefaultStopSet(): CharArraySet
- DefaultSetHolder
- SmartChineseAnalyzer(boolean): void
- SmartChineseAnalyzer(CharArraySet): void
- createComponents(String): TokenStreamComponents
- normalize(String, TokenStream): TokenStream
- Utility
- WordSegmenter
- WordType
-
hhmm
- AbstractDictionary
-
BiSegGraph
- tokenPairListTable: Map<Integer, ArrayList<SegTokenPair>>
- segTokenList: List<SegToken>
- bigramDict: BigramDictionary
- BiSegGraph(SegGraph): void
- generateBiSegGraph(SegGraph): void
- isToExist(int): boolean
- getToList(int): List<SegTokenPair>
- addSegTokenPair(SegTokenPair): void
- getToCount(): int
- getShortPath(): List<SegToken>
- toString(): String
-
BigramDictionary
- BigramDictionary(): void
- WORD_SEGMENT_CHAR: char
- singleInstance: BigramDictionary
- PRIME_BIGRAM_LENGTH: int
- bigramHashTable: long[]
- frequencyTable: int[]
- max: int
- repeat: int
- getInstance(): BigramDictionary
- loadFromObj(Path): boolean
- loadFromInputStream(InputStream): void
- saveToObj(Path): void
- load(): void
- load(String): void
- loadFromFile(String): void
- getAvaliableIndex(long, char[]): int
- getBigramItemIndex(char[]): int
- getFrequency(char[]): int
- HHMMSegmenter
- PathNode
- SegGraph
- SegToken
- SegTokenFilter
- SegTokenPair
-
WordDictionary
- WordDictionary(): void
- singleInstance: WordDictionary
- PRIME_INDEX_LENGTH: int
- wordIndexTable: short[]
- charIndexTable: char[]
- wordItem_charArrayTable: char[][][]
- wordItem_frequencyTable: int[][]
- getInstance(): WordDictionary
- load(String): void
- load(): void
- loadFromObj(Path): boolean
- loadFromObjectInputStream(InputStream): void
- saveToObj(Path): void
- loadMainDataFromFile(String): int
- expandDelimiterData(): void
- mergeSameWords(): void
- sortEachItems(): void
- setTableIndex(char, int): boolean
- getAvaliableTableIndex(char): short
- getWordItemTableIndex(char): short
- findInTable(short, char[]): int
- getPrefixMatch(char[]): int
- getPrefixMatch(char[], int): int
- getFrequency(char[]): int
- isEqual(char[], int): boolean
-
smart
-
cn
-
analysis
-
lucene
-
apache