/*
 * Copyright (c) 2005 Bruno Martins
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met:
 * 1. Redistributions of source code must retain the above copyright 
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the organization nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.apache.lucene.search.suggest.jaspell;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Locale;
import java.util.Vector;
import java.util.zip.GZIPInputStream;

import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

Implementation of a Ternary Search Trie, a data structure for storing String objects that combines the compact size of a binary search tree with the speed of a digital search trie, and is therefore ideal for practical use in sorting and searching data.

This data structure is faster than hashing for many typical search problems, and supports a broader range of useful problems and operations. Ternary searches are faster than hashing and more powerful, too.

The theory of ternary search trees was described at a symposium in 1997 (see "Fast Algorithms for Sorting and Searching Strings," by J.L. Bentley and R. Sedgewick, Proceedings of the 8th Annual ACM-SIAM Symposium on Discrete Algorithms, January 1997). Algorithms in C, Third Edition, by Robert Sedgewick (Addison-Wesley, 1998) provides yet another view of ternary search trees.

Deprecated:Migrate to one of the newer suggesters which are much more RAM efficient.
/** * Implementation of a Ternary Search Trie, a data structure for storing * <code>String</code> objects that combines the compact size of a binary search * tree with the speed of a digital search trie, and is therefore ideal for * practical use in sorting and searching data. * * <p> * This data structure is faster than hashing for many typical search problems, * and supports a broader range of useful problems and operations. Ternary * searches are faster than hashing and more powerful, too. * </p> * * <p> * The theory of ternary search trees was described at a symposium in 1997 (see * "Fast Algorithms for Sorting and Searching Strings," by J.L. Bentley and R. * Sedgewick, Proceedings of the 8th Annual ACM-SIAM Symposium on Discrete * Algorithms, January 1997). Algorithms in C, Third Edition, by Robert * Sedgewick (Addison-Wesley, 1998) provides yet another view of ternary search * trees. * </p> * * @deprecated Migrate to one of the newer suggesters which are much more RAM efficient. */
@Deprecated public class JaspellTernarySearchTrie implements Accountable {
An inner class of Ternary Search Trie that represents a node in the trie.
/** * An inner class of Ternary Search Trie that represents a node in the trie. */
protected static final class TSTNode implements Accountable {
Index values for accessing relatives array.
/** Index values for accessing relatives array. */
protected final static int PARENT = 0, LOKID = 1, EQKID = 2, HIKID = 3;
The key to the node.
/** The key to the node. */
protected Object data;
The relative nodes.
/** The relative nodes. */
protected final TSTNode[] relatives = new TSTNode[4];
The char used in the split.
/** The char used in the split. */
protected char splitchar;
Constructor method.
Params:
  • splitchar – The char used in the split.
  • parent – The parent node.
/** * Constructor method. * *@param splitchar * The char used in the split. *@param parent * The parent node. */
protected TSTNode(char splitchar, TSTNode parent) { this.splitchar = splitchar; relatives[PARENT] = parent; } @Override public long ramBytesUsed() { long mem = RamUsageEstimator.shallowSizeOf(this) + RamUsageEstimator.shallowSizeOf(relatives); // We don't need to add parent since our parent added itself: for (int i=1;i<4;i++) { TSTNode node = relatives[i]; if (node != null) { mem += node.ramBytesUsed(); } } return mem; } }
Compares characters by alphabetical order.
Params:
  • cCompare2 – The first char in the comparison.
  • cRef – The second char in the comparison.
Returns:A negative number, 0 or a positive number if the second char is less, equal or greater.
/** * Compares characters by alphabetical order. * *@param cCompare2 * The first char in the comparison. *@param cRef * The second char in the comparison. *@return A negative number, 0 or a positive number if the second char is * less, equal or greater. */
private static int compareCharsAlphabetically(char cCompare2, char cRef) { return Character.toLowerCase(cCompare2) - Character.toLowerCase(cRef); } /* what follows is the original Jaspell code. private static int compareCharsAlphabetically(int cCompare2, int cRef) { int cCompare = 0; if (cCompare2 >= 65) { if (cCompare2 < 89) { cCompare = (2 * cCompare2) - 65; } else if (cCompare2 < 97) { cCompare = cCompare2 + 24; } else if (cCompare2 < 121) { cCompare = (2 * cCompare2) - 128; } else cCompare = cCompare2; } else cCompare = cCompare2; if (cRef < 65) { return cCompare - cRef; } if (cRef < 89) { return cCompare - ((2 * cRef) - 65); } if (cRef < 97) { return cCompare - (cRef + 24); } if (cRef < 121) { return cCompare - ((2 * cRef) - 128); } return cCompare - cRef; } */
The default number of values returned by the matchAlmost method.
/** * The default number of values returned by the <code>matchAlmost</code> * method. */
private int defaultNumReturnValues = -1;
the number of differences allowed in a call to the matchAlmostKey method.
/** * the number of differences allowed in a call to the * <code>matchAlmostKey</code> method. */
private int matchAlmostDiff;
The base node in the trie.
/** The base node in the trie. */
private TSTNode rootNode; private final Locale locale;
Constructs an empty Ternary Search Trie.
/** * Constructs an empty Ternary Search Trie. */
public JaspellTernarySearchTrie() { this(Locale.ROOT); }
Constructs an empty Ternary Search Trie, specifying the Locale used for lowercasing.
/** * Constructs an empty Ternary Search Trie, * specifying the Locale used for lowercasing. */
public JaspellTernarySearchTrie(Locale locale) { this.locale = locale; } // for loading void setRoot(TSTNode newRoot) { rootNode = newRoot; } // for saving TSTNode getRoot() { return rootNode; }
Constructs a Ternary Search Trie and loads data from a Path into the Trie. The file is a normal text document, where each line is of the form word TAB float.
Params:
  • file – The Path with the data to load into the Trie.
Throws:
  • IOException – A problem occurred while reading the data.
/** * Constructs a Ternary Search Trie and loads data from a <code>Path</code> * into the Trie. The file is a normal text document, where each line is of * the form word TAB float. * *@param file * The <code>Path</code> with the data to load into the Trie. *@exception IOException * A problem occurred while reading the data. */
public JaspellTernarySearchTrie(Path file) throws IOException { this(file, false); }
Constructs a Ternary Search Trie and loads data from a File into the Trie. The file is a normal text document, where each line is of the form "word TAB float".
Params:
  • file – The File with the data to load into the Trie.
  • compression – If true, the file is compressed with the GZIP algorithm, and if false, the file is a normal text document.
Throws:
  • IOException – A problem occurred while reading the data.
/** * Constructs a Ternary Search Trie and loads data from a <code>File</code> * into the Trie. The file is a normal text document, where each line is of * the form "word TAB float". * *@param file * The <code>File</code> with the data to load into the Trie. *@param compression * If true, the file is compressed with the GZIP algorithm, and if * false, the file is a normal text document. *@exception IOException * A problem occurred while reading the data. */
public JaspellTernarySearchTrie(Path file, boolean compression) throws IOException { this(); BufferedReader in; if (compression) in = new BufferedReader(IOUtils.getDecodingReader(new GZIPInputStream( Files.newInputStream(file)), StandardCharsets.UTF_8)); else in = Files.newBufferedReader(file, StandardCharsets.UTF_8); try { String word; int pos; Float occur, one = 1f; while ((word = in.readLine()) != null) { pos = word.indexOf("\t"); occur = one; if (pos != -1) { occur = Float.parseFloat(word.substring(pos + 1).trim()); word = word.substring(0, pos); } String key = word.toLowerCase(locale); if (rootNode == null) { rootNode = new TSTNode(key.charAt(0), null); } TSTNode node = null; if (key.length() > 0 && rootNode != null) { TSTNode currentNode = rootNode; int charIndex = 0; while (true) { if (currentNode == null) break; int charComp = compareCharsAlphabetically(key.charAt(charIndex), currentNode.splitchar); if (charComp == 0) { charIndex++; if (charIndex == key.length()) { node = currentNode; break; } currentNode = currentNode.relatives[TSTNode.EQKID]; } else if (charComp < 0) { currentNode = currentNode.relatives[TSTNode.LOKID]; } else { currentNode = currentNode.relatives[TSTNode.HIKID]; } } Float occur2 = null; if (node != null) occur2 = ((Float) (node.data)); if (occur2 != null) { occur += occur2.floatValue(); } currentNode = getOrCreateNode(word.trim().toLowerCase(locale)); currentNode.data = occur; } } } finally { IOUtils.close(in); } }
Deletes the node passed in as an argument. If this node has non-null data, then both the node and the data will be deleted. It also deletes any other nodes in the trie that are no longer needed after the deletion of the node.
Params:
  • nodeToDelete – The node to delete.
/** * Deletes the node passed in as an argument. If this node has non-null data, * then both the node and the data will be deleted. It also deletes any other * nodes in the trie that are no longer needed after the deletion of the node. * *@param nodeToDelete * The node to delete. */
private void deleteNode(TSTNode nodeToDelete) { if (nodeToDelete == null) { return; } nodeToDelete.data = null; while (nodeToDelete != null) { nodeToDelete = deleteNodeRecursion(nodeToDelete); // deleteNodeRecursion(nodeToDelete); } }
Recursively visits each node to be deleted. To delete a node, first set its data to null, then pass it into this method, then pass the node returned by this method into this method (make sure you don't delete the data of any of the nodes returned from this method!) and continue in this fashion until the node returned by this method is null. The TSTNode instance returned by this method will be next node to be operated on by deleteNodeRecursion (This emulates recursive method call while avoiding the JVM overhead normally associated with a recursive method.)
Params:
  • currentNode – The node to delete.
Returns:The next node to be called in deleteNodeRecursion.
/** * Recursively visits each node to be deleted. * * To delete a node, first set its data to null, then pass it into this * method, then pass the node returned by this method into this method (make * sure you don't delete the data of any of the nodes returned from this * method!) and continue in this fashion until the node returned by this * method is <code>null</code>. * * The TSTNode instance returned by this method will be next node to be * operated on by <code>deleteNodeRecursion</code> (This emulates recursive * method call while avoiding the JVM overhead normally associated with a * recursive method.) * *@param currentNode * The node to delete. *@return The next node to be called in deleteNodeRecursion. */
private TSTNode deleteNodeRecursion(TSTNode currentNode) { if (currentNode == null) { return null; } if (currentNode.relatives[TSTNode.EQKID] != null || currentNode.data != null) { return null; } // can't delete this node if it has a non-null eq kid or data TSTNode currentParent = currentNode.relatives[TSTNode.PARENT]; boolean lokidNull = currentNode.relatives[TSTNode.LOKID] == null; boolean hikidNull = currentNode.relatives[TSTNode.HIKID] == null; int childType; if (currentParent.relatives[TSTNode.LOKID] == currentNode) { childType = TSTNode.LOKID; } else if (currentParent.relatives[TSTNode.EQKID] == currentNode) { childType = TSTNode.EQKID; } else if (currentParent.relatives[TSTNode.HIKID] == currentNode) { childType = TSTNode.HIKID; } else { rootNode = null; return null; } if (lokidNull && hikidNull) { currentParent.relatives[childType] = null; return currentParent; } if (lokidNull) { currentParent.relatives[childType] = currentNode.relatives[TSTNode.HIKID]; currentNode.relatives[TSTNode.HIKID].relatives[TSTNode.PARENT] = currentParent; return currentParent; } if (hikidNull) { currentParent.relatives[childType] = currentNode.relatives[TSTNode.LOKID]; currentNode.relatives[TSTNode.LOKID].relatives[TSTNode.PARENT] = currentParent; return currentParent; } int deltaHi = currentNode.relatives[TSTNode.HIKID].splitchar - currentNode.splitchar; int deltaLo = currentNode.splitchar - currentNode.relatives[TSTNode.LOKID].splitchar; int movingKid; TSTNode targetNode; if (deltaHi == deltaLo) { if (Math.random() < 0.5) { deltaHi++; } else { deltaLo++; } } if (deltaHi > deltaLo) { movingKid = TSTNode.HIKID; targetNode = currentNode.relatives[TSTNode.LOKID]; } else { movingKid = TSTNode.LOKID; targetNode = currentNode.relatives[TSTNode.HIKID]; } while (targetNode.relatives[movingKid] != null) { targetNode = targetNode.relatives[movingKid]; } targetNode.relatives[movingKid] = currentNode.relatives[movingKid]; currentParent.relatives[childType] = targetNode; targetNode.relatives[TSTNode.PARENT] = currentParent; if (!lokidNull) { currentNode.relatives[TSTNode.LOKID] = null; } if (!hikidNull) { currentNode.relatives[TSTNode.HIKID] = null; } return currentParent; }
Retrieve the object indexed by a key.
Params:
  • key – A String index.
Returns:The object retrieved from the Ternary Search Trie.
/** * Retrieve the object indexed by a key. * *@param key * A <code>String</code> index. *@return The object retrieved from the Ternary Search Trie. */
public Object get(CharSequence key) { TSTNode node = getNode(key); if (node == null) { return null; } return node.data; }
Retrieve the Float indexed by key, increment it by one unit and store the new Float.
Params:
  • key – A String index.
Returns:The Float retrieved from the Ternary Search Trie.
/** * Retrieve the <code>Float</code> indexed by key, increment it by one unit * and store the new <code>Float</code>. * *@param key * A <code>String</code> index. *@return The <code>Float</code> retrieved from the Ternary Search Trie. */
public Float getAndIncrement(String key) { String key2 = key.trim().toLowerCase(locale); TSTNode node = getNode(key2); if (node == null) { return null; } Float aux = (Float) (node.data); if (aux == null) { aux = 1f; } else { aux = (float) (aux.intValue() + 1); } put(key2, aux); return aux; }
Returns the key that indexes the node argument.
Params:
  • node – The node whose index is to be calculated.
Returns:The String that indexes the node argument.
/** * Returns the key that indexes the node argument. * *@param node * The node whose index is to be calculated. *@return The <code>String</code> that indexes the node argument. */
protected String getKey(TSTNode node) { StringBuilder getKeyBuffer = new StringBuilder(); getKeyBuffer.setLength(0); getKeyBuffer.append("").append(node.splitchar); TSTNode currentNode; TSTNode lastNode; currentNode = node.relatives[TSTNode.PARENT]; lastNode = node; while (currentNode != null) { if (currentNode.relatives[TSTNode.EQKID] == lastNode) { getKeyBuffer.append("").append(currentNode.splitchar); } lastNode = currentNode; currentNode = currentNode.relatives[TSTNode.PARENT]; } getKeyBuffer.reverse(); return getKeyBuffer.toString(); }
Returns the node indexed by key, or null if that node doesn't exist. Search begins at root node.
Params:
  • key – A String that indexes the node that is returned.
Returns:The node object indexed by key. This object is an instance of an inner class named TernarySearchTrie.TSTNode.
/** * Returns the node indexed by key, or <code>null</code> if that node doesn't * exist. Search begins at root node. * *@param key * A <code>String</code> that indexes the node that is returned. *@return The node object indexed by key. This object is an instance of an * inner class named <code>TernarySearchTrie.TSTNode</code>. */
public TSTNode getNode(CharSequence key) { return getNode(key, rootNode); }
Returns the node indexed by key, or null if that node doesn't exist. The search begins at root node.
Params:
  • key – A String that indexes the node that is returned.
  • startNode – The top node defining the subtrie to be searched.
Returns:The node object indexed by key. This object is an instance of an inner class named TernarySearchTrie.TSTNode.
/** * Returns the node indexed by key, or <code>null</code> if that node doesn't * exist. The search begins at root node. * *@param key * A <code>String</code> that indexes the node that is returned. *@param startNode * The top node defining the subtrie to be searched. *@return The node object indexed by key. This object is an instance of an * inner class named <code>TernarySearchTrie.TSTNode</code>. */
protected TSTNode getNode(CharSequence key, TSTNode startNode) { if (key == null || startNode == null || key.length() == 0) { return null; } TSTNode currentNode = startNode; int charIndex = 0; while (true) { if (currentNode == null) { return null; } int charComp = compareCharsAlphabetically(key.charAt(charIndex), currentNode.splitchar); if (charComp == 0) { charIndex++; if (charIndex == key.length()) { return currentNode; } currentNode = currentNode.relatives[TSTNode.EQKID]; } else if (charComp < 0) { currentNode = currentNode.relatives[TSTNode.LOKID]; } else { currentNode = currentNode.relatives[TSTNode.HIKID]; } } }
Returns the node indexed by key, creating that node if it doesn't exist, and creating any required intermediate nodes if they don't exist.
Params:
  • key – A String that indexes the node that is returned.
Throws:
Returns:The node object indexed by key. This object is an instance of an inner class named TernarySearchTrie.TSTNode.
/** * Returns the node indexed by key, creating that node if it doesn't exist, * and creating any required intermediate nodes if they don't exist. * *@param key * A <code>String</code> that indexes the node that is returned. *@return The node object indexed by key. This object is an instance of an * inner class named <code>TernarySearchTrie.TSTNode</code>. *@exception NullPointerException * If the key is <code>null</code>. *@exception IllegalArgumentException * If the key is an empty <code>String</code>. */
protected TSTNode getOrCreateNode(CharSequence key) throws NullPointerException, IllegalArgumentException { if (key == null) { throw new NullPointerException( "attempt to get or create node with null key"); } if (key.length() == 0) { throw new IllegalArgumentException( "attempt to get or create node with key of zero length"); } if (rootNode == null) { rootNode = new TSTNode(key.charAt(0), null); } TSTNode currentNode = rootNode; int charIndex = 0; while (true) { int charComp = compareCharsAlphabetically(key.charAt(charIndex), currentNode.splitchar); if (charComp == 0) { charIndex++; if (charIndex == key.length()) { return currentNode; } if (currentNode.relatives[TSTNode.EQKID] == null) { currentNode.relatives[TSTNode.EQKID] = new TSTNode(key .charAt(charIndex), currentNode); } currentNode = currentNode.relatives[TSTNode.EQKID]; } else if (charComp < 0) { if (currentNode.relatives[TSTNode.LOKID] == null) { currentNode.relatives[TSTNode.LOKID] = new TSTNode(key .charAt(charIndex), currentNode); } currentNode = currentNode.relatives[TSTNode.LOKID]; } else { if (currentNode.relatives[TSTNode.HIKID] == null) { currentNode.relatives[TSTNode.HIKID] = new TSTNode(key .charAt(charIndex), currentNode); } currentNode = currentNode.relatives[TSTNode.HIKID]; } } }
Returns a List of keys that almost match the argument key. Keys returned will have exactly diff characters that do not match the target key, where diff is equal to the last value passed in as an argument to the setMatchAlmostDiff method.

If the matchAlmost method is called before the setMatchAlmostDiff method has been called for the first time, then diff = 0.

Params:
  • key – The target key.
Returns:A List with the results.
/** * Returns a <code>List</code> of keys that almost match the argument key. * Keys returned will have exactly diff characters that do not match the * target key, where diff is equal to the last value passed in as an argument * to the <code>setMatchAlmostDiff</code> method. * <p> * If the <code>matchAlmost</code> method is called before the * <code>setMatchAlmostDiff</code> method has been called for the first time, * then diff = 0. * *@param key * The target key. *@return A <code>List</code> with the results. */
public List<String> matchAlmost(String key) { return matchAlmost(key, defaultNumReturnValues); }
Returns a List of keys that almost match the argument key. Keys returned will have exactly diff characters that do not match the target key, where diff is equal to the last value passed in as an argument to the setMatchAlmostDiff method.

If the matchAlmost method is called before the setMatchAlmostDiff method has been called for the first time, then diff = 0.

Params:
  • key – The target key.
  • numReturnValues – The maximum number of values returned by this method.
Returns:A List with the results
/** * Returns a <code>List</code> of keys that almost match the argument key. * Keys returned will have exactly diff characters that do not match the * target key, where diff is equal to the last value passed in as an argument * to the <code>setMatchAlmostDiff</code> method. * <p> * If the <code>matchAlmost</code> method is called before the * <code>setMatchAlmostDiff</code> method has been called for the first time, * then diff = 0. * *@param key * The target key. *@param numReturnValues * The maximum number of values returned by this method. *@return A <code>List</code> with the results */
public List<String> matchAlmost(CharSequence key, int numReturnValues) { return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key, ((numReturnValues < 0) ? -1 : numReturnValues), new Vector<String>(), false); }
Recursivelly vists the nodes in order to find the ones that almost match a given key.
Params:
  • currentNode – The current node.
  • charIndex – The current char.
  • d – The number of differences so far.
  • matchAlmostNumReturnValues – The maximum number of values in the result List.
  • matchAlmostResult2 – The results so far.
  • upTo – If true all keys having up to and including matchAlmostDiff mismatched letters will be included in the result (including a key that is exactly the same as the target string) otherwise keys will be included in the result only if they have exactly matchAlmostDiff number of mismatched letters.
  • matchAlmostKey – The key being searched.
Returns:A List with the results.
/** * Recursivelly vists the nodes in order to find the ones that almost match a * given key. * *@param currentNode * The current node. *@param charIndex * The current char. *@param d * The number of differences so far. *@param matchAlmostNumReturnValues * The maximum number of values in the result <code>List</code>. *@param matchAlmostResult2 * The results so far. *@param upTo * If true all keys having up to and including matchAlmostDiff * mismatched letters will be included in the result (including a key * that is exactly the same as the target string) otherwise keys will * be included in the result only if they have exactly * matchAlmostDiff number of mismatched letters. *@param matchAlmostKey * The key being searched. *@return A <code>List</code> with the results. */
private List<String> matchAlmostRecursion(TSTNode currentNode, int charIndex, int d, CharSequence matchAlmostKey, int matchAlmostNumReturnValues, List<String> matchAlmostResult2, boolean upTo) { if ((currentNode == null) || (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues) || (d < 0) || (charIndex >= matchAlmostKey.length())) { return matchAlmostResult2; } int charComp = compareCharsAlphabetically(matchAlmostKey.charAt(charIndex), currentNode.splitchar); List<String> matchAlmostResult = matchAlmostResult2; if ((d > 0) || (charComp < 0)) { matchAlmostResult = matchAlmostRecursion( currentNode.relatives[TSTNode.LOKID], charIndex, d, matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo); } int nextD = (charComp == 0) ? d : d - 1; boolean cond = (upTo) ? (nextD >= 0) : (nextD == 0); if ((matchAlmostKey.length() == charIndex + 1) && cond && (currentNode.data != null)) { matchAlmostResult.add(getKey(currentNode)); } matchAlmostResult = matchAlmostRecursion( currentNode.relatives[TSTNode.EQKID], charIndex + 1, nextD, matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo); if ((d > 0) || (charComp > 0)) { matchAlmostResult = matchAlmostRecursion( currentNode.relatives[TSTNode.HIKID], charIndex, d, matchAlmostKey, matchAlmostNumReturnValues, matchAlmostResult, upTo); } return matchAlmostResult; }
Returns an alphabetical List of all keys in the trie that begin with a given prefix. Only keys for nodes having non-null data are included in the List.
Params:
  • prefix – Each key returned from this method will begin with the characters in prefix.
Returns:A List with the results.
/** * Returns an alphabetical <code>List</code> of all keys in the trie that * begin with a given prefix. Only keys for nodes having non-null data are * included in the <code>List</code>. * *@param prefix * Each key returned from this method will begin with the characters * in prefix. *@return A <code>List</code> with the results. */
public List<String> matchPrefix(String prefix) { return matchPrefix(prefix, defaultNumReturnValues); }
Returns an alphabetical List of all keys in the trie that begin with a given prefix. Only keys for nodes having non-null data are included in the List.
Params:
  • prefix – Each key returned from this method will begin with the characters in prefix.
  • numReturnValues – The maximum number of values returned from this method.
Returns:A List with the results
/** * Returns an alphabetical <code>List</code> of all keys in the trie that * begin with a given prefix. Only keys for nodes having non-null data are * included in the <code>List</code>. * *@param prefix * Each key returned from this method will begin with the characters * in prefix. *@param numReturnValues * The maximum number of values returned from this method. *@return A <code>List</code> with the results */
public List<String> matchPrefix(CharSequence prefix, int numReturnValues) { Vector<String> sortKeysResult = new Vector<>(); TSTNode startNode = getNode(prefix); if (startNode == null) { return sortKeysResult; } if (startNode.data != null) { sortKeysResult.addElement(getKey(startNode)); } return sortKeysRecursion(startNode.relatives[TSTNode.EQKID], ((numReturnValues < 0) ? -1 : numReturnValues), sortKeysResult); }
Returns the number of nodes in the trie that have non-null data.
Returns:The number of nodes in the trie that have non-null data.
/** * Returns the number of nodes in the trie that have non-null data. * *@return The number of nodes in the trie that have non-null data. */
public int numDataNodes() { return numDataNodes(rootNode); }
Returns the number of nodes in the subtrie below and including the starting node. The method counts only nodes that have non-null data.
Params:
  • startingNode – The top node of the subtrie. the node that defines the subtrie.
Returns:The total number of nodes in the subtrie.
/** * Returns the number of nodes in the subtrie below and including the starting * node. The method counts only nodes that have non-null data. * *@param startingNode * The top node of the subtrie. the node that defines the subtrie. *@return The total number of nodes in the subtrie. */
protected int numDataNodes(TSTNode startingNode) { return recursiveNodeCalculator(startingNode, true, 0); }
Returns the total number of nodes in the trie. The method counts nodes whether or not they have data.
Returns:The total number of nodes in the trie.
/** * Returns the total number of nodes in the trie. The method counts nodes * whether or not they have data. * *@return The total number of nodes in the trie. */
public int numNodes() { return numNodes(rootNode); }
Returns the total number of nodes in the subtrie below and including the starting Node. The method counts nodes whether or not they have data.
Params:
  • startingNode – The top node of the subtrie. The node that defines the subtrie.
Returns:The total number of nodes in the subtrie.
/** * Returns the total number of nodes in the subtrie below and including the * starting Node. The method counts nodes whether or not they have data. * *@param startingNode * The top node of the subtrie. The node that defines the subtrie. *@return The total number of nodes in the subtrie. */
protected int numNodes(TSTNode startingNode) { return recursiveNodeCalculator(startingNode, false, 0); }
Stores a value in the trie. The value may be retrieved using the key.
Params:
  • key – A String that indexes the object to be stored.
  • value – The object to be stored in the Trie.
/** * Stores a value in the trie. The value may be retrieved using the key. * *@param key * A <code>String</code> that indexes the object to be stored. *@param value * The object to be stored in the Trie. */
public void put(CharSequence key, Object value) { getOrCreateNode(key).data = value; }
Recursivelly visists each node to calculate the number of nodes.
Params:
  • currentNode – The current node.
  • checkData – If true we check the data to be different of null.
  • numNodes2 – The number of nodes so far.
Returns:The number of nodes accounted.
/** * Recursivelly visists each node to calculate the number of nodes. * *@param currentNode * The current node. *@param checkData * If true we check the data to be different of <code>null</code>. *@param numNodes2 * The number of nodes so far. *@return The number of nodes accounted. */
private int recursiveNodeCalculator(TSTNode currentNode, boolean checkData, int numNodes2) { if (currentNode == null) { return numNodes2; } int numNodes = recursiveNodeCalculator( currentNode.relatives[TSTNode.LOKID], checkData, numNodes2); numNodes = recursiveNodeCalculator(currentNode.relatives[TSTNode.EQKID], checkData, numNodes); numNodes = recursiveNodeCalculator(currentNode.relatives[TSTNode.HIKID], checkData, numNodes); if (checkData) { if (currentNode.data != null) { numNodes++; } } else { numNodes++; } return numNodes; }
Removes the value indexed by key. Also removes all nodes that are rendered unnecessary by the removal of this data.
Params:
  • key – A string that indexes the object to be removed from the Trie.
/** * Removes the value indexed by key. Also removes all nodes that are rendered * unnecessary by the removal of this data. * *@param key * A <code>string</code> that indexes the object to be removed from * the Trie. */
public void remove(String key) { deleteNode(getNode(key.trim().toLowerCase(locale))); }
Sets the number of characters by which words can differ from target word when calling the matchAlmost method.

Arguments less than 0 will set the char difference to 0, and arguments greater than 3 will set the char difference to 3.

Params:
  • diff – The number of characters by which words can differ from target word.
/** * Sets the number of characters by which words can differ from target word * when calling the <code>matchAlmost</code> method. * <p> * Arguments less than 0 will set the char difference to 0, and arguments * greater than 3 will set the char difference to 3. * *@param diff * The number of characters by which words can differ from target * word. */
public void setMatchAlmostDiff(int diff) { if (diff < 0) { matchAlmostDiff = 0; } else if (diff > 3) { matchAlmostDiff = 3; } else { matchAlmostDiff = diff; } }
Sets the default maximum number of values returned from the matchPrefix and matchAlmost methods.

The value should be set this to -1 to get an unlimited number of return values. note that the methods mentioned above provide overloaded versions that allow you to specify the maximum number of return values, in which case this value is temporarily overridden.

Params:
  • num – The number of values that will be returned when calling the methods above.
/** * Sets the default maximum number of values returned from the * <code>matchPrefix</code> and <code>matchAlmost</code> methods. * <p> * The value should be set this to -1 to get an unlimited number of return * values. note that the methods mentioned above provide overloaded versions * that allow you to specify the maximum number of return values, in which * case this value is temporarily overridden. * **@param num * The number of values that will be returned when calling the * methods above. */
public void setNumReturnValues(int num) { defaultNumReturnValues = (num < 0) ? -1 : num; }
Returns keys sorted in alphabetical order. This includes the start Node and all nodes connected to the start Node.

The number of keys returned is limited to numReturnValues. To get a list that isn't limited in size, set numReturnValues to -1.

Params:
  • startNode – The top node defining the subtrie to be searched.
  • numReturnValues – The maximum number of values returned from this method.
Returns:A List with the results.
/** * Returns keys sorted in alphabetical order. This includes the start Node and * all nodes connected to the start Node. * <p> * The number of keys returned is limited to numReturnValues. To get a list * that isn't limited in size, set numReturnValues to -1. * *@param startNode * The top node defining the subtrie to be searched. *@param numReturnValues * The maximum number of values returned from this method. *@return A <code>List</code> with the results. */
protected List<String> sortKeys(TSTNode startNode, int numReturnValues) { return sortKeysRecursion(startNode, ((numReturnValues < 0) ? -1 : numReturnValues), new Vector<String>()); }
Returns keys sorted in alphabetical order. This includes the current Node and all nodes connected to the current Node.

Sorted keys will be appended to the end of the resulting List. The result may be empty when this method is invoked, but may not be null.

Params:
  • currentNode – The current node.
  • sortKeysNumReturnValues – The maximum number of values in the result.
  • sortKeysResult2 – The results so far.
Returns:A List with the results.
/** * Returns keys sorted in alphabetical order. This includes the current Node * and all nodes connected to the current Node. * <p> * Sorted keys will be appended to the end of the resulting <code>List</code>. * The result may be empty when this method is invoked, but may not be * <code>null</code>. * *@param currentNode * The current node. *@param sortKeysNumReturnValues * The maximum number of values in the result. *@param sortKeysResult2 * The results so far. *@return A <code>List</code> with the results. */
private List<String> sortKeysRecursion(TSTNode currentNode, int sortKeysNumReturnValues, List<String> sortKeysResult2) { if (currentNode == null) { return sortKeysResult2; } List<String> sortKeysResult = sortKeysRecursion( currentNode.relatives[TSTNode.LOKID], sortKeysNumReturnValues, sortKeysResult2); if (sortKeysNumReturnValues != -1 && sortKeysResult.size() >= sortKeysNumReturnValues) { return sortKeysResult; } if (currentNode.data != null) { sortKeysResult.add(getKey(currentNode)); } sortKeysResult = sortKeysRecursion(currentNode.relatives[TSTNode.EQKID], sortKeysNumReturnValues, sortKeysResult); return sortKeysRecursion(currentNode.relatives[TSTNode.HIKID], sortKeysNumReturnValues, sortKeysResult); } @Override public long ramBytesUsed() { long mem = RamUsageEstimator.shallowSizeOf(this); final TSTNode root = getRoot(); if (root != null) { mem += root.ramBytesUsed(); } return mem; } }