/* Woodstox Lite ("wool") XML processor
 *
 * Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.fasterxml.aalto.in;

import com.fasterxml.aalto.util.NameTable;

This is a symbol table implementation used for storing byte-based PNames, specifically, instances of (PNameC).
/** * This is a symbol table implementation used for storing byte-based * <code>PNames</code>, specifically, instances of * ({@link PNameC}). */
public class CharBasedPNameTable extends NameTable { final static int MIN_HASH_SIZE = 16; protected static final float DEFAULT_FILL_FACTOR = 0.75f; /* /********************************************************************** /* Actual symbol table data /********************************************************************** */
Primary matching symbols; it's expected most match occur from here.
/** * Primary matching symbols; it's expected most match occur from * here. */
protected PNameC[] _symbols;
Overflow buckets; if primary doesn't match, lookup is done from here.

Note: Number of buckets is half of number of symbol entries, on assumption there's less need for buckets.

/** * Overflow buckets; if primary doesn't match, lookup is done * from here. *<p> * Note: Number of buckets is half of number of symbol entries, on * assumption there's less need for buckets. */
protected Bucket[] _buckets;
Current size (number of entries); needed to know if and when rehash.
/** * Current size (number of entries); needed to know if and when * rehash. */
protected int _size;
Limit that indicates maximum size this instance can hold before it needs to be expanded and rehashed. Calculated using fill factor passed in to constructor.
/** * Limit that indicates maximum size this instance can hold before * it needs to be expanded and rehashed. Calculated using fill * factor passed in to constructor. */
protected int _sizeThreshold;
Mask used to get index from hash values; equal to mBuckets.length - 1, when mBuckets.length is a power of two.
/** * Mask used to get index from hash values; equal to * <code>mBuckets.length - 1</code>, when mBuckets.length is * a power of two. */
protected int _indexMask; /* /********************************************************************** /* Information about concurrency /********************************************************************** */
Flag that indicates if any changes have been made to the data; used to both determine if bucket array needs to be copied when (first) change is made, and potentially if updated bucket list is to be resync'ed back to master instance.
/** * Flag that indicates if any changes have been made to the data; * used to both determine if bucket array needs to be copied when * (first) change is made, and potentially if updated bucket list * is to be resync'ed back to master instance. */
protected boolean _dirty; /* /********************************************************************** /* Life-cycle /********************************************************************** */
Main method for constructing a master symbol table instance; will be called by other public constructors.
Params:
  • initialSize – Minimum initial size for bucket array; internally will always use a power of two equal to or bigger than this value.
/** * Main method for constructing a master symbol table instance; will * be called by other public constructors. * * @param initialSize Minimum initial size for bucket array; internally * will always use a power of two equal to or bigger than this value. */
public CharBasedPNameTable(int initialSize) { // Let's set flags so no copying of buckets is needed: _dirty = true; // No point in requesting funny initial sizes... if (initialSize < 1) { throw new IllegalArgumentException("Can not use negative/zero initial size: "+initialSize); } { int currSize = MIN_HASH_SIZE; while (currSize < initialSize) { currSize += currSize; } initialSize = currSize; } _symbols = new PNameC[initialSize]; _buckets = new Bucket[initialSize >> 1]; // Mask is easy to calc for powers of two. _indexMask = initialSize - 1; _size = 0; /* Let's use 3/4 fill factor... */ _sizeThreshold = (initialSize * 3 + 3) >> 2; } CharBasedPNameTable(CharBasedPNameTable parent) { _symbols = parent._symbols; _buckets = parent._buckets; _size = parent._size; _sizeThreshold = parent._sizeThreshold; _indexMask = parent._indexMask; // Need to make copies of arrays, if/when adding new entries _dirty = false; }
Method that allows contents of child table to potentially be "merged in" with contents of this symbol table.

Note that caller has to make sure symbol table passed in is really a child or sibling of this symbol table.

/** * Method that allows contents of child table to potentially be * "merged in" with contents of this symbol table. *<p> * Note that caller has to make sure symbol table passed in is * really a child or sibling of this symbol table. */
public synchronized void mergeFromChild(CharBasedPNameTable child) { // Let's do a basic sanity check first: if (child.size() <= size()) { // nothing to add return; } // Okie dokie, let's get the data in! _symbols = child._symbols; _buckets = child._buckets; _size = child._size; _sizeThreshold = child._sizeThreshold; _indexMask = child._indexMask; // Dirty flag... well, let's just clear it, to force copying just // in case. Shouldn't really matter, for master tables. _dirty = false; /* However, we have to mark child as dirty, so that it will not * be modifying arrays we "took over" (since child may have * returned an updated table before it stopped fully using * the SymbolTable: for example, it may still use it for * parsing PI targets in epilog) */ child._dirty = false; } /* /********************************************************************** // Public API, generic accessors: /********************************************************************** */ @Override public int size() { return _size; } @Override public boolean maybeDirty() { return _dirty; } /* /********************************************************************** /* Public API, accessing symbols /********************************************************************** */ public PNameC findSymbol(char[] buffer, int start, int len, int hash) { int index = hash & _indexMask; PNameC sym = _symbols[index]; // Optimal case; checking existing primary symbol for hash index: if (sym != null) { if (sym.equalsPName(buffer, start, len, hash)) { return sym; } // How about collision bucket? Bucket b = _buckets[index >> 1]; if (b != null) { sym = b.find(buffer, start, len, hash); if (sym != null) { return sym; } } } return null; } public PNameC addSymbol(char[] buffer, int start, int len, int hash) { String newStr = new String(buffer, start, len).intern(); PNameC pname = PNameC.construct(newStr, hash); int index = hash & _indexMask; boolean primary; // First... let's see if we can add it without a collision. if (null == _symbols[index]) { primary = true; // yup, we are good then } else { /* Otherwise, better check if we need to expand; after * which there may be a slot in primary hash? */ if (_size >= _sizeThreshold) { rehash(); // Need to recalc hash index index = hash & _indexMask; primary = (null == _symbols[index]); } else { // nope: need a bucket primary = false; } } // good, can just update primary hash table if (!_dirty) { // need to do copy-on-write? copyArrays(); } ++_size; if (primary) { _symbols[index] = pname; } else { // Ok, all right: need to add to a bucket int bix = (index >> 1); _buckets[bix] = new Bucket(pname, _buckets[bix]); } return pname; } /* /********************************************************************** /* Internal methods /********************************************************************** */
Method called when copy-on-write is needed; generally when first change is made to a derived symbol table.
/** * Method called when copy-on-write is needed; generally when first * change is made to a derived symbol table. */
private void copyArrays() { PNameC[] oldSyms = _symbols; int size = oldSyms.length; _symbols = new PNameC[size]; System.arraycopy(oldSyms, 0, _symbols, 0, size); Bucket[] oldBuckets = _buckets; size = oldBuckets.length; _buckets = new Bucket[size]; System.arraycopy(oldBuckets, 0, _buckets, 0, size); _dirty = true; }
Method called when size (number of entries) of symbol table grows so big that load factor is exceeded. Since size has to remain power of two, arrays will then always be doubled. Main work is really redistributing old entries into new String/Bucket entries.
/** * Method called when size (number of entries) of symbol table grows * so big that load factor is exceeded. Since size has to remain * power of two, arrays will then always be doubled. Main work * is really redistributing old entries into new String/Bucket * entries. */
private void rehash() { int size = _symbols.length; int newSize = size + size; PNameC[] oldSyms = _symbols; Bucket[] oldBuckets = _buckets; _symbols = new PNameC[newSize]; _buckets = new Bucket[newSize >> 1]; // Let's update index mask, threshold, now (needed for rehashing) _indexMask = newSize - 1; _sizeThreshold += _sizeThreshold; int count = 0; // let's do sanity check /* Need to do two loops, unfortunately, since spillover area is * only half the size: */ for (int i = 0; i < size; ++i) { PNameC symbol = oldSyms[i]; if (symbol != null) { ++count; int index = symbol.getCustomHash() & _indexMask; if (_symbols[index] == null) { _symbols[index] = symbol; } else { int bix = index >> 1; _buckets[bix] = new Bucket(symbol, _buckets[bix]); } } } size >>= 1; for (int i = 0; i < size; ++i) { Bucket b = oldBuckets[i]; while (b != null) { ++count; PNameC symbol = b.getSymbol(); int index = symbol.getCustomHash() & _indexMask; if (_symbols[index] == null) { _symbols[index] = symbol; } else { int bix = index >> 1; _buckets[bix] = new Bucket(symbol, _buckets[bix]); } b = b.getNext(); } } if (count != _size) { throw new Error("Internal error on SymbolTable.rehash(): had "+_size+" entries; now have "+count+"."); } } /* /********************************************************************** /* Bucket class /********************************************************************** */
This class is a symbol table entry. Each entry acts as a node in a linked list.
/** * This class is a symbol table entry. Each entry acts as a node * in a linked list. */
final static class Bucket { private final PNameC mSymbol; private final Bucket mNext; public Bucket(PNameC symbol, Bucket next) { mSymbol = symbol; mNext = next; } public PNameC getSymbol() { return mSymbol; } public Bucket getNext() { return mNext; } public PNameC find(char[] buf, int start, int len, int hash) { Bucket b = this; do { PNameC sym = b.mSymbol; if (sym.equalsPName(buf, start, len, hash)) { return sym; } b = b.getNext(); } while (b != null); return null; } } }