/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the  "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id: SAX2RTFDTM.java 468653 2006-10-28 07:07:05Z minchau $
 */
package org.apache.xml.dtm.ref.sax2dtm;

import javax.xml.transform.Source;

import org.apache.xml.dtm.DTM;
import org.apache.xml.dtm.DTMManager;
import org.apache.xml.dtm.DTMWSFilter;
import org.apache.xml.utils.IntStack;
import org.apache.xml.utils.IntVector;
import org.apache.xml.utils.StringVector;
import org.apache.xml.utils.XMLStringFactory;

import org.xml.sax.SAXException;

This is a subclass of SAX2DTM which has been modified to meet the needs of Result Tree Frameworks (RTFs). The differences are: 1) Multiple XML trees may be appended to the single DTM. This means that the root node of each document is _not_ node 0. Some code has had to be deoptimized to support this mode of operation, and an explicit mechanism for obtaining the Node Handle of the root node has been provided. 2) A stack of these documents is maintained, allowing us to "tail-prune" the most recently added trees off the end of the DTM as stylesheet elements (and thus variable contexts) are exited. PLEASE NOTE that this class may be _heavily_ dependent upon the internals of the SAX2DTM superclass, and must be maintained in parallel with that code. Arguably, they should be conditionals within a single class... but they have deen separated for performance reasons. (In fact, one could even argue about which is the superclass and which is the subclass; the current arrangement is as much about preserving stability of existing code during development as anything else.) %REVIEW% In fact, since the differences are so minor, I think it may be possible/practical to fold them back into the base SAX2DTM. Consider that as a future code-size optimization.
/** * This is a subclass of SAX2DTM which has been modified to meet the needs of * Result Tree Frameworks (RTFs). The differences are: * * 1) Multiple XML trees may be appended to the single DTM. This means * that the root node of each document is _not_ node 0. Some code has * had to be deoptimized to support this mode of operation, and an * explicit mechanism for obtaining the Node Handle of the root node * has been provided. * * 2) A stack of these documents is maintained, allowing us to "tail-prune" the * most recently added trees off the end of the DTM as stylesheet elements * (and thus variable contexts) are exited. * * PLEASE NOTE that this class may be _heavily_ dependent upon the * internals of the SAX2DTM superclass, and must be maintained in * parallel with that code. Arguably, they should be conditionals * within a single class... but they have deen separated for * performance reasons. (In fact, one could even argue about which is * the superclass and which is the subclass; the current arrangement * is as much about preserving stability of existing code during * development as anything else.) * * %REVIEW% In fact, since the differences are so minor, I think it * may be possible/practical to fold them back into the base * SAX2DTM. Consider that as a future code-size optimization. * */
public class SAX2RTFDTM extends SAX2DTM {
Set true to monitor SAX events and similar diagnostic info.
/** Set true to monitor SAX events and similar diagnostic info. */
private static final boolean DEBUG = false;
Most recently started Document, or null if the DTM is empty.
/** Most recently started Document, or null if the DTM is empty. */
private int m_currentDocumentNode=NULL;
Tail-pruning mark: Number of nodes in use
/** Tail-pruning mark: Number of nodes in use */
IntStack mark_size=new IntStack();
Tail-pruning mark: Number of data items in use
/** Tail-pruning mark: Number of data items in use */
IntStack mark_data_size=new IntStack();
Tail-pruning mark: Number of size-of-data fields in use
/** Tail-pruning mark: Number of size-of-data fields in use */
IntStack mark_char_size=new IntStack();
Tail-pruning mark: Number of dataOrQName slots in use
/** Tail-pruning mark: Number of dataOrQName slots in use */
IntStack mark_doq_size=new IntStack();
Tail-pruning mark: Number of namespace declaration sets in use %REVIEW% I don't think number of NS sets is ever different from number of NS elements. We can probabably reduce these to a single stack and save some storage.
/** Tail-pruning mark: Number of namespace declaration sets in use * %REVIEW% I don't think number of NS sets is ever different from number * of NS elements. We can probabably reduce these to a single stack and save * some storage. * */
IntStack mark_nsdeclset_size=new IntStack();
Tail-pruning mark: Number of naespace declaration elements in use %REVIEW% I don't think number of NS sets is ever different from number of NS elements. We can probabably reduce these to a single stack and save some storage.
/** Tail-pruning mark: Number of naespace declaration elements in use * %REVIEW% I don't think number of NS sets is ever different from number * of NS elements. We can probabably reduce these to a single stack and save * some storage. */
IntStack mark_nsdeclelem_size=new IntStack();
Tail-pruning mark: initial number of nodes in use
/** * Tail-pruning mark: initial number of nodes in use */
int m_emptyNodeCount;
Tail-pruning mark: initial number of namespace declaration sets
/** * Tail-pruning mark: initial number of namespace declaration sets */
int m_emptyNSDeclSetCount;
Tail-pruning mark: initial number of namespace declaration elements
/** * Tail-pruning mark: initial number of namespace declaration elements */
int m_emptyNSDeclSetElemsCount;
Tail-pruning mark: initial number of data items in use
/** * Tail-pruning mark: initial number of data items in use */
int m_emptyDataCount;
Tail-pruning mark: initial number of characters in use
/** * Tail-pruning mark: initial number of characters in use */
int m_emptyCharsCount;
Tail-pruning mark: default initial number of dataOrQName slots in use
/** * Tail-pruning mark: default initial number of dataOrQName slots in use */
int m_emptyDataQNCount; public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity, DTMWSFilter whiteSpaceFilter, XMLStringFactory xstringfactory, boolean doIndexing) { super(mgr, source, dtmIdentity, whiteSpaceFilter, xstringfactory, doIndexing); // NEVER track source locators for RTFs; they aren't meaningful. I think. // (If we did track them, we'd need to tail-prune these too.) //org.apache.xalan.processor.TransformerFactoryImpl.m_source_location; m_useSourceLocationProperty=false; m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector() : null; m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null; m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null; // Record initial sizes of fields that are pushed and restored // for RTF tail-pruning. More entries can be popped than pushed, so // we need this to mark the primordial state of the DTM. m_emptyNodeCount = m_size; m_emptyNSDeclSetCount = (m_namespaceDeclSets == null) ? 0 : m_namespaceDeclSets.size(); m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null) ? 0 : m_namespaceDeclSetElements.size(); m_emptyDataCount = m_data.size(); m_emptyCharsCount = m_chars.size(); m_emptyDataQNCount = m_dataOrQName.size(); }
Given a DTM, find the owning document node. In the case of SAX2RTFDTM, which may contain multiple documents, this returns the most recently started document, or null if the DTM is empty or no document is currently under construction. %REVIEW% Should we continue to report the most recent after construction has ended? I think not, given that it may have been tail-pruned. @return int Node handle of Document node, or null if this DTM does not contain an "active" document.
/** * Given a DTM, find the owning document node. In the case of * SAX2RTFDTM, which may contain multiple documents, this returns * the <b>most recently started</b> document, or null if the DTM is * empty or no document is currently under construction. * * %REVIEW% Should we continue to report the most recent after * construction has ended? I think not, given that it may have been * tail-pruned. * * @return int Node handle of Document node, or null if this DTM does not * contain an "active" document. * */
public int getDocument() { return makeNodeHandle(m_currentDocumentNode); }
Given a node handle, find the owning document node, using DTM semantics (Document owns itself) rather than DOM semantics (Document has no owner). (I'm counting on the fact that getOwnerDocument() is implemented on top of this call, in the superclass, to avoid having to rewrite that one. Be careful if that code changes!)
Params:
  • nodeHandle – the id of the node.
Returns:int Node handle of owning document
/** * Given a node handle, find the owning document node, using DTM semantics * (Document owns itself) rather than DOM semantics (Document has no owner). * * (I'm counting on the fact that getOwnerDocument() is implemented on top * of this call, in the superclass, to avoid having to rewrite that one. * Be careful if that code changes!) * * @param nodeHandle the id of the node. * @return int Node handle of owning document */
public int getDocumentRoot(int nodeHandle) { for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) { if (_type(id)==DTM.DOCUMENT_NODE) { return makeNodeHandle(id); } } return DTM.NULL; // Safety net; should never happen }
Given a node identifier, find the owning document node. Unlike the DOM, this considers the owningDocument of a Document to be itself. Note that in shared DTMs this may not be zero.
Params:
  • nodeIdentifier – the id of the starting node.
Returns:int Node identifier of the root of this DTM tree
/** * Given a node identifier, find the owning document node. Unlike the DOM, * this considers the owningDocument of a Document to be itself. Note that * in shared DTMs this may not be zero. * * @param nodeIdentifier the id of the starting node. * @return int Node identifier of the root of this DTM tree */
protected int _documentRoot(int nodeIdentifier) { if(nodeIdentifier==NULL) return NULL; for (int parent=_parent(nodeIdentifier); parent!=NULL; nodeIdentifier=parent,parent=_parent(nodeIdentifier)) ; return nodeIdentifier; }
Receive notification of the beginning of a new RTF document. %REVIEW% Y'know, this isn't all that much of a deoptimization. We might want to consider folding the start/endDocument changes back into the main SAX2DTM so we don't have to expose so many fields (even as Protected) and carry the additional code.
Throws:
  • SAXException – Any SAX exception, possibly wrapping another exception.
See Also:
/** * Receive notification of the beginning of a new RTF document. * * %REVIEW% Y'know, this isn't all that much of a deoptimization. We * might want to consider folding the start/endDocument changes back * into the main SAX2DTM so we don't have to expose so many fields * (even as Protected) and carry the additional code. * * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @see org.xml.sax.ContentHandler#startDocument * */
public void startDocument() throws SAXException { // Re-initialize the tree append process m_endDocumentOccured = false; m_prefixMappings = new java.util.Vector(); m_contextIndexes = new IntStack(); m_parents = new IntStack(); m_currentDocumentNode=m_size; super.startDocument(); }
Receive notification of the end of the document. %REVIEW% Y'know, this isn't all that much of a deoptimization. We might want to consider folding the start/endDocument changes back into the main SAX2DTM so we don't have to expose so many fields (even as Protected).
Throws:
  • SAXException – Any SAX exception, possibly wrapping another exception.
See Also:
/** * Receive notification of the end of the document. * * %REVIEW% Y'know, this isn't all that much of a deoptimization. We * might want to consider folding the start/endDocument changes back * into the main SAX2DTM so we don't have to expose so many fields * (even as Protected). * * @throws SAXException Any SAX exception, possibly * wrapping another exception. * @see org.xml.sax.ContentHandler#endDocument * */
public void endDocument() throws SAXException { charactersFlush(); m_nextsib.setElementAt(NULL,m_currentDocumentNode); if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED) m_firstch.setElementAt(NULL,m_currentDocumentNode); if (DTM.NULL != m_previous) m_nextsib.setElementAt(DTM.NULL,m_previous); m_parents = null; m_prefixMappings = null; m_contextIndexes = null; m_currentDocumentNode= NULL; // no longer open m_endDocumentOccured = true; }
"Tail-pruning" support for RTFs. This function pushes information about the current size of the DTM's data structures onto a stack, for use by popRewindMark() (which see). %REVIEW% I have no idea how to rewind m_elemIndexes. However, RTFs will not be indexed, so I can simply panic if that case arises. Hey, it works...
/** "Tail-pruning" support for RTFs. * * This function pushes information about the current size of the * DTM's data structures onto a stack, for use by popRewindMark() * (which see). * * %REVIEW% I have no idea how to rewind m_elemIndexes. However, * RTFs will not be indexed, so I can simply panic if that case * arises. Hey, it works... * */
public void pushRewindMark() { if(m_indexing || m_elemIndexes!=null) throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM"); // Values from DTMDefaultBase // %REVIEW% Can the namespace stack sizes ever differ? If not, save space! mark_size.push(m_size); mark_nsdeclset_size.push((m_namespaceDeclSets==null) ? 0 : m_namespaceDeclSets.size()); mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null) ? 0 : m_namespaceDeclSetElements.size()); // Values from SAX2DTM mark_data_size.push(m_data.size()); mark_char_size.push(m_chars.size()); mark_doq_size.push(m_dataOrQName.size()); }
"Tail-pruning" support for RTFs. This function pops the information previously saved by pushRewindMark (which see) and uses it to discard all nodes added to the DTM after that time. We expect that this will allow us to reuse storage more effectively. This is _not_ intended to be called while a document is still being constructed -- only between endDocument and the next startDocument %REVIEW% WARNING: This is the first use of some of the truncation methods. If Xalan blows up after this is called, that's a likely place to check. %REVIEW% Our original design for DTMs permitted them to share string pools. If there any risk that this might be happening, we can _not_ rewind and recover the string storage. One solution might to assert that DTMs used for RTFs Must Not take advantage of that feature, but this seems excessively fragile. Another, much less attractive, would be to just let them leak... Nah.
Returns:true if and only if the pop completely emptied the RTF. That response is used when determining how to unspool RTF-started-while-RTF-open situations.
/** "Tail-pruning" support for RTFs. * * This function pops the information previously saved by * pushRewindMark (which see) and uses it to discard all nodes added * to the DTM after that time. We expect that this will allow us to * reuse storage more effectively. * * This is _not_ intended to be called while a document is still being * constructed -- only between endDocument and the next startDocument * * %REVIEW% WARNING: This is the first use of some of the truncation * methods. If Xalan blows up after this is called, that's a likely * place to check. * * %REVIEW% Our original design for DTMs permitted them to share * string pools. If there any risk that this might be happening, we * can _not_ rewind and recover the string storage. One solution * might to assert that DTMs used for RTFs Must Not take advantage * of that feature, but this seems excessively fragile. Another, much * less attractive, would be to just let them leak... Nah. * * @return true if and only if the pop completely emptied the * RTF. That response is used when determining how to unspool * RTF-started-while-RTF-open situations. * */
public boolean popRewindMark() { boolean top=mark_size.empty(); m_size=top ? m_emptyNodeCount : mark_size.pop(); m_exptype.setSize(m_size); m_firstch.setSize(m_size); m_nextsib.setSize(m_size); m_prevsib.setSize(m_size); m_parent.setSize(m_size); m_elemIndexes=null; int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop(); if (m_namespaceDeclSets!=null) { m_namespaceDeclSets.setSize(ds); } int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop(); if (m_namespaceDeclSetElements!=null) { m_namespaceDeclSetElements.setSize(ds1); } // Values from SAX2DTM - m_data always has a reserved entry m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop()); m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop()); m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop()); // Return true iff DTM now empty return m_size==0; }
Returns:true if a DTM tree is currently under construction.
/** @return true if a DTM tree is currently under construction. * */
public boolean isTreeIncomplete() { return !m_endDocumentOccured; } }