/* Copyright 2002-2006, 2009, 2011, 2019 Elliotte Rusty Harold
This library is free software; you can redistribute it and/or modify
it under the terms of version 2.1 of the GNU Lesser General Public
License as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
You can contact Elliotte Rusty Harold by sending e-mail to
elharo@ibiblio.org. Please include the word "XOM" in the
subject line. The XOM home page is located at http://www.xom.nu/
*/
package nu.xom.canonical;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.Map.Entry;
import org.xml.sax.helpers.NamespaceSupport;
import nu.xom.Attribute;
import nu.xom.Comment;
import nu.xom.DocType;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Namespace;
import nu.xom.Node;
import nu.xom.Nodes;
import nu.xom.ParentNode;
import nu.xom.ProcessingInstruction;
import nu.xom.Serializer;
import nu.xom.Text;
import nu.xom.XPathContext;
// Writes XML in the format specified by Canonical XML Version 1.0 or
// Exclusive XML Canonicalization Version 1.0. Canonical XML 1.1 support
// is in progress.
// Author: Elliotte Rusty Harold. Version: 1.3.1
/**
* <p>
* Writes XML in the format specified by <a target="_top"
* href="http://www.w3.org/TR/2001/REC-xml-c14n-20010315">Canonical
* XML Version 1.0</a> or <a target="_top"
* href="http://www.w3.org/TR/2002/REC-xml-exc-c14n-20020718/">Exclusive
* XML Canonicalization Version 1.0</a>. Canonical XML 1.1 support is in progress.
* </p>
*
* @author Elliotte Rusty Harold
* @version 1.3.1
*
*/
public class Canonicalizer {
/** Algorithm URI: Canonical XML 1.0, comments omitted. */
public static final String CANONICAL_XML =
    "http://www.w3.org/TR/2001/REC-xml-c14n-20010315";

/** Algorithm URI: Canonical XML 1.0, comments retained. */
public static final String CANONICAL_XML_WITH_COMMENTS =
    "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments";

/** Algorithm URI: Exclusive XML Canonicalization 1.0, comments omitted. */
public static final String EXCLUSIVE_XML_CANONICALIZATION =
    "http://www.w3.org/2001/10/xml-exc-c14n#";

/** Algorithm URI: Exclusive XML Canonicalization 1.0, comments retained. */
public static final String EXCLUSIVE_XML_CANONICALIZATION_WITH_COMMENTS =
    "http://www.w3.org/2001/10/xml-exc-c14n#WithComments";

/** Algorithm URI: Canonical XML 1.1, comments omitted. */
public static final String CANONICAL_XML_11 =
    "http://www.w3.org/2006/12/xml-c14n11";

/** Algorithm URI: Canonical XML 1.1, comments retained. */
public static final String CANONICAL_XML_11_WITH_COMMENTS =
    "http://www.w3.org/2006/12/xml-c14n11#WithComments";

// Shared comparator used to order attributes before they are written.
private static Comparator<Attribute> comparator = new AttributeComparator();

// true if comments are written to the output
private boolean withComments;
// true if exclusive canonicalization is in effect
private boolean exclusive = false;
// true if one of the Canonical XML 1.1 algorithms was selected
private boolean v11 = false;
// the serializer that actually produces the canonical bytes
private CanonicalXMLSerializer serializer;
// prefixes treated inclusively even during exclusive canonicalization
private List<String> inclusiveNamespacePrefixes = new ArrayList<String>();
/**
 * Orders attributes for output: attributes in no namespace come first,
 * the rest are ordered by namespace URI, and attributes sharing a
 * namespace URI are ordered by local name.
 */
private static class AttributeComparator implements Comparator<Attribute> {

    @Override
    public int compare(Attribute a1, Attribute a2) {
        String firstUri = a1.getNamespaceURI();
        String secondUri = a2.getNamespaceURI();

        if (!firstUri.equals(secondUri)) {
            // An attribute without a namespace sorts ahead of a qualified one.
            if (firstUri.equals("")) {
                return -1;
            }
            if (secondUri.equals("")) {
                return 1;
            }
            return firstUri.compareTo(secondUri);
        }

        // Same namespace: the local name decides.
        return a1.getLocalName().compareTo(a2.getLocalName());
    }

}
// Creates a Canonicalizer that outputs a canonical XML document with comments.
// Params: out - the output stream the document is written onto
/**
 * <p>
 * Constructs a <code>Canonicalizer</code> that writes regular
 * (non-exclusive) canonical XML, comments included, onto the
 * given stream.
 * </p>
 *
 * @param out the stream that receives the canonical output
 */
public Canonicalizer(OutputStream out) {
    this(out, true, false);
}
// Creates a Canonicalizer that outputs a canonical XML document with or
// without comments.
// Params: out - the output stream the document is written onto
//         withComments - true if comments should be included in the
//                        output, false otherwise
/**
 * <p>
 * Constructs a <code>Canonicalizer</code> that writes regular
 * (non-exclusive) canonical XML onto the given stream, keeping or
 * dropping comments as requested.
 * </p>
 *
 * @param out the stream that receives the canonical output
 * @param withComments true to keep comments in the output,
 *     false to leave them out
 */
public Canonicalizer(OutputStream out, boolean withComments) {
    this(out, withComments, false);
}
// Creates a Canonicalizer that outputs a canonical XML document with or
// without comments, using either the original or the exclusive
// canonicalization algorithm.
// Params: out - the output stream the document is written onto
//         withComments - true if comments should be included in the
//                        output, false otherwise
//         exclusive - true if exclusive XML canonicalization should be
//                     performed, false if regular XML canonicalization
//                     should be performed
/**
 * <p>
 * Constructs a <code>Canonicalizer</code> for the given stream,
 * choosing both whether comments are kept and whether the regular
 * or the exclusive canonicalization algorithm is applied.
 * </p>
 *
 * @param out the stream that receives the canonical output
 * @param withComments true to keep comments in the output,
 *     false to leave them out
 * @param exclusive true for exclusive XML canonicalization,
 *     false for regular XML canonicalization
 */
private Canonicalizer(
  OutputStream out, boolean withComments, boolean exclusive) {

    CanonicalXMLSerializer canonicalSerializer = new CanonicalXMLSerializer(out);
    canonicalSerializer.setLineSeparator("\n");
    this.serializer = canonicalSerializer;

    this.exclusive = exclusive;
    this.withComments = withComments;
}
// Creates a Canonicalizer that outputs a canonical XML document using the
// specified algorithm. The following algorithms are recognized:
//  - Canonical XML without comments:
//    http://www.w3.org/TR/2001/REC-xml-c14n-20010315
//  - Canonical XML with comments:
//    http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments
//  - Exclusive XML canonicalization without comments:
//    http://www.w3.org/2001/10/xml-exc-c14n#
//  - Exclusive XML canonicalization with comments:
//    http://www.w3.org/2001/10/xml-exc-c14n#WithComments
//  - Canonical XML 1.1 without comments:
//    http://www.w3.org/2006/12/xml-c14n11
//  - Canonical XML 1.1 with comments:
//    http://www.w3.org/2006/12/xml-c14n11#WithComments
// Params: out - the output stream the document is written onto
//         algorithm - the URI for the canonicalization algorithm
// Throws: CanonicalizationException - if the algorithm is not recognized
//         NullPointerException - if the algorithm is null
/**
 * <p>
 * Creates a <code>Canonicalizer</code> that outputs a
 * canonical XML document using the specified algorithm.
 * Six algorithm URIs are recognized:
 * </p>
 *
 * <ul>
 *   <li>Canonical XML without comments:
 *     <code>http://www.w3.org/TR/2001/REC-xml-c14n-20010315</code></li>
 *   <li>Canonical XML with comments:
 *     <code>http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments</code></li>
 *   <li>Exclusive XML canonicalization without comments:
 *     <code>http://www.w3.org/2001/10/xml-exc-c14n#</code></li>
 *   <li>Exclusive XML canonicalization with comments:
 *     <code>http://www.w3.org/2001/10/xml-exc-c14n#WithComments</code></li>
 *   <li>Canonical XML 1.1 without comments:
 *     <code>http://www.w3.org/2006/12/xml-c14n11</code></li>
 *   <li>Canonical XML 1.1 with comments:
 *     <code>http://www.w3.org/2006/12/xml-c14n11#WithComments</code></li>
 * </ul>
 *
 * <p>
 * Each URI is also available as a constant of this class, for
 * example {@link #CANONICAL_XML} or {@link #CANONICAL_XML_11}.
 * </p>
 *
 * @param out the output stream the document
 *     is written onto
 * @param algorithm the URI for the canonicalization algorithm
 *
 * @throws CanonicalizationException if the algorithm is
 *     not recognized
 * @throws NullPointerException if the algorithm is null
 *
 */
public Canonicalizer(
  OutputStream out, String algorithm) {

    if (algorithm == null) {
        throw new NullPointerException("Null algorithm");
    }

    // The serializer is created before the algorithm is validated, so a
    // problem with the stream is reported ahead of an unknown algorithm.
    this.serializer = new CanonicalXMLSerializer(out);
    serializer.setLineSeparator("\n");

    if (algorithm.equals(CANONICAL_XML)) {
        this.withComments = false;
        this.exclusive = false;
    }
    else if (algorithm.equals(CANONICAL_XML_WITH_COMMENTS)) {
        this.withComments = true;
        this.exclusive = false;
    }
    else if (algorithm.equals(EXCLUSIVE_XML_CANONICALIZATION)) {
        this.withComments = false;
        this.exclusive = true;
    }
    else if (algorithm.equals(EXCLUSIVE_XML_CANONICALIZATION_WITH_COMMENTS)) {
        this.withComments = true;
        this.exclusive = true;
    }
    else if (algorithm.equals(CANONICAL_XML_11)) {
        this.withComments = false;
        this.exclusive = false;
        this.v11 = true;
    }
    else if (algorithm.equals(CANONICAL_XML_11_WITH_COMMENTS)) {
        this.withComments = true;
        this.exclusive = false;
        this.v11 = true;
    }
    else {
        throw new CanonicalizationException(
          "Unsupported canonicalization algorithm: " + algorithm);
    }
}
private class CanonicalXMLSerializer extends Serializer {
// The document subset being canonicalized, or null when the entire
// document is being canonicalized. The whole-document case is somewhat
// easier than canonicalizing only a document subset embedded in nodes.
// Assigned by the enclosing Canonicalizer before each write.
private Nodes nodes;
// Namespace bindings already declared on the output path: reset in
// write(Document), pushed in writeStartTag, popped in writeEndTag, and
// extended in writeNamespaceDeclarations.
private NamespaceSupport inScope;
// Creates a Serializer that outputs a canonical XML document with or
// without comments.
// Params: out - the OutputStream the document is written onto
/**
 * <p>
 * Constructs a serializer that writes onto the given stream and
 * uses a single line feed as its line separator.
 * </p>
 *
 * @param out the <code>OutputStream</code> the document
 *     is written onto
 */
CanonicalXMLSerializer(OutputStream out) {
    super(out);
    setLineSeparator("\n");
}
// Serializes a document onto the output stream using the canonical XML
// algorithm.
// Params: doc - the Document to serialize
// Throws: IOException - if the underlying OutputStream encounters an
//         I/O error
/**
 * <p>
 * Writes a document in canonical form. Children up to and including
 * the root element are written first, with a line break after each
 * processing instruction or retained comment; children after the
 * root element are written with the line break placed before them.
 * When a document subset is active, non-element children are written
 * only if they belong to the subset.
 * </p>
 *
 * @param doc the <code>Document</code> to serialize
 *
 * @throws IOException if the underlying <code>OutputStream</code>
 *     encounters an I/O error
 */
public final void write(Document doc) throws IOException {

    inScope = new NamespaceSupport();

    // Prolog and root element.
    int position = 0;
    Node child;
    do {
        child = doc.getChild(position);
        position++;
        boolean selected = nodes == null
          || child instanceof Element
          || nodes.contains(child);
        if (selected) {
            writeChild(child);
            if (child instanceof ProcessingInstruction
              || (child instanceof Comment && withComments)) {
                breakLine();
            }
        }
    } while (!(child instanceof Element));

    // Everything that follows the root element.
    for (int i = position; i < doc.getChildCount(); i++) {
        Node trailing = doc.getChild(i);
        boolean skipped = nodes != null
          && !(trailing instanceof Element)
          && !nodes.contains(trailing);
        if (skipped) {
            continue;
        }
        if (trailing instanceof ProcessingInstruction
          || (trailing instanceof Comment && withComments)) {
            breakLine();
        }
        writeChild(trailing);
    }

    flush();
}
// Serializes an element onto the output stream using the canonical XML
// algorithm. The result is guaranteed to be well-formed. If element does
// not have a parent element, it will also be namespace well-formed.
// Params: element - the Element to serialize
// Throws: IOException - if the underlying OutputStream encounters an
//         I/O error
/**
 * <p>
 * Serializes an element onto the output stream using the canonical
 * XML algorithm. The result is guaranteed to be well-formed.
 * If <code>element</code> does not have a parent element, it will
 * also be namespace well-formed.
 * </p>
 *
 * <p>
 * The subtree is walked with a loop rather than with recursion; an
 * array of child positions plays the role of the call stack.
 * </p>
 *
 * @param element the <code>Element</code> to serialize
 *
 * @throws IOException if the underlying <code>OutputStream</code>
 *     encounters an I/O error
 */
protected final void write(Element element)
throws IOException {
// treat empty elements differently to avoid an
// instanceof test
if (element.getChildCount() == 0) {
writeStartTag(element, false);
writeEndTag(element);
}
else {
// current: the node being visited.
// end: true when we have just climbed back up to current and
// still have to write its end-tag.
Node current = element;
boolean end = false;
// index: position of current among its parent's children.
int index = -1;
// indexes/top: stack of child positions, one slot per depth level.
int[] indexes = new int[10];
int top = 0;
indexes[0] = -1;
while (true) {
if (!end && current.getChildCount() > 0) {
// Descend: open the element and move to its first child,
// growing the position stack first if it is full.
writeStartTag((Element) current, false);
current = current.getChild(0);
index = 0;
top++;
indexes = grow(indexes, top);
indexes[top] = 0;
}
else {
if (end) {
// All children are done: close the element, and stop once
// the element we started from has been closed.
writeEndTag((Element) current);
if (current == element) break;
}
else {
// A node without children: hand it to writeChild.
writeChild(current);
}
end = false;
ParentNode parent = current.getParent();
if (parent.getChildCount() - 1 == index) {
// current was the last child: climb to the parent and restore
// the parent's own position from the stack.
current = parent;
top--;
if (current != element) {
index = indexes[top];
}
end = true;
}
else {
// Move on to the next sibling.
index++;
indexes[top] = index;
current = parent.getChild(index);
}
}
}
}
}
/**
 * Makes sure slot <code>top</code> exists in the position stack.
 *
 * @param indexes the current stack array
 * @param top the slot about to be written
 * @return <code>indexes</code> itself if <code>top</code> is within
 *     bounds; otherwise a copy twice as long holding the same values
 */
private int[] grow(int[] indexes, int top) {
    if (top >= indexes.length) {
        return Arrays.copyOf(indexes, indexes.length * 2);
    }
    return indexes;
}
/**
 * Writes the start-tag of an element: the tag name, the namespace
 * declarations that have to appear on it, and its attributes. When a
 * document subset is active (nodes is not null), the tag name and the
 * closing '>' are written only if the element itself is in the subset;
 * namespace declarations and attributes are still examined either way.
 *
 * @param element the element whose start-tag is written
 * @param isEmpty not read by this implementation
 *
 * @throws IOException if writing to the underlying stream fails
 */
protected void writeStartTag(Element element, boolean isEmpty)
throws IOException {
// Is the element itself part of the output?
boolean writeElement = nodes == null || nodes.contains(element);
if (writeElement) {
// New namespace scope for this element; popped again in writeEndTag.
inScope.pushContext();
writeRaw("<");
writeRaw(element.getQualifiedName());
}
// Declarations to emit, keyed by prefix; the TreeMap keeps them sorted
// by prefix.
SortedMap<String, String> map = new TreeMap<String, String>();
if (nodes == null) {
// Whole-document mode: look at the declarations on the element itself.
ParentNode parent = element.getParent();
Element parentElement = null;
if (parent instanceof Element) {
parentElement = (Element) parent;
}
for (int i = 0;
i < element.getNamespaceDeclarationCount();
i++) {
String prefix = element.getNamespacePrefix(i);
String uri = element.getNamespaceURI(prefix);
// Already declared with this URI on the output path: nothing to do.
if (uri.equals(inScope.getURI(prefix))) {
continue;
}
else if (exclusive) {
if (needToDeclareNamespace(element, prefix, uri)) {
map.put(prefix, uri);
}
}
else if (uri.equals("")) {
// no need to say xmlns=""
// unless there is a parent element whose default namespace is
// not empty
if (parentElement == null) continue;
if ("".equals(parentElement.getNamespaceURI(""))) {
continue;
}
map.put(prefix, uri);
}
else {
map.put(prefix, uri);
}
}
writeNamespaceDeclarations(map);
}
else {
// Subset mode: namespace nodes come from the sorted node list.
int position = indexOf(element);
// do we need to undeclare a default namespace?
// You know, should I instead create an output tree and then just
// canonicalize that? probably not
if (position != -1 && "".equals(element.getNamespaceURI())) {
ParentNode parent = element.getParent();
// Here we have to check for the nearest default on parents in the
// output tree, not the input tree
while (parent instanceof Element
&& !(nodes.contains(parent))) {
parent = parent.getParent();
}
if (parent instanceof Element) {
String uri = ((Element) parent).getNamespaceURI("");
if (! "".equals(uri)) {
map.put("", "");
}
}
}
// Scan the run of Namespace nodes that directly follows the element
// in the node list.
// NOTE(review): when the element is not in the subset, position is -1
// and this scan starts at index 0 -- confirm that is intended.
for (int i = position+1; i < nodes.size(); i++) {
Node next = nodes.get(i);
if ( !(next instanceof Namespace) ) break;
Namespace namespace = (Namespace) next;
String prefix = namespace.getPrefix();
String uri = namespace.getValue();
if (uri.equals(inScope.getURI(prefix))) {
continue;
}
else if (exclusive) {
if (needToDeclareNamespace(element, prefix, uri)) {
map.put(prefix, uri);
}
}
else {
map.put(prefix, uri);
}
}
writeNamespaceDeclarations(map);
}
// Attributes, already sorted. In subset mode an attribute is written if
// it is in the subset, or if it is an xml: attribute whose parent is not
// this element (sortAttributes adds inherited or rebuilt xml: attributes).
Attribute[] sorted = sortAttributes(element);
for (int i = 0; i < sorted.length; i++) {
if (nodes == null || nodes.contains(sorted[i])
|| (sorted[i].getNamespaceURI().equals(Namespace.XML_NAMESPACE) && sorted[i].getParent() != element)
) {
write(sorted[i]);
}
}
if (writeElement) {
writeRaw(">");
}
}
/**
 * Writes each prefix/URI pair of the map as a namespace declaration,
 * in the map's iteration order and each preceded by a single space,
 * and records the binding in the in-scope namespace context.
 *
 * @param map the declarations to write, keyed by prefix
 *
 * @throws IOException if writing to the underlying stream fails
 */
private void writeNamespaceDeclarations(SortedMap<String, String> map) throws IOException {
    for (Entry<String, String> declaration : map.entrySet()) {
        String prefix = declaration.getKey();
        String uri = declaration.getValue();
        writeRaw(" ");
        writeNamespaceDeclaration(prefix, uri);
        inScope.declarePrefix(prefix, uri);
    }
}
/**
 * Decides whether a namespace binding has to be declared on an
 * element. The binding is a candidate only if the element visibly
 * utilizes it or the prefix is on the inclusive prefix list; a
 * candidate is then declared unless an output ancestor already
 * covers it.
 *
 * @param parent the element the declaration would be written on
 * @param prefix the namespace prefix
 * @param uri the namespace URI bound to the prefix
 * @return true if the declaration must be written
 */
private boolean needToDeclareNamespace(
  Element parent, String prefix, String uri) {

    boolean candidate = visiblyUtilized(parent, prefix, uri)
      || inclusiveNamespacePrefixes.contains(prefix);
    if (!candidate) {
        return false;
    }
    return noOutputAncestorUsesPrefix(parent, prefix, uri);
}
/**
 * Reports whether an element makes visible use of a namespace
 * binding: either the element's own prefix and namespace URI match,
 * or one of its attributes that is part of the output carries the
 * prefix.
 *
 * @param element the element to inspect
 * @param prefix the namespace prefix
 * @param uri the namespace URI bound to the prefix
 * @return true if the binding is visibly utilized
 */
private boolean visiblyUtilized(Element element, String prefix, String uri) {

    String elementPrefix = element.getNamespacePrefix();
    String elementUri = element.getNamespaceURI();
    if (prefix.equals(elementPrefix) && elementUri.equals(uri)) {
        return true;
    }

    for (int i = 0; i < element.getAttributeCount(); i++) {
        Attribute attribute = element.getAttribute(i);
        boolean inOutput = nodes == null || nodes.contains(attribute);
        if (inOutput && prefix.equals(attribute.getNamespacePrefix())) {
            return true;
        }
    }
    return false;
}
/**
 * Walks up from an element looking for the nearest ancestor element
 * in the output that uses the prefix, either in its own name or on
 * one of its attributes. If one is found, the result says whether
 * that ancestor binds the prefix to a different URI. If none is
 * found the result is true, except that an empty URI on a child of
 * the document yields false.
 *
 * @param original the element the declaration would be written on
 * @param prefix the namespace prefix
 * @param uri the namespace URI bound to the prefix
 * @return true if the declaration is still needed
 */
private boolean noOutputAncestorUsesPrefix(Element original, String prefix, String uri) {

    ParentNode ancestor = original.getParent();
    if ("".equals(uri) && ancestor instanceof Document) {
        return false;
    }

    for (; ancestor != null && !(ancestor instanceof Document);
           ancestor = ancestor.getParent()) {

        // Ancestors that are not part of the output do not count.
        if (nodes != null && !nodes.contains(ancestor)) {
            continue;
        }

        Element element = (Element) ancestor;
        boolean usesPrefix = element.getNamespacePrefix().equals(prefix);
        for (int i = 0; !usesPrefix && i < element.getAttributeCount(); i++) {
            usesPrefix = element.getAttribute(i).getNamespacePrefix().equals(prefix);
        }
        if (usesPrefix) {
            return !element.getNamespaceURI(prefix).equals(uri);
        }
    }
    return true;
}
// ???? move into Nodes?
/**
 * Finds an element in the current node list by identity.
 *
 * @param element the element to look for
 * @return the position of the element, or -1 if it is not in the list
 */
private int indexOf(Element element) {
    final int size = nodes.size();
    int position = 0;
    while (position < size) {
        if (nodes.get(position) == element) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Writes one attribute as a space, the qualified name, and the
 * escaped value enclosed in double quotes.
 *
 * @param attribute the attribute to write
 *
 * @throws IOException if writing to the underlying stream fails
 */
protected void write(Attribute attribute) throws IOException {
    String qualifiedName = attribute.getQualifiedName();
    String escapedValue = prepareAttributeValue(attribute);

    writeRaw(" ");
    writeRaw(qualifiedName);
    writeRaw("=\"");
    writeRaw(escapedValue);
    writeRaw("\"");
}
/**
 * Writes the end-tag of an element and leaves its namespace scope.
 * Nothing happens when a document subset is active and the element
 * is not part of it.
 *
 * @param element the element to close
 *
 * @throws IOException if writing to the underlying stream fails
 */
protected void writeEndTag(Element element) throws IOException {
    if (nodes != null && !nodes.contains(element)) {
        return;
    }
    writeRaw("</");
    writeRaw(element.getQualifiedName());
    writeRaw(">");
    inScope.popContext();
}
// XPath context binding the "xml" prefix to the XML namespace; passed to
// the ancestor::*/@xml:* query in sortAttributes.
private final XPathContext xmlcontext = new XPathContext("xml", Namespace.XML_NAMESPACE);
/**
 * Returns the attributes to consider for an element's start-tag, sorted
 * with the class's attribute comparator. Besides the element's own
 * attributes the array may hold xml: attributes taken from ancestors.
 * That happens only for non-exclusive canonicalization of a document
 * subset, when the element is in the subset and its parent is not.
 *
 * @param element the element whose attributes are collected
 * @return the element's attributes plus any inherited xml: attributes,
 *     in sorted order
 */
private Attribute[] sortAttributes(Element element) {
// Inherited xml: attributes, keyed by local name.
Map<String, Attribute> nearest = new TreeMap<String, Attribute>();
// add in any inherited xml: attributes
if (!exclusive && nodes != null && nodes.contains(element)
&& !nodes.contains(element.getParent())) {
// grab all xml: attributes
Nodes attributes = element.query("ancestor::*/@xml:*", xmlcontext);
if (attributes.size() != 0) {
// It's important to count backwards here because
// XPath returns all nodes in document order, which
// is top-down. To get the nearest we need to go
// bottom up instead.
for (int i = attributes.size()-1; i >= 0; i--) {
Attribute a = (Attribute) attributes.get(i);
String name = a.getLocalName();
if (element.getAttribute(name, Namespace.XML_NAMESPACE) != null) {
// this element already has that attribute
continue;
} else if (v11 && "id".equals(name)) { // Don't inherit xml:id in C14N 1.1
continue;
}
if (! nearest.containsKey(name)) {
// In 1.1 mode only xml:lang and xml:space are inherited here.
if (!v11 || "lang".equals(name) || "space".equals(name)) {
Element parent = (Element) a.getParent();
if (! nodes.contains(parent)) {
nearest.put(name, a);
}
else {
// The nearest carrier is itself in the subset; a null
// placeholder keeps more distant ancestors from being used.
nearest.put(name, null);
}
}
}
}
}
// remove null values
Iterator<Attribute> iterator = nearest.values().iterator();
while (iterator.hasNext()) {
Attribute a = iterator.next();
if (a == null) iterator.remove();
}
if (v11) { // fixup xml:base attributes
// Join the xml:base values of the omitted ancestors with the
// element's own xml:base value, if it has one.
List<String> bases = getOmittedBases(element);
Attribute baseAttribute = element.getAttribute("base", Namespace.XML_NAMESPACE);
String baseValue = "";
if (baseAttribute != null) {
baseValue = baseAttribute.getValue();
// NOTE(review): this removes xml:base from the element being
// canonicalized, and nothing in this method puts it back.
element.removeAttribute(baseAttribute);
}
if (!bases.isEmpty()) {
for (int i = 0; i < bases.size(); i++) {
baseValue = joinURIReferences(bases.get(i), baseValue);
}
}
if (baseValue != null && baseValue.length() > 0) {
// A new, parentless attribute carries the joined value.
nearest.put("base", new Attribute("xml:base", Namespace.XML_NAMESPACE, baseValue));
}
}
}
// The element's own attributes first, then the inherited ones; the sort
// below fixes the final order.
int localCount = element.getAttributeCount();
Attribute[] result
= new Attribute[localCount + nearest.size()];
for (int i = 0; i < localCount; i++) {
result[i] = element.getAttribute(i);
}
Iterator<Attribute> iterator = nearest.values().iterator();
for (int j = localCount; j < result.length; j++) {
result[j] = iterator.next();
}
Arrays.sort(result, comparator);
return result;
}
/**
 * Collects the xml:base values of the ancestor elements directly
 * above an element that are not part of the current node list. The
 * walk stops at the first ancestor that is in the list or is not an
 * element.
 *
 * @param element the element whose omitted ancestors are examined
 * @return the xml:base values found, nearest ancestor first
 */
private List<String> getOmittedBases(Element element) {
    ArrayList<String> omitted = new ArrayList<String>();
    // TODO(elharo): rework this to not need the ancestor cursor variable
    for (ParentNode ancestor = element.getParent();
         ancestor instanceof Element && !nodes.contains(ancestor);
         ancestor = ancestor.getParent()) {
        String base = ((Element) ancestor)
          .getAttributeValue("base", Namespace.XML_NAMESPACE);
        if (base != null) {
            omitted.add(base);
        }
    }
    return omitted;
}
private String prepareAttributeValue(Attribute attribute) {
String value = attribute.getValue();
StringBuffer result = new StringBuffer(value.length());
if (attribute.getType().equals(Attribute.Type.CDATA)
|| attribute.getType().equals(Attribute.Type.UNDECLARED)) {
char[] data = value.toCharArray();
for (int i = 0; i < data.length; i++) {
char c = data[i];
if (c == '\t') {
result.append("	");
}
else if (c == '\n') {
result.append("
");
}
else if (c == '\r') {
result.append("
");
}
else if (c == '\"') {
result.append(""");
}
else if (c == '&') {
result.append("&");
}
else if (c == '<') {
result.append("<");
}
else {
result.append(c);
}
}
}
else {
// According to the spec, "Whitespace character references
// other than   are not affected by attribute value
// normalization. For parsed documents, the parser will
// still replace these with the actual character. I am
// going to assume that if one is found here, that the
// user meant to put it there; and so we will escape it
// with a character reference
char[] data = value.toCharArray();
boolean seenFirstNonSpace = false;
for (int i = 0; i < data.length; i++) {
if (data[i] == ' ') {
if (i != data.length-1 && data[i+1] != ' ' && seenFirstNonSpace) {
result.append(data[i]);
}
continue;
}
seenFirstNonSpace = true;
if (data[i] == '\t') {
result.append("	");
}
else if (data[i] == '\n') {
result.append("
");
}
else if (data[i] == '\r') {
result.append("
");
}
else if (data[i] == '\"') {
result.append(""");
}
else if (data[i] == '&') {
result.append("&");
}
else if (data[i] == '<') {
result.append("<");
}
else {
result.append(data[i]);
}
}
}
return result.toString();
}
// Serializes a Text object onto the output stream using the UTF-8
// encoding. The reserved characters <, >, and & are escaped using the
// standard entity references such as &lt;, &gt;, and &amp;.
// Params: text - the Text to serialize
// Throws: IOException - if the underlying OutputStream encounters an
//         I/O error
/**
* <p>
* Serializes a <code>Text</code> object
* onto the output stream using the UTF-8 encoding.
* The reserved characters <, >, and &
* are escaped using the standard entity references such as
* <code>&lt;</code>, <code>&gt;</code>,
* and <code>&amp;</code>.
* </p>
*
* @param text the <code>Text</code> to serialize
*
* @throws IOException if the underlying <code>OutputStream</code>
* encounters an I/O error
*/
protected final void write(Text text) throws IOException {
if (nodes == null || nodes.contains(text)) {
String input = text.getValue();
StringBuffer result = new StringBuffer(input.length());
for (int i = 0; i < input.length(); i++) {
char c = input.charAt(i);
if (c == '\r') {
result.append("
");
}
else if (c == '&') {
result.append("&");
}
else if (c == '<') {
result.append("<");
}
else if (c == '>') {
result.append(">");
}
else {
result.append(c);
}
}
writeRaw(result.toString());
}
}
// Serializes a Comment object onto the output stream if and only if this
// serializer is configured to produce canonical XML with comments.
// Params: comment - the Comment to serialize
// Throws: IOException - if the underlying OutputStream encounters an
//         I/O error
/**
 * <p>
 * Writes a comment, provided the canonicalizer keeps comments and,
 * when a document subset is active, the comment belongs to it.
 * Otherwise nothing is written.
 * </p>
 *
 * @param comment the <code>Comment</code> to serialize
 *
 * @throws IOException if the underlying <code>OutputStream</code>
 *     encounters an I/O error
 */
protected final void write(Comment comment)
  throws IOException {

    if (!withComments) {
        return;
    }
    if (nodes != null && !nodes.contains(comment)) {
        return;
    }
    super.write(comment);
}
/**
 * Writes a processing instruction unless a document subset is active
 * and the instruction is not part of it.
 *
 * @param pi the processing instruction to serialize
 *
 * @throws IOException if the underlying stream encounters an I/O error
 */
protected final void write(ProcessingInstruction pi)
  throws IOException {

    boolean omitted = nodes != null && !nodes.contains(pi);
    if (!omitted) {
        super.write(pi);
    }
}
// Does nothing because canonical XML does not include document type
// declarations.
// Params: doctype - the document type declaration to serialize
/**
 * <p>
 * Intentionally a no-op: a document type declaration is never
 * part of canonical XML output.
 * </p>
 *
 * @param doctype the document type declaration, which is ignored
 */
protected final void write(DocType doctype) {
    // Deliberately empty; canonical XML carries no DOCTYPE.
}
/**
 * Dispatches a node to the matching writer: documents, attributes
 * and namespace nodes have dedicated methods, and every other node
 * is passed to <code>writeChild</code>.
 *
 * @param node the node to serialize
 *
 * @throws IOException if the underlying stream encounters an I/O error
 */
public void write(Node node) throws IOException {
    if (node instanceof Document) {
        write((Document) node);
        return;
    }
    if (node instanceof Attribute) {
        write((Attribute) node);
        return;
    }
    if (node instanceof Namespace) {
        write((Namespace) node);
        return;
    }
    writeChild(node);
}
/**
 * Writes a namespace node as a declaration preceded by a space:
 * <code>xmlns="uri"</code> for the empty prefix and
 * <code>xmlns:prefix="uri"</code> otherwise.
 *
 * @param namespace the namespace node to write
 *
 * @throws IOException if the underlying stream encounters an I/O error
 */
private void write(Namespace namespace) throws IOException {
    String prefix = namespace.getPrefix();
    String uri = namespace.getValue();
    boolean hasPrefix = !"".equals(prefix);

    writeRaw(" xmlns");
    if (hasPrefix) {
        writeRaw(":");
        writeRaw(prefix);
    }
    writeRaw("=\"");
    writeAttributeValue(uri);
    writeRaw("\"");
}
}
// Serializes a node onto the output stream using the specified
// canonicalization algorithm. If the node is a document or an element,
// then the node's entire subtree is written out.
// Params: node - the node to canonicalize
// Throws: IOException - if the underlying OutputStream encounters an
//         I/O error
/**
 * <p>
 * Canonicalizes a single node. An element is written together with
 * its whole subtree: the subtree's nodes, attributes and namespace
 * nodes are selected with XPath and written as a document subset.
 * Any other kind of node is handed straight to the serializer.
 * </p>
 *
 * <p>
 * An element that does not belong to a document is attached to a
 * temporary document for the duration of the call and detached
 * again afterwards.
 * </p>
 *
 * @param node the node to canonicalize
 *
 * @throws IOException if the underlying <code>OutputStream</code>
 *     encounters an I/O error
 */
public final void write(Node node) throws IOException {

    if (!(node instanceof Element)) {
        serializer.nodes = null;
        serializer.write(node);
        serializer.flush();
        return;
    }

    // See this thread:
    // http://lists.ibiblio.org/pipermail/xom-interest/2005-October/002656.html
    Element pseudoRoot = null;
    if (node.getDocument() == null) {
        // Hang the top of the detached tree under a throwaway root
        // so that the XPath query below has a document to run in.
        pseudoRoot = new Element("pseudo");
        new Document(pseudoRoot);
        ParentNode top = (ParentNode) node;
        while (top.getParent() != null) {
            top = top.getParent();
        }
        pseudoRoot.appendChild(top);
    }

    try {
        // XXX Consider if it's faster to do this without XPath
        Nodes subtree = node.query(".//. | .//@* | .//namespace::*");
        if (exclusive) {
            // Gather every prefix used by a selected element or by a
            // prefixed attribute.
            Set<String> usedPrefixes = new HashSet<String>(subtree.size());
            for (int i = 0; i < subtree.size(); i++) {
                Node member = subtree.get(i);
                if (member instanceof Element) {
                    usedPrefixes.add(((Element) member).getNamespacePrefix());
                }
                else if (member instanceof Attribute) {
                    String attributePrefix = ((Attribute) member).getNamespacePrefix();
                    if (!"".equals(attributePrefix)) {
                        usedPrefixes.add(attributePrefix);
                    }
                }
            }
            // Drop namespace nodes whose prefix nothing uses; walking
            // backwards keeps the remaining indexes valid.
            for (int i = subtree.size() - 1; i >= 0; i--) {
                Node member = subtree.get(i);
                if (member instanceof Namespace
                  && !usedPrefixes.contains(((Namespace) member).getPrefix())) {
                    subtree.remove(i);
                }
            }
        }
        write(subtree);
    }
    finally {
        if (pseudoRoot != null) {
            pseudoRoot.removeChild(0);
        }
    }

    serializer.flush();
}
// Serializes a document subset onto the output stream using the canonical
// XML algorithm. All nodes in the list must come from the same document.
// Furthermore, they must come from a document. They cannot be detached.
// The nodes need not be sorted. This method will sort them into the
// appropriate order for canonicalization.
// In most common use cases, these nodes will be the result of evaluating
// an XPath expression. For example,
//   Canonicalizer canonicalizer
//     = new Canonicalizer(System.out, Canonicalizer.CANONICAL_XML);
//   Nodes result = doc.query("//. | //@* | //namespace::*");
//   canonicalizer.write(result);
// Children are not output unless the subset also includes them. Including
// an element in the subset does not automatically select all the element's
// children, attributes, and namespaces. Furthermore, not selecting an
// element does not imply that its children, namespaces, attributes will
// not be output.
// Params: documentSubset - the nodes to serialize
// Throws: IOException - if the underlying OutputStream encounters an
//         I/O error
//         CanonicalizationException - if the nodes come from more than one
//         document; or if a detached node is in the list
/**
 * <p>
 * Canonicalizes a document subset. Every node in the list must
 * belong to one and the same document; detached nodes are not
 * allowed. The list may be in any order, since the nodes are sorted
 * into document order before they are written. An empty list writes
 * nothing.
 * </p>
 *
 * <p>
 * Typically the list is the result of an XPath query, for instance:
 * </p>
 *
 * <pre><code> Canonicalizer canonicalizer
 *   = new Canonicalizer(System.out, Canonicalizer.CANONICAL_XML);
 * Nodes result = doc.query("//. | //@* | //namespace::*");
 * canonicalizer.write(result);
 * </code></pre>
 *
 * <p>
 * Selecting an element does not select its children, attributes
 * or namespaces along with it; each must be in the subset itself
 * to be written. Conversely, leaving an element out does not keep
 * its children, namespaces or attributes from being written.
 * </p>
 *
 * @param documentSubset the nodes to serialize
 *
 * @throws IOException if the underlying <code>OutputStream</code>
 *     encounters an I/O error
 * @throws CanonicalizationException if the nodes come from more
 *     than one document; or if a detached node is in the list
 */
public final void write(Nodes documentSubset) throws IOException {

    if (documentSubset.size() <= 0) {
        return;
    }

    Document owner = documentSubset.get(0).getDocument();
    if (owner == null) {
        throw new CanonicalizationException(
          "Canonicalization is not defined for detached nodes");
    }

    serializer.nodes = sort(documentSubset);
    serializer.write(owner);
    serializer.flush();
}
// Specifies the prefixes that will be output as specified in regular
// canonical XML, even when doing exclusive XML canonicalization.
// Params: inclusiveNamespacePrefixes - a whitespace separated list of
//         namespace prefixes that will always be included in the output,
//         even in exclusive canonicalization
/**
 * <p>
 * Replaces the list of prefixes that are handled as in regular
 * canonical XML even though exclusive canonicalization is in
 * effect. The previous list is always cleared; the new one is
 * stored only if this canonicalizer is exclusive and the argument
 * is not null.
 * </p>
 *
 * @param inclusiveNamespacePrefixes a whitespace separated list
 *     of namespace prefixes that will always be included in the
 *     output, even in exclusive canonicalization
 */
public final void setInclusiveNamespacePrefixList(String inclusiveNamespacePrefixes) {

    this.inclusiveNamespacePrefixes.clear();
    if (!this.exclusive || inclusiveNamespacePrefixes == null) {
        return;
    }

    StringTokenizer prefixes = new StringTokenizer(
      inclusiveNamespacePrefixes, " \t\r\n", false);
    while (prefixes.hasMoreTokens()) {
        this.inclusiveNamespacePrefixes.add(prefixes.nextToken());
    }
}
// XXX remove recursion
/**
 * Returns the given nodes in document order. The document is walked
 * recursively from its root and each listed node is appended to the
 * result as it is met.
 *
 * @param in the nodes to sort; must not be empty
 * @return a new list holding the nodes in document order
 *
 * @throws CanonicalizationException if a node is left over after
 *     the walk and belongs to a different document than the first
 *     node
 */
private Nodes sort(Nodes in) {

    Node root = in.get(0).getDocument();
    if (in.size() <= 1) {
        return new Nodes(in.get(0));
    }

    Nodes out = new Nodes();
    List<Node> pending = new ArrayList<Node>(in.size());
    List<Node> namespaces = new ArrayList<Node>();
    for (int i = 0; i < in.size(); i++) {
        Node node = in.get(i);
        pending.add(node);
        if (node instanceof Namespace) {
            namespaces.add(node);
        }
    }

    sort(pending, namespaces, out, (ParentNode) root);

    // Whatever is still pending was not reached by the walk: it is
    // either a duplicate or a node from a different document.
    for (Node leftover : pending) {
        if (root != leftover.getDocument()) {
            throw new CanonicalizationException(
              "Cannot canonicalize subsets that contain nodes from more than one document");
        }
    }
    return out;
}
/**
 * Joins a child URI reference onto a parent reference. A reference
 * that is exactly ".." is treated as "../". When there is no child
 * the parent is returned; otherwise the work is delegated to
 * <code>URIJoiner.absolutize</code>.
 *
 * @param parent the reference to resolve against
 * @param child the reference to resolve, possibly null
 * @return the joined reference
 */
private static String joinURIReferences(String parent, String child) {
    String base = "..".equals(parent) ? "../" : parent;
    if (child == null) {
        return base;
    }
    String reference = "..".equals(child) ? "../" : child;
    return URIJoiner.absolutize(base, reference);
}
/**
 * Recursive step of the document-order sort. Moves the parent (if
 * listed) and then, child by child, every listed node of the subtree
 * from <code>in</code> to <code>out</code>. For an element child the
 * order is: the element, its listed namespace nodes, its listed
 * attributes, then its descendants.
 *
 * @param in the nodes still waiting to be placed; shrinks as nodes
 *     are found
 * @param namespaces the namespace nodes of the original list; only
 *     checked for emptiness here
 * @param out receives the nodes in document order
 * @param parent the node whose subtree is walked
 */
private static void sort(List<Node> in, List<Node> namespaces, Nodes out, ParentNode parent) {

    if (in.isEmpty()) {
        return;
    }

    if (in.remove(parent)) {
        out.append(parent);
    }

    final int childCount = parent.getChildCount();
    for (int i = 0; i < childCount; i++) {
        Node child = parent.getChild(i);

        if (!(child instanceof Element)) {
            if (in.remove(child)) {
                out.append(child);
                if (in.isEmpty()) {
                    return;
                }
            }
            continue;
        }

        Element element = (Element) child;
        if (in.remove(element)) {
            out.append(element);
        }

        // attach namespaces
        if (!namespaces.isEmpty()) {
            Iterator<Node> remaining = in.iterator();
            while (remaining.hasNext()) {
                Node candidate = remaining.next();
                if (candidate instanceof Namespace
                  && element == ((Namespace) candidate).getParent()) {
                    out.append(candidate);
                    remaining.remove();
                }
            }
        }

        // attach attributes
        for (int a = 0; a < element.getAttributeCount(); a++) {
            Attribute att = element.getAttribute(a);
            if (in.remove(att)) {
                out.append(att);
                if (in.isEmpty()) {
                    return;
                }
            }
        }

        sort(in, namespaces, out, element);
    }
}
}