/* Copyright 2002-2006, 2009, 2010, 2013, 2018 Elliotte Rusty Harold
This library is free software; you can redistribute it and/or modify
it under the terms of version 2.1 of the GNU Lesser General Public
License as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
You can contact Elliotte Rusty Harold by sending e-mail to
elharo@ibiblio.org. Please include the word "XOM" in the
subject line. The XOM home page is located at http://www.xom.nu/
*/
package nu.xom;
import java.io.CharConversionException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.UTFDataFormatException;
import java.net.MalformedURLException;
import java.net.URL;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import org.apache.xerces.impl.Version;
This class is responsible for creating XOM Document
objects from a URL, file, string, or input stream by reading
an XML document. A SAX parser is used to read the
document and report any well-formedness errors.
Author: Elliotte Rusty Harold Version: 1.2.11
/**
* <p>
* This class is responsible for creating XOM <code>Document</code>
* objects from a URL, file, string, or input stream by reading
* an XML document. A SAX parser is used to read the
* document and report any well-formedness errors.
* </p>
*
* @author Elliotte Rusty Harold
* @version 1.2.11
*
*/
public class Builder {
private XMLReader parser;
private NodeFactory factory;
// Version of the Xerces parser found on the class path, as major.minor.
// Stays at 2.6 unless a 2.x release with a single-digit minor version
// is detected.
private static double xercesVersion = 2.6;
static {
    try {
        String fullVersion = Version.getVersion();
        // Skip the nine-character product prefix to reach the numbers.
        String numbers = fullVersion.substring(9);
        int firstDot = numbers.indexOf('.');
        int lastDot = numbers.lastIndexOf('.');
        String majorPart = numbers.substring(0, firstDot);
        String minorPart = numbers.substring(firstDot + 1, lastDot);
        boolean singleDigitMinor = Integer.parseInt(minorPart) < 10;
        if (singleDigitMinor && Integer.parseInt(majorPart) < 3) {
            // The first three characters are "major.minor" here.
            xercesVersion = Double.parseDouble(numbers.substring(0, 3));
        }
        // Otherwise it's 2.6 or later, which is all we need to know.
    }
    catch (Exception ex) {
        // The version string format changed, so presumably it's
        // 2.6 or later; keep the default.
    }
    catch (Error err) {
        // Xerces is not installed, so none of this matters.
    }
}
Creates a Builder
that uses the default node
factory and chooses among any available SAX2 parsers.
In order of preference, it looks for:
- Xerces 2.x (a.k.a. IBM XML parser for Java)
- GNU Ælfred
- Crimson
- Piccolo
- Oracle
- XP
- Saxon's Ælfred
- dom4j's Ælfred
- The platform default specified by the
org.xml.sax.driver
system property
Parsers must implicitly or explicitly support the
http://xml.org/sax/features/external-general-entities
and
http://xml.org/sax/features/external-parameter-entities
features XOM requires. Parsers that don't are rejected
automatically.
Throws: - XMLException – if no satisfactory parser is
installed in the local class path
/**
* <p>
* Creates a <code>Builder</code> that uses the default node
* factory and chooses among any available SAX2 parsers.
* In order of preference, it looks for:
* </p>
*
* <ol>
* <li>Xerces 2.x (a.k.a. IBM XML parser for Java)</li>
* <li>GNU Ælfred</li>
* <li>Crimson</li>
* <li>Piccolo</li>
* <li>Oracle</li>
* <li>XP</li>
* <li>Saxon's Ælfred</li>
* <li>dom4j's Ælfred</li>
* <li>The platform default specified by the
* <code>org.xml.sax.driver</code> system property</li>
* </ol>
*
* <p>
* Parsers must implicitly or explicitly support the
* http://xml.org/sax/features/external-general-entities
* and
* http://xml.org/sax/features/external-parameter-entities
* features XOM requires. Parsers that don't are rejected
* automatically.
* </p>
*
* @throws XMLException if no satisfactory parser is
* installed in the local class path
*/
public Builder() {
    // Non-validating, auto-detected parser, default node factory.
    this(findParser(false), false, null);
}
Creates a Builder
based on an optionally validating
parser. If the validate
argument
is true, then a validity error while
parsing will cause a fatal error; that is,
it will throw a ValidityException
.
Params: - validate – true if the parser should
validate the document while parsing
Throws: - XMLException – if no satisfactory parser
is installed in the local class path
/**
* <p>
* Creates a <code>Builder</code> based on an optionally validating
* parser. If the <code>validate</code> argument
* is true, then a validity error while
* parsing will cause a fatal error; that is,
* it will throw a <code>ValidityException</code>.
* </p>
*
* @param validate true if the parser should
* validate the document while parsing
*
* @throws XMLException if no satisfactory parser
* is installed in the local class path
*/
public Builder(boolean validate) {
    // Same as the (boolean, NodeFactory) constructor with no factory.
    this(validate, (NodeFactory) null);
}
Creates a Builder
based on an optionally
validating parser that builds node objects with the supplied
factory. If the validate
argument is true, then
a validity error while parsing will cause a fatal error; that
is, it will throw a ValidityException
.
Params: - validate – true if the parser should
validate the document while parsing
- factory – the
NodeFactory
that creates
the node objects for this Builder
Throws: - XMLException – if no satisfactory parser
is installed in the local class path
/**
* <p>
* Creates a <code>Builder</code> based on an optionally
* validating parser that builds node objects with the supplied
* factory. If the <code>validate</code> argument is true, then
* a validity error while parsing will cause a fatal error; that
* is, it will throw a <code>ValidityException</code>.
* </p>
*
* @param validate true if the parser should
* validate the document while parsing
* @param factory the <code>NodeFactory</code> that creates
* the node objects for this <code>Builder</code>
*
* @throws XMLException if no satisfactory parser
* is installed in the local class path
*/
public Builder(boolean validate, NodeFactory factory) {
// findParser picks the first available SAX2 parser that accepts the
// configuration XOM needs; it throws XMLException if none is usable.
this(findParser(validate), validate, factory);
}
// These are stored in the order of preference.
// findParser instantiates the first two entries itself (directly and
// by reflection) and only loads the entries from index 2 onward through
// XMLReaderFactory, so the position of those two entries matters.
// knownGoodParser also consults this list to decide whether a parser
// can be trusted.
private static String[] parsers = {
"nu.xom.XML1_0Parser",
"nu.xom.JDK15XML1_0Parser",
"org.apache.xerces.parsers.SAXParser",
"org.apache.xerces.jaxp.SAXParserImpl$JAXPSAXParser", // xerces-2.9.x
"com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser", // JDK 1.6
"com.sun.org.apache.xerces.internal.parsers.SAXParser",
"gnu.xml.aelfred2.XmlReader",
"org.apache.crimson.parser.XMLReaderImpl",
"com.bluecast.xml.Piccolo",
"oracle.xml.parser.v2.SAXParser",
"com.jclark.xml.sax.SAX2Driver",
"net.sf.saxon.aelfred.SAXDriver",
"com.icl.saxon.aelfred.SAXDriver",
"org.dom4j.io.aelfred2.SAXDriver",
"org.dom4j.io.aelfred.SAXDriver",
"org.xmlpull.v1.sax2.Driver" // android
};
// Returns the first SAX2 parser, in order of preference, that can be
// instantiated and accepts the configuration applied by setupParser.
// Throws XMLException if not even the platform default qualifies.
static XMLReader findParser(boolean validate) {

    // First look for Xerces. We only trust Xerces if we set it up
    // ourselves, and it needs special configuration, so it cannot be
    // loaded through XMLReaderFactory.
    try {
        XMLReader xerces = new XML1_0Parser();
        setupParser(xerces, validate);
        return xerces;
    }
    catch (SAXException ex) {
        // rejected the configuration; look for the next one
    }
    catch (NoClassDefFoundError err) {
        // Xerces is not available; look for the next one
    }

    // Second choice, loaded reflectively by class name.
    try {
        Object candidate
          = Class.forName("nu.xom.JDK15XML1_0Parser").newInstance();
        XMLReader jdkParser = (XMLReader) candidate;
        setupParser(jdkParser, validate);
        return jdkParser;
    }
    catch (SAXException ex) {
        // look for the next one
    }
    catch (InstantiationException ex) {
        // look for the next one
    }
    catch (ClassNotFoundException ex) {
        // look for the next one
    }
    catch (IllegalAccessException ex) {
        // look for the next one
    }
    catch (NoClassDefFoundError err) {
        // Xerces is not available; look for the next one
    }

    // The first two entries of the table were handled above.
    // XMLReaderFactory.createXMLReader never returns null; if it
    // can't locate the parser, it throws a SAXException.
    int index = 2;
    while (index < parsers.length) {
        String className = parsers[index];
        index++;
        try {
            XMLReader candidate = XMLReaderFactory.createXMLReader(className);
            setupParser(candidate, validate);
            return candidate;
        }
        catch (SAXException ex) {
            // try the next one
        }
        catch (NoClassDefFoundError err) {
            // try the next one
        }
    }

    // Last resort: the platform default.
    try {
        XMLReader fallback = XMLReaderFactory.createXMLReader();
        setupParser(fallback, validate);
        return fallback;
    }
    catch (SAXException ex) {
        throw new XMLException(
          "Could not find a suitable SAX2 parser", ex);
    }

}
// Applies the SAX features and error handlers XOM relies on. Mandatory
// settings propagate their SAX exception to the caller; optional ones
// are attempted and silently skipped if the parser refuses them.
private static void setupParser(XMLReader parser, boolean validate)
  throws SAXNotRecognizedException, SAXNotSupportedException {

    // General configuration for all parsers
    parser.setFeature(
      "http://xml.org/sax/features/namespace-prefixes", true);
    parser.setFeature(
      "http://xml.org/sax/features/namespaces", true);

    // Parser specific configuration is keyed on the class of the
    // innermost reader, so walk down through any chain of filters.
    XMLReader innermost = parser;
    while (innermost instanceof XMLFilter) {
        XMLReader next = ((XMLFilter) innermost).getParent();
        if (next == null) {
            break;
        }
        innermost = next;
    }
    String parserName = innermost.getClass().getName();

    if (validate) {
        parser.setFeature(
          "http://xml.org/sax/features/validation", true);
        parser.setErrorHandler(new ValidityRequired());
    }
    else if ("org.apache.crimson.parser.XMLReaderImpl".equals(parserName)) {
        // Crimson workaround: install an error handler instead of
        // setting the external entity features.
        parser.setErrorHandler(new NamespaceWellformednessRequired());
    }
    else {
        parser.setFeature(
          "http://xml.org/sax/features/external-general-entities",
          true);
        parser.setFeature(
          "http://xml.org/sax/features/external-parameter-entities",
          true);
    }

    try {
        parser.setFeature(
          "http://xml.org/sax/features/string-interning", true);
    }
    catch (SAXException ex) {
        // This parser does not support string interning.
        // We can live without that.
    }

    // A couple of Xerces specific features
    boolean xercesBased
      = "nu.xom.XML1_0Parser".equals(parserName)
      || "nu.xom.JDK15XML1_0Parser".equals(parserName)
      || "org.apache.xerces.parsers.SAXParser".equals(parserName)
      || "com.sun.org.apache.xerces.internal.parsers.SAXParser".equals(parserName)
      // xerces-2.9.x
      || "org.apache.xerces.jaxp.SAXParserImpl$JAXPSAXParser".equals(parserName)
      // JDK 1.6
      || "com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser".equals(parserName);
    if (!xercesBased) {
        return;
    }

    try {
        parser.setFeature(
          "http://apache.org/xml/features/allow-java-encodings", true);
    }
    catch (SAXException ex) {
        // Possibly an earlier version of Xerces; no big deal.
        // We can live without this feature.
    }

    // See http://nagoya.apache.org/bugzilla/show_bug.cgi?id=23768
    // if you care to know why this line breaks unit tests on
    // versions of Xerces prior to 2.6.1
    try {
        parser.setFeature(
          "http://apache.org/xml/features/standard-uri-conformant",
          true);
    }
    catch (SAXException ex) {
        // Possibly an earlier version of Xerces, or a
        // non-Xerces parser; no big deal.
        // We can live without this.
    }

}
Creates a Builder
that uses
the specified SAX XMLReader
.
Custom SAX features and properties such as
schema validation can be set on this XMLReader
before passing it to this method.
Params: - parser – the SAX2
XMLReader
that
parses the document
Throws: - XMLException – if
parser
does not support the
features XOM requires
/**
* <p>
* Creates a <code>Builder</code> that uses
* the specified SAX <code>XMLReader</code>.
* Custom SAX features and properties such as
* schema validation can be set on this <code>XMLReader</code>
* before passing it to this method.
* </p>
*
* @param parser the SAX2 <code>XMLReader</code> that
* parses the document
*
* @throws XMLException if <code>parser</code> does not support the
* features XOM requires
*/
public Builder(XMLReader parser) {
    // Non-validating, default node factory.
    this(parser, false, null);
}
Creates a Builder
that uses
the specified NodeFactory
to create
node objects.
Params: - factory – the
NodeFactory
that creates
the node objects for this Builder
Throws: - XMLException – if no satisfactory parser is
installed in the local class path
/**
* <p>
* Creates a <code>Builder</code> that uses
* the specified <code>NodeFactory</code> to create
* node objects.
* </p>
*
* @param factory the <code>NodeFactory</code> that creates
* the node objects for this <code>Builder</code>
*
* @throws XMLException if no satisfactory parser is
* installed in the local class path
*/
public Builder(NodeFactory factory) {
    // Non-validating; the (boolean, NodeFactory) constructor
    // locates the parser.
    this(false, factory);
}
Creates an optionally validating Builder
based
on the specified parser object. Custom SAX features and
properties such as schema validation can be set on this
XMLReader
before passing it to this method.
If the validate argument is true, then a validity error
while parsing will cause a fatal error; that is, it
will throw a ParsingException
Params: - parser – the SAX2
XMLReader
that parses
the document - validate – true if the parser should validate
the document while parsing
/**
* <p>
* Creates an optionally validating <code>Builder</code> based
* on the specified parser object. Custom SAX features and
* properties such as schema validation can be set on this
* <code>XMLReader</code> before passing it to this method.
* </p>
*
* <p>
* If the validate argument is true, then a validity error
* while parsing will cause a fatal error; that is, it
* will throw a <code>ParsingException</code>.
* </p>
*
* @param parser the SAX2 <code>XMLReader</code> that parses
* the document
* @param validate true if the parser should validate
* the document while parsing
*
*/
public Builder(XMLReader parser, boolean validate) {
// No factory supplied: setHandlers creates a default NodeFactory
// when it needs one.
this(parser, validate, null);
}
Creates an optionally validating Builder
that reads
data from the specified parser object and constructs new nodes
using the specified factory object. Custom SAX features and
properties such as schema validation can be set on this
XMLReader
before passing it to this method.
If the validate
argument is true, then a validity
error while parsing will throw a ParsingException
.
Params: - parser – the SAX2
XMLReader
that parses
the document - validate – true if the parser should validate the
document while parsing
- factory – the
NodeFactory
this builder uses to create objects in the tree
Throws: - XMLException – if
parser
does not support
the features XOM requires
/**
* <p>
* Creates an optionally validating <code>Builder</code> that reads
* data from the specified parser object and constructs new nodes
* using the specified factory object. Custom SAX features and
* properties such as schema validation can be set on this
* <code>XMLReader</code> before passing it to this method.
* </p>
*
* <p>
* If the <code>validate</code> argument is true, then a validity
* error while parsing will throw a <code>ParsingException</code>.
* </p>
*
* @param parser the SAX2 <code>XMLReader</code> that parses
* the document
* @param validate true if the parser should validate the
* document while parsing
* @param factory the <code>NodeFactory</code>
* this builder uses to create objects in the tree
*
* @throws XMLException if <code>parser</code> does not support
* the features XOM requires
*
*/
public Builder(
  XMLReader parser, boolean validate, NodeFactory factory) {

    try {
        setupParser(parser, validate);
    }
    catch (SAXException ex) {
        // Which mandatory setting failed depends on the mode:
        // validation when validating, otherwise the entity features.
        String complaint;
        if (validate) {
            complaint = " does not support validation.";
        }
        else {
            complaint = " does not support the entity resolution"
              + " features XOM requires.";
        }
        throw new XMLException(
          parser.getClass().getName() + complaint, ex);
    }

    // The parser accepted the configuration; wire up the handlers.
    this.parser = parser;
    this.factory = factory;
    setHandlers();

}
// Reports whether the parser is trusted to perform all the checks
// it is supposed to, judged by its class name. setHandlers uses the
// answer to decide whether a non-verifying handler may be installed.
private static boolean knownGoodParser(XMLReader parser) {

    String parserName = parser.getClass().getName();

    // In general, a filter may violate the constraints of XML 1.0.
    // However, I specifically trust Norm Walsh not to do that, so
    // if his filters are being used we look at the parent instead.
    if (parserName.equals("org.apache.xml.resolver.tools.ResolvingXMLFilter")) {
        XMLReader parent = ((XMLFilter) parser).getParent();
        if (parent == null) {
            // No parent has been attached, so there is nothing to
            // vouch for; treat the parser as untrusted instead of
            // failing with a NullPointerException.
            return false;
        }
        parserName = parent.getClass().getName();
    }

    // These parsers are known to not make all the checks
    // they're supposed to. :-(
    if (parserName.equals("gnu.xml.aelfred2.XmlReader")) return false;
    if (parserName.equals("net.sf.saxon.aelfred.SAXDriver")) return false;
    if (parserName.equals("com.icl.saxon.aelfred.SAXDriver")) return false;

    // A raw Xerces SAXParser is not trusted from version 2.4 on.
    if (parserName.equals("org.apache.xerces.parsers.SAXParser")
      && xercesVersion >= 2.4) {
        return false;
    }

    // Anything else is trusted only if it appears in the table of
    // known parsers.
    for (int i = 0; i < parsers.length; i++) {
        if (parserName.equals(parsers[i])) return true;
    }
    return false;

}
// Creates the handler that receives this builder's SAX events and
// registers it with the parser for content, DTD, lexical, and
// declaration events.
private void setHandlers() {

    boolean stockFactory = factory == null
      || factory.getClass().getName().equals("nu.xom.NodeFactory");

    XOMHandler handler;
    if (stockFactory && knownGoodParser(parser)) {
        // If no factory was supplied by the user, the factory field
        // is left null; only the handler gets a fresh one.
        NodeFactory handlerFactory
          = (factory != null) ? factory : new NodeFactory();
        handler = new NonVerifyingHandler(handlerFactory);
    }
    else {
        if (factory == null) {
            factory = new NodeFactory();
        }
        handler = new XOMHandler(factory);
    }

    parser.setContentHandler(handler);
    parser.setDTDHandler(handler);

    try {
        parser.setProperty(
          "http://xml.org/sax/properties/lexical-handler",
          handler);
    }
    catch (SAXException ex) {
        // This parser does not support lexical events.
        // We can live without them, though it does mean
        // there won't be any comments or a DOCTYPE declaration
        // in the tree.
    }

    try {
        parser.setProperty(
          "http://xml.org/sax/properties/declaration-handler",
          handler);
        // Due to Crimson bugs in misidentifying the internal and
        // external DTD subsets, we only build the internal DTD
        // subset if there is no external DTD subset.
        String readerClass = parser.getClass().getName();
        if ("org.apache.crimson.parser.XMLReaderImpl".equals(readerClass)) {
            handler.usingCrimson = true;
        }
    }
    catch (SAXException ex) {
        // This parser does not support declaration events.
        // We can live without them, though it does mean
        // there won't be any internal DTD subset.
    }

}
Parses the document at the specified URL.
Note that relative URLs generally do not work here, as
there's no base to resolve them against. This includes
relative URLs that point into the file system, though this
is somewhat platform dependent. Furthermore, file
URLs often only work when they adhere exactly to RFC 2396
syntax. URLs that work in Internet Explorer often fail when
used in Java. If you're reading XML from a file, more reliable
results are obtained by using the build
method
that takes a java.io.File
object as an argument.
Params: - systemID – an absolute URL from which the document is read.
The URL's scheme must be one supported by the Java VM.
Throws: - ValidityException – if a validity error is detected. This
is only thrown if the builder has been instructed to validate.
- ParsingException – if a well-formedness error is detected
- IOException – if an I/O error such as a broken socket
prevents the document from being fully read
Returns: the parsed Document
/**
* <p>
* Parses the document at the specified URL.
* </p>
*
* <p>
* Note that relative URLs generally do not work here, as
* there's no base to resolve them against. This includes
* relative URLs that point into the file system, though this
* is somewhat platform dependent. Furthermore, <code>file</code>
* URLs often only work when they adhere exactly to RFC 2396
* syntax. URLs that work in Internet Explorer often fail when
* used in Java. If you're reading XML from a file, more reliable
* results are obtained by using the <code>build</code> method
* that takes a <code>java.io.File</code> object as an argument.
* </p>
*
* @param systemID an absolute URL from which the document is read.
* The URL's scheme must be one supported by the Java VM.
*
* @return the parsed <code>Document</code>
*
* @throws ValidityException if a validity error is detected. This
* is only thrown if the builder has been instructed to validate.
* @throws ParsingException if a well-formedness error is detected
* @throws IOException if an I/O error such as a broken socket
* prevents the document from being fully read
*/
public Document build(String systemID)
  throws ParsingException, ValidityException, IOException {
    // Normalize the URL first, then let the parser fetch it.
    String canonical = canonicalizeURL(systemID);
    return build(new InputSource(canonical));
}
Reads the document from an input stream.
Params: - in – the input stream from which the document is read
Throws: - ValidityException – if a validity error is detected;
only thrown if the builder has been instructed to validate
- ParsingException – if a well-formedness error is detected
- IOException – if an I/O error such as a broken
socket prevents the document from being fully read
- NullPointerException – if
in
is null
Returns: the parsed Document
/**
* <p>
* Reads the document from an input stream.
* </p>
*
* @param in the input stream from which the document is read
*
* @return the parsed <code>Document</code>
*
* @throws ValidityException if a validity error is detected;
* only thrown if the builder has been instructed to validate
* @throws ParsingException if a well-formedness error is detected
* @throws IOException if an I/O error such as a broken
* socket prevents the document from being fully read
* @throws NullPointerException if <code>in</code> is null
*/
public Document build(InputStream in)
  throws ParsingException, ValidityException, IOException {
    if (in != null) {
        return build(new InputSource(in));
    }
    // Fail here with a clear message instead of inside the parser.
    throw new NullPointerException("Null InputStream");
}
Reads the document from an input stream while specifying
a base URI (which need not be the stream's actual URI).
Params: - in – the input stream from which the document is read
- baseURI – an absolute URI for this document; may be null
Throws: - ValidityException – if a validity error is detected;
only thrown if the builder has been instructed to validate
- ParsingException – if a well-formedness error is detected
- IOException – if an I/O error such as a broken
socket prevents the document from being fully read
Returns: the parsed Document
/**
* <p>
* Reads the document from an input stream while specifying
* a base URI (which need not be the stream's actual URI).
* </p>
*
* @param in the input stream from which the document is read
* @param baseURI an absolute URI for this document; may be null
*
* @return the parsed <code>Document</code>
*
* @throws ValidityException if a validity error is detected;
* only thrown if the builder has been instructed to validate
* @throws ParsingException if a well-formedness error is detected
* @throws IOException if an I/O error such as a broken
* socket prevents the document from being fully read
*/
public Document build(InputStream in, String baseURI)
  throws ParsingException, ValidityException, IOException {
    InputSource source = new InputSource(in);
    if (baseURI == null) {
        // No base URI: the source carries only the stream.
        return build(source);
    }
    source.setSystemId(canonicalizeURL(baseURI));
    return build(source);
}
// Nasty hack to make sure we get the right form
// of file URLs on Windows
private static String fileURLPrefix = "file://";
static {
String os = System.getProperty("os.name", "Unix");
// I could do System.setProperty("os.name" "Windows") to test
// this, but I'd need to use a fresh ClassLoader to rerun the
// static initializer block.
if (os.indexOf("Windows") >= 0) {
fileURLPrefix = "file:/";
}
}
Reads the document from a file.
The base URI of the document is set to the
location of the file.
Params: - in – the file from which the document is read
Throws: - ValidityException – if a validity error is detected. This
is only thrown if the builder has been instructed to validate.
- ParsingException – if a well-formedness error is detected
- IOException – if an I/O error such as a bad disk
prevents the file from being read
Returns: the parsed Document
/**
* <p>
* Reads the document from a file.
* The base URI of the document is set to the
* location of the file.
* </p>
*
* @param in the file from which the document is read
*
* @return the parsed <code>Document</code>
*
* @throws ValidityException if a validity error is detected. This
* is only thrown if the builder has been instructed to validate.
* @throws ParsingException if a well-formedness error is detected
* @throws IOException if an I/O error such as a bad disk
* prevents the file from being read
*/
public Document build(File in)
  throws ParsingException, ValidityException, IOException {

    // Java's toURL method doesn't properly escape file
    // names so we have to do it manually. The base URI is computed
    // before the stream is opened so that nothing can fail between
    // opening the stream and entering the try block that closes it.
    String base = toFileURL(in.getAbsolutePath());

    InputStream fin = new FileInputStream(in);
    try {
        return build(fin, base);
    }
    finally {
        fin.close();
    }

}

// Converts an absolute file system path into an escaped file URL.
// Returns the empty string if the path contains an unpaired surrogate,
// since no base URI can be created from such a name.
private static String toFileURL(String absolute) {

    StringBuffer url = new StringBuffer(fileURLPrefix);
    int length = absolute.length();
    char separatorChar = File.separatorChar;
    for (int i = 0; i < length; i++) {
        char c = absolute.charAt(i);
        if (c == separatorChar) {
            url.append('/');
        }
        else if (isLiteralPathChar(c)) {
            url.append(c);
        }
        else if (c >= 0x20 && c <= 0x7E) {
            // Remaining printable ASCII is escaped; the hex form
            // always has exactly two digits in this range.
            url.append('%');
            url.append(Integer.toHexString(c).toUpperCase());
        }
        else if (c < 0xD800 || c > 0xDFFF) {
            // control characters and non-ASCII, non-surrogate characters
            url.append(URIUtil.percentEscape(c));
        }
        else {
            // A surrogate must be a high half immediately followed by
            // a low half; anything else means no base URI can be
            // created. Returning here abandons the whole path instead
            // of carrying on with the characters that follow.
            if (c > 0xDBFF || i + 1 >= length) {
                return "";
            }
            i++;
            char low = absolute.charAt(i);
            if (low < 0xDC00 || low > 0xDFFF) {
                return "";
            }
            // Percent-escape the four UTF-8 bytes of the code point.
            int codePoint
              = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
            int[] utf8 = {
              0xF0 | (codePoint >> 18),
              0x80 | ((codePoint >> 12) & 0x3F),
              0x80 | ((codePoint >> 6) & 0x3F),
              0x80 | (codePoint & 0x3F)
            };
            for (int j = 0; j < utf8.length; j++) {
                url.append('%');
                url.append(Integer.toHexString(utf8[j]).toUpperCase());
            }
        }
    }
    return url.toString();

}

// True for the ASCII characters that are copied into the path part of
// a file URL without escaping: letters, digits, and a fixed set of
// punctuation marks.
private static boolean isLiteralPathChar(char c) {
    if (c >= 'a' && c <= 'z') return true;
    if (c >= 'A' && c <= 'Z') return true;
    if (c >= '0' && c <= '9') return true;
    return "!$&'()*,-.:;=_~".indexOf(c) >= 0;
}
Reads the document from a reader.
Params: - in – the reader from which the document is read
Throws: - ValidityException – if a validity error is detected. This
is only thrown if the builder has been instructed to validate.
- ParsingException – if a well-formedness error is detected
- IOException – if an I/O error such as a bad disk
prevents the document from being fully read
Returns: the parsed Document
/**
* <p>
* Reads the document from a reader.
* </p>
*
* @param in the reader from which the document is read
*
* @return the parsed <code>Document</code>
*
* @throws ValidityException if a validity error is detected. This
* is only thrown if the builder has been instructed to validate.
* @throws ParsingException if a well-formedness error is detected
* @throws IOException if an I/O error such as a bad disk
* prevents the document from being fully read
*/
public Document build(Reader in)
  throws ParsingException, ValidityException, IOException {
    if (in != null) {
        return build(new InputSource(in));
    }
    // Fail here with a clear message instead of inside the parser.
    throw new NullPointerException("Attempted to build from null reader");
}
Reads the document from a character stream while
specifying a base URI.
Params: - in – the reader from which the document
is read
- baseURI – the base URI for this document; may be null
Throws: - ValidityException – if a validity error is detected. This
is only thrown if the builder has been instructed to
validate.
- ParsingException – if a well-formedness error is detected
- IOException – if an I/O error such as a bad disk
prevents the document from being completely read
Returns: the parsed Document
/**
* <p>
* Reads the document from a character stream while
* specifying a base URI.
* </p>
*
* @param in the reader from which the document
* is read
* @param baseURI the base URI for this document; may be null
*
* @return the parsed <code>Document</code>
*
* @throws ValidityException if a validity error is detected. This
* is only thrown if the builder has been instructed to
* validate.
* @throws ParsingException if a well-formedness error is detected
* @throws IOException if an I/O error such as a bad disk
* prevents the document from being completely read
*/
public Document build(Reader in, String baseURI)
  throws ParsingException, ValidityException, IOException {
    InputSource source = new InputSource(in);
    if (baseURI == null) {
        // No base URI: the source carries only the reader.
        return build(source);
    }
    source.setSystemId(canonicalizeURL(baseURI));
    return build(source);
}
Reads the document from the contents of a string.
Params: - document – the string that contains the XML document
- baseURI – the base URI for this document; may be null
Throws: - ValidityException – if a validity error is detected. This
is only thrown if the builder has been instructed to
validate.
- ParsingException – if a well-formedness error is detected
- IOException – if an I/O error such as a bad disk
prevents the document's external DTD subset from being read
Returns: the parsed Document
/**
 * <p>
 * Builds a document from XML text held in a string.
 * </p>
 *
 * <p>
 * The string is wrapped in a <code>StringReader</code> and handed,
 * together with the base URI, to the reader-based
 * <code>build</code> method.
 * </p>
 *
 * @param document the string that contains the XML document
 * @param baseURI the base URI for this document; may be null
 *
 * @return the parsed <code>Document</code>
 *
 * @throws ValidityException if a validity error is detected. This
 *     is only thrown if the builder has been instructed to
 *     validate.
 * @throws ParsingException if a well-formedness error is detected
 * @throws IOException if an I/O error such as a bad disk
 *     prevents the document's external DTD subset from being read
 */
public Document build(String document, String baseURI)
  throws ParsingException, ValidityException, IOException {
    return build(new StringReader(document), baseURI);
}
// Rewrites an absolute URL with a rooted (or empty) path into the form
//   scheme://[userinfo@]host[:port]/path[?query]
// with an empty path replaced by "/" and dot segments removed from
// the path. Anything that cannot be parsed as a URL, or whose path
// does not start with "/", is returned unchanged.
//
// needed to work around a bug in Xerces and Crimson
// for URLs with no trailing slashes (no path part)
// such as http://www.cafeconleche.org.
// Also needed to work around a VM bug involving file URLs such as
// file:///tmp/nosuchdirectory/../foo.xml
// where "nosuchdirectory" does not exist.
private String canonicalizeURL(String uri) {

    try {
        URL u = new URL(uri);
        String path = u.getPath();
        String scheme = u.getProtocol();
        String userInfo = u.getUserInfo();
        String host = u.getHost();
        String query = u.getQuery();
        int port = u.getPort();
        // fragment ID not needed

        if (path == null || path.length() == 0) {
            // We handle here the case where we have a URL such as
            // http://www.cafeaulait.org with no trailing slash.
            path = "/";
        }
        else if (path.charAt(0) != '/') {
            // URLs such as jar:file:/a.jar!/doc.xml or file:C:/doc.xml
            // have no rooted path; reassembling them as
            // scheme://host/path would glue "://" onto the front of
            // the path and produce a different (usually broken) URL.
            return uri;
        }

        // If this proves to be a hot spot we could probably take this path
        // only if the scheme is file; not in the more common case where
        // it's http
        path = URIUtil.removeDotSegments(path);

        StringBuffer canonicalForm = new StringBuffer(uri.length());
        canonicalForm.append(scheme);
        canonicalForm.append("://");
        if (userInfo != null) {
            // getHost() does not include the user info, so it has to be
            // restored explicitly or credentials in the base URI are lost
            canonicalForm.append(userInfo).append('@');
        }
        if (host != null) {
            canonicalForm.append(host);
        }
        if (port >= 0) {
            canonicalForm.append(':').append(port);
        }
        canonicalForm.append(path);
        if (query != null) {
            canonicalForm.append('?').append(query);
        }
        return canonicalForm.toString();
    }
    catch (MalformedURLException ex) {
        // not a URL java.net.URL understands; pass it through untouched
        return uri;
    }

}
Reads the document from a SAX InputSource.
Params: - in – the input source from which the document is read
Throws: - ValidityException – if a validity error is detected. This
is only thrown if the builder has been instructed to
validate.
- ParsingException – if a well-formedness error is detected
- IOException – if an I/O error such as a bad disk
prevents the document from being read
Returns: the parsed Document
/**
 * <p>
 * Reads the document from a SAX <code>InputSource</code>.
 * </p>
 *
 * <p>
 * Exceptions raised by the parser, other than ordinary I/O
 * failures, are converted to <code>ParsingException</code>s.
 * If the parser's error handler is the validity-recording handler
 * and it recorded any validity errors during this parse, a
 * <code>ValidityException</code> carrying the parsed document
 * is thrown instead of returning normally.
 * </p>
 *
 * @param in the input source from which the document is read
 *
 * @return the parsed <code>Document</code>
 *
 * @throws ValidityException if a validity error is detected. This
 *     is only thrown if the builder has been instructed to
 *     validate.
 * @throws ParsingException if a well-formedness error is detected
 * @throws IOException if an I/O error such as a bad disk
 *     prevents the document from being read
 */
private Document build(InputSource in)
  throws ParsingException, ValidityException, IOException {

    // Discard validity errors left over from an earlier parse. The
    // handler is otherwise only reset on the path that throws the
    // ValidityException below, so a parse that recorded validity
    // errors and then died with a fatal error would leave them
    // behind and make the next, valid, document look invalid.
    ErrorHandler errorHandler = parser.getErrorHandler();
    if (errorHandler instanceof ValidityRequired) {
        ((ValidityRequired) errorHandler).reset();
    }

    XOMHandler handler = (XOMHandler) parser.getContentHandler();
    Document result = null;
    try {
        parser.parse(in);
        result = handler.getDocument();
    }
    catch (SAXParseException ex) {
        // keep the location information reported by the parser
        throw new ParsingException(
            ex.getMessage(),
            ex.getSystemId(),
            ex.getLineNumber(),
            ex.getColumnNumber(),
            ex);
    }
    catch (SAXException ex) {
        throw new ParsingException(ex.getMessage(), in.getSystemId(), ex);
    }
    catch (XMLException ex) {
        throw new ParsingException(ex.getMessage(), ex);
    }
    catch (RuntimeException ex) {
        // Work-around for non-conformant parsers, especially Piccolo
        throw new ParsingException(ex.getMessage(), in.getSystemId(), ex);
    }
    catch (UTFDataFormatException ex) {
        // Work-around for non-conformant parsers, especially Xerces
        // http://nagoya.apache.org/bugzilla/show_bug.cgi?id=27583
        throw new ParsingException(ex.getMessage(), in.getSystemId(), ex);
    }
    catch (CharConversionException ex) {
        // Work-around for non-conformant parsers, especially Xerces
        // http://nagoya.apache.org/bugzilla/show_bug.cgi?id=27583
        throw new ParsingException(ex.getMessage(), in.getSystemId(), ex);
    }
    catch (IOException ex) {
        // Work-around for Xerces; I don't want to just catch
        // org.apache.xerces.util.URI.MalformedURIException
        // because that would introduce a dependence on Xerces
        if (ex.getClass().getName().equals(
          "org.apache.xerces.util.URI$MalformedURIException")) {
            throw new ParsingException(ex.getMessage(), in.getSystemId(), ex);
        }
        else {
            throw ex;
        }
    }
    finally {
        // runs whether or not the parse succeeded
        handler.freeMemory();
    }

    if (result == null) {
        throw new ParsingException(
            "Parser did not build document",
            in.getSystemId(), -1, -1
        );
    }

    // fall back to the input source's system ID when the document
    // came out of the handler with an empty base URI
    if ("".equals(result.getBaseURI())) {
        result.setBaseURI(in.getSystemId());
    }

    if (errorHandler instanceof ValidityRequired) {
        ValidityRequired validityHandler
          = (ValidityRequired) errorHandler;
        if (!validityHandler.isValid()) {
            ValidityException vex = validityHandler.vexception;
            // the document was still built; hand it to the caller
            // through the exception
            vex.setDocument(result);
            validityHandler.reset();
            throw vex;
        }
    }
    return result;

}
// Error handler that collects recoverable errors into a single
// ValidityException instead of aborting the parse. Fatal errors are
// rethrown to the parser's caller; warnings are dropped.
private static class ValidityRequired implements ErrorHandler {

    // null until the first error is reported; read directly by the
    // enclosing class when it needs to throw the collected errors
    ValidityException vexception = null;

    // true as long as no error has been recorded since the last reset
    boolean isValid() {
        return vexception == null;
    }

    // forgets any recorded errors
    void reset() {
        vexception = null;
    }

    public void warning(SAXParseException exception) {
        // ignore warnings
    }

    public void error(SAXParseException exception) {
        ValidityException collector = vexception;
        if (collector == null) {
            // the first error supplies the message and location of
            // the exception that will eventually be thrown
            collector = new ValidityException(
                exception.getMessage(),
                exception.getSystemId(),
                exception.getLineNumber(),
                exception.getColumnNumber(),
                exception);
            vexception = collector;
        }
        // every error, including the first, is added to the list
        collector.addError(exception);
    }

    public void fatalError(SAXParseException exception)
      throws SAXParseException {
        throw exception;
    }

}
// Error handler that treats every recoverable error as fatal by
// rethrowing it, with one exception: an error whose message is exactly
// Illegal Namespace prefix: "xml". is ignored. Warnings are dropped.
//
// Because Crimson doesn't report namespace errors as fatal
private static class NamespaceWellformednessRequired
  implements ErrorHandler {

    public void warning(SAXParseException exception) {
        // ignore warnings
    }

    public void error(SAXParseException exception)
      throws SAXParseException {
        // Constant-first comparison: getMessage() can return null, and
        // calling equals() on it would replace the parser's exception
        // with a NullPointerException.
        if ("Illegal Namespace prefix: \"xml\".".equals(exception.getMessage())) {
            return;
        }
        throw exception;
    }

    public void fatalError(SAXParseException exception)
      throws SAXParseException {
        throw exception;
    }

}
// I added this because XIncluder needed it.
Returns this builder's NodeFactory. It returns
null if a factory was not supplied when the builder was created.
Returns: the node factory that was specified in the constructor
/**
 * <p>
 * Gets the <code>NodeFactory</code> this builder uses. The result
 * is null if no factory was supplied when the builder was created.
 * </p>
 *
 * @return the node factory that was specified in the constructor
 */
public NodeFactory getNodeFactory() {
    return this.factory;
}
}