/* Aalto XML processor
 *
 * Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.fasterxml.aalto.in;

import java.io.*;
import java.text.MessageFormat;

import javax.xml.stream.Location;
import javax.xml.stream.XMLReporter;
import javax.xml.stream.XMLStreamException;

import com.fasterxml.aalto.impl.ErrorConsts;
import com.fasterxml.aalto.impl.IoStreamException;
import com.fasterxml.aalto.impl.LocationImpl;
import com.fasterxml.aalto.util.CharsetNames;

Class that takes care of bootstrapping main document input from a Stream input source.
/** * Class that takes care of bootstrapping main document input from * a Stream input source. */
public final class CharSourceBootstrapper extends InputBootstrapper {
Whether to use a bigger (4000, ie. 8k) or smaller (2000 -> 4k) buffer size?
/** * Whether to use a bigger (4000, ie. 8k) or smaller (2000 -> 4k) * buffer size? */
final static int DEFAULT_BUFFER_SIZE = 4000; final static char CHAR_BOM_MARKER = (char) 0xFEFF; /* /********************************************************************** /* Configuration /********************************************************************** */
Underlying Reader to use for reading content.
/** * Underlying Reader to use for reading content. */
final Reader _in; /* /********************************************************************** /* Input buffering /********************************************************************** */ final char[] _inputBuffer; private int _inputPtr;
Offset of the first character after the end of valid buffer contents.
/** * Offset of the first character after the end of valid buffer * contents. */
private int _inputLast; /* /////////////////////////////////////////////////////////////// // Life-cycle /////////////////////////////////////////////////////////////// */ private CharSourceBootstrapper(ReaderConfig cfg, Reader r) { super(cfg); _in = r; _inputBuffer = cfg.allocFullCBuffer(ReaderConfig.DEFAULT_CHAR_BUFFER_LEN); _inputLast = _inputPtr = 0; } private CharSourceBootstrapper(ReaderConfig cfg, char[] buffer, int start, int len) { super(cfg); _in = null; _inputBuffer = buffer; _inputPtr = start; _inputLast = start+len; } public static CharSourceBootstrapper construct(ReaderConfig cfg, Reader r) throws XMLStreamException { return new CharSourceBootstrapper(cfg, r); } public static CharSourceBootstrapper construct(ReaderConfig cfg, char[] buffer, int start, int len) throws XMLStreamException { return new CharSourceBootstrapper(cfg, buffer, start, len); } @Override public final XmlScanner bootstrap() throws XMLStreamException { try { return doBootstrap(); } catch (IOException ioe) { throw new IoStreamException(ioe); } finally { _config.freeSmallCBuffer(mKeyword); } } public XmlScanner doBootstrap() throws IOException, XMLStreamException { if (_inputPtr >= _inputLast) { initialLoad(7); } String normEnc = null; /* Only need 6 for signature ("<?xml\s"), but there may be a leading * BOM in there... and a valid xml declaration has to be longer * than 7 chars anyway (although, granted, shortest valid xml docl * is just 4 chars... "<a/>") */ if ((_inputLast - _inputPtr) >= 7) { char c = _inputBuffer[_inputPtr]; // BOM to skip? if (c == CHAR_BOM_MARKER) { c = _inputBuffer[++_inputPtr]; } if (c == '<') { if (_inputBuffer[_inputPtr+1] == '?' && _inputBuffer[_inputPtr+2] == 'x' && _inputBuffer[_inputPtr+3] == 'm' && _inputBuffer[_inputPtr+4] == 'l' && _inputBuffer[_inputPtr+5] <= 0x0020) { // Yup, got the declaration ok! _inputPtr += 6; // skip declaration readXmlDeclaration(); if (mFoundEncoding != null) { normEnc = verifyXmlEncoding(mFoundEncoding); } } } else { /* We may also get something that would be invalid xml * ("garbage" char; neither '<' nor space). If so, and * it's one of "well-known" cases, we can not only throw * an exception but also indicate a clue as to what is likely * to be wrong. */ /* Specifically, UTF-8 read via, say, ISO-8859-1 reader, can * "leak" marker (0xEF, 0xBB, 0xBF). While we could just eat * it, there's bound to be other problems cropping up, so let's * inform about the problem right away. */ if (c == 0xEF) { throw new IoStreamException("Unexpected first character (char code 0xEF), not valid in xml document: could be mangled UTF-8 BOM marker. Make sure that the Reader uses correct encoding or pass an InputStream instead"); } } } _config.setActualEncoding(normEnc); _config.setXmlDeclInfo(mDeclaredXmlVersion, mFoundEncoding, mStandalone); return new ReaderScanner(_config, _in, _inputBuffer, _inputPtr, _inputLast); } /* //////////////////////////////////////////////////// // Internal methods, main xml decl processing //////////////////////////////////////////////////// */
Returns:Normalized encoding name
/** * @return Normalized encoding name */
protected String verifyXmlEncoding(String enc) throws XMLStreamException { enc = CharsetNames.normalize(enc); // Probably no point in comparing at all... is there? // But we can report a possible problem? String extEnc = _config.getExternalEncoding(); if (extEnc != null && enc != null && !extEnc.equalsIgnoreCase(enc)) { XMLReporter rep = _config.getXMLReporter(); if (rep != null) { Location loc = getLocation(); rep.report(MessageFormat.format(ErrorConsts.W_MIXED_ENCODINGS, new Object[] { extEnc, enc }), ErrorConsts.WT_XML_DECL, this, loc); } } return enc; } /* ///////////////////////////////////////////////////// // Internal methods, loading input data ///////////////////////////////////////////////////// */ protected boolean initialLoad(int minimum) throws IOException { _inputPtr = 0; _inputLast = 0; if (_in == null) { // for block sources return false; } while (_inputLast < minimum) { int count = _in.read(_inputBuffer, _inputLast, _inputBuffer.length - _inputLast); if (count < 1) { return false; } _inputLast += count; } return true; } protected void loadMore() throws IOException, XMLStreamException { /* Need to make sure offsets are properly updated for error * reporting purposes, and do this now while previous amounts * are still known. */ _inputProcessed += _inputLast; _inputRowStart -= _inputLast; if (_in == null) { // for block sources reportEof(); } _inputPtr = 0; _inputLast = _in.read(_inputBuffer, 0, _inputBuffer.length); if (_inputLast < 1) { reportEof(); } } /* ///////////////////////////////////////////////////// // Implementations of abstract parsing methods ///////////////////////////////////////////////////// */ @Override protected void pushback() { --_inputPtr; } @Override protected int getNext() throws IOException, XMLStreamException { return (_inputPtr < _inputLast) ? _inputBuffer[_inputPtr++] : nextChar(); } @Override protected int getNextAfterWs(boolean reqWs) throws IOException, XMLStreamException { int count = 0; while (true) { char c = (_inputPtr < _inputLast) ? _inputBuffer[_inputPtr++] : nextChar(); if (c > CHAR_SPACE) { if (reqWs && count == 0) { reportUnexpectedChar(c, ERR_XMLDECL_EXP_SPACE); } return c; } if (c == CHAR_CR || c == CHAR_LF) { skipCRLF(c); } else if (c == CHAR_NULL) { reportNull(); } ++count; } }
Returns:First character that does not match expected, if any; CHAR_NULL if match succeeded
/** * @return First character that does not match expected, if any; * CHAR_NULL if match succeeded */
@Override protected int checkKeyword(String exp) throws IOException, XMLStreamException { int len = exp.length(); for (int ptr = 1; ptr < len; ++ptr) { char c = (_inputPtr < _inputLast) ? _inputBuffer[_inputPtr++] : nextChar(); if (c != exp.charAt(ptr)) { return c; } if (c == CHAR_NULL) { reportNull(); } } return CHAR_NULL; } @Override protected int readQuotedValue(char[] kw, int quoteChar) throws IOException, XMLStreamException { int i = 0; int len = kw.length; while (true) { char c = (_inputPtr < _inputLast) ? _inputBuffer[_inputPtr++] : nextChar(); if (c == CHAR_CR || c == CHAR_LF) { skipCRLF(c); } else if (c == CHAR_NULL) { reportNull(); } if (c == quoteChar) { return (i < len) ? i : -1; } // Let's just truncate longer values, but match quote if (i < len) { kw[i++] = c; } } } @Override protected Location getLocation() { return LocationImpl.fromZeroBased (_config.getPublicId(), _config.getSystemId(), _inputProcessed + _inputPtr, _inputRow, _inputPtr - _inputRowStart); } /* /********************************************************************** /* Internal methods, single-byte access methods /********************************************************************** */ protected char nextChar() throws IOException, XMLStreamException { if (_inputPtr >= _inputLast) { loadMore(); } return _inputBuffer[_inputPtr++]; } protected void skipCRLF(char lf) throws IOException, XMLStreamException { if (lf == '\r') { char c = (_inputPtr < _inputLast) ? _inputBuffer[_inputPtr++] : nextChar(); if (c != '\n') { --_inputPtr; // pushback if not 2-char/byte lf } } ++_inputRow; _inputRowStart = _inputPtr; } }