/*
 * Copyright (c) 2010, 2017 Oracle and/or its affiliates. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v. 2.0, which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * This Source Code may also be made available under the following Secondary
 * Licenses when the conditions for such availability set forth in the
 * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
 * version 2 with the GNU Classpath Exception, which is available at
 * https://www.gnu.org/software/classpath/license.html.
 *
 * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
 */

package org.glassfish.grizzly.http.util;

import java.io.CharConversionException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.glassfish.grizzly.Buffer;
import org.glassfish.grizzly.Grizzly;
import static org.glassfish.grizzly.utils.Charsets.*;

Utility class that makes sure an HTTP URL defined inside a MessageBytes is normalized, converted and valid. It also makes sure there is no security hole. Mainly, this class can be used by doing:


HttpRequestURIDecoder.decode(decodedURI, urlDecoder, encoding, b2cConverter);
Author:Jeanfrancois Arcand
/**
 * Utility class that makes sure an HTTP URL held in a {@link MessageBytes} or a
 * {@link DataChunk} is percent-decoded, normalized, converted to characters and
 * still valid afterwards. It also makes sure there is no security hole.
 * Mainly, this class can be used by doing:
 * <p><pre><code>
 *
 * HttpRequestURIDecoder.decode(decodedURI, urlDecoder, encoding, b2cConverter);
 *
 * </code></pre>
 *
 * @author Jeanfrancois Arcand
 */
public class HttpRequestURIDecoder {

// When false, the normalize methods of this class reject any URI that contains
// a backslash; when true they replace it with '/'.
protected static final boolean ALLOW_BACKSLASH = false;

// Read once from the system property
// "com.sun.enterprise.web.collapseAdjacentSlashes" (defaults to "true").
// When true, runs of '/' are collapsed during normalization and "//" is
// rejected by checkNormalize.
private static final boolean COLLAPSE_ADJACENT_SLASHES = Boolean.valueOf(System.getProperty("com.sun.enterprise.web.collapseAdjacentSlashes", "true"));

// Logger shared by the static and instance methods of this class.
private static final Logger LOGGER = Grizzly.logger(HttpRequestURIDecoder.class);
Decode the HTTP request represented by the bytes inside MessageBytes using a UDecoder.
Params:
  • decodedURI – The bytes to decode
  • urlDecoder – The urlDecoder to use to decode.
Throws: Exception
/**
 * Decodes the HTTP request URI held in the given {@link MessageBytes} using the
 * supplied {@link UDecoder}.
 * <p>
 * Delegates to {@link #decode(MessageBytes, UDecoder, String, B2CConverter)}
 * with a {@code null} encoding and a {@code null} converter.
 *
 * @param decodedURI the bytes to decode (modified in place)
 * @param urlDecoder the decoder used for the %xx decoding step
 * @throws java.lang.Exception if the delegate fails to decode or validate the URI
 */
public static void decode(final MessageBytes decodedURI, final UDecoder urlDecoder) throws Exception {
    final String defaultEncoding = null;
    final B2CConverter defaultConverter = null;
    decode(decodedURI, urlDecoder, defaultEncoding, defaultConverter);
}
Decode the HTTP request represented by the bytes inside MessageBytes using a UDecoder, using the specified encoding and the specified B2CConverter to decode the request.
Params:
  • decodedURI – The bytes to decode
  • urlDecoder – The urlDecoder to use to decode.
  • encoding – the encoding value, default is UTF-8.
  • b2cConverter – the Bytes to Char Converter.
Throws: Exception
/** * Decode the HTTP request represented by the bytes inside {@link MessageBytes} * using an {@link UDecoder}, using the specified encoding, using the specified * [@link B2CConverter} to decode the request. * @param decodedURI - The bytes to decode * @param urlDecoder - The urlDecoder to use to decode. * @param encoding the encoding value, default is UTF-8. * @param b2cConverter the Bytes to Char Converter. * @throws java.lang.Exception */
public static void decode(final MessageBytes decodedURI, final UDecoder urlDecoder, String encoding, final B2CConverter b2cConverter) throws Exception { // %xx decoding of the URL urlDecoder.convert(decodedURI, false); if (!normalize(decodedURI)) { throw new IOException("Invalid URI character encoding"); } if (encoding == null) { encoding = "utf-8"; } convertURI(decodedURI, encoding, b2cConverter); // Check that the URI is still normalized if (!checkNormalize(decodedURI.getCharChunk())) { throw new IOException("Invalid URI character encoding"); } }
Decode the HTTP request represented by the bytes inside DataChunk.
Params:
  • decodedURI – - The bytes to decode
Throws: CharConversionException
/**
 * Decodes, in place, the HTTP request URI held in the given {@link DataChunk},
 * with encoded slashes disallowed and {@code UTF8_CHARSET} as the charset.
 *
 * @param decodedURI the bytes to decode
 * @throws java.io.CharConversionException if the delegate rejects the URI
 */
public static void decode(final DataChunk decodedURI) throws CharConversionException {
    final boolean encodedSlashAllowed = false;
    decode(decodedURI, encodedSlashAllowed, UTF8_CHARSET);
}
Decode the HTTP request represented by the bytes inside DataChunk.
Params:
  • decodedURI – - The bytes to decode
  • isSlashAllowed – allow encoded slashes
Throws: CharConversionException
/**
 * Decodes, in place, the HTTP request URI held in the given {@link DataChunk},
 * using {@code UTF8_CHARSET} as the charset.
 *
 * @param decodedURI the bytes to decode
 * @param isSlashAllowed allow encoded slashes
 * @throws java.io.CharConversionException if the delegate rejects the URI
 */
public static void decode(final DataChunk decodedURI, final boolean isSlashAllowed) throws CharConversionException {
    final Charset defaultCharset = UTF8_CHARSET;
    decode(decodedURI, isSlashAllowed, defaultCharset);
}
Decode the HTTP request represented by the bytes inside DataChunk.
Params:
  • decodedURI – - The bytes to decode
  • isSlashAllowed – allow encoded slashes
  • encoding – the encoding value, default is UTF-8.
Throws: CharConversionException
/**
 * Decodes, in place, the HTTP request URI held in the given {@link DataChunk}:
 * the same chunk is passed as both source and target of the decoding.
 *
 * @param decodedURI the bytes to decode
 * @param isSlashAllowed allow encoded slashes
 * @param encoding the charset used for the bytes-to-chars conversion
 * @throws java.io.CharConversionException if the delegate rejects the URI
 */
public static void decode(final DataChunk decodedURI, final boolean isSlashAllowed, final Charset encoding) throws CharConversionException {
    // Source and target are the same chunk, i.e. decode in place.
    final DataChunk inPlaceTarget = decodedURI;
    decode(decodedURI, inPlaceTarget, isSlashAllowed, encoding);
}
Decode the HTTP request represented by the bytes inside DataChunk.
Params:
  • originalURI – - The bytes to decode
  • targetDecodedURI – the target DataChunk URI will be decoded to
  • isSlashAllowed – is '/' an allowable character
  • encoding – the encoding value, default is UTF-8
Throws: CharConversionException
/** * Decode the HTTP request represented by the bytes inside {@link DataChunk}. * @param originalURI - The bytes to decode * @param targetDecodedURI the target {@link DataChunk} URI will be decoded to * @param isSlashAllowed is '/' an allowable character * @param encoding the encoding value, default is UTF-8 * @throws java.io.CharConversionException */
public static void decode(final DataChunk originalURI, final DataChunk targetDecodedURI, final boolean isSlashAllowed, final Charset encoding) throws CharConversionException { // %xx decoding of the URL URLDecoder.decode(originalURI, targetDecodedURI, isSlashAllowed); if (!normalize(targetDecodedURI)) { throw new CharConversionException("Invalid URI character encoding"); } convertToChars(targetDecodedURI, encoding); }
Converts the normalized HTTP request represented by the bytes inside DataChunk to its chars representation, using the passed encoding.
Params:
  • decodedURI – The bytes to decode
  • encoding – the encoding value, default is UTF-8.
Throws: CharConversionException
/** * Converts the normalized the HTTP request represented by the bytes inside * {@link DataChunk} to chars representation, using the passed encoding. * @param decodedURI - The bytes to decode * @param encoding the encoding value, default is UTF-8. * @throws java.io.CharConversionException */
public static void convertToChars(final DataChunk decodedURI, Charset encoding) throws CharConversionException { if (encoding == null) { encoding = UTF8_CHARSET; } decodedURI.toChars(encoding); // Check that the URI is still normalized if (!checkNormalize(decodedURI.getCharChunk())) { throw new CharConversionException("Invalid URI character encoding"); } }
Convert a URI using the specified encoding and the specified B2CConverter to decode the request.
Params:
  • uri – The bytes to decode
  • encoding – the encoding value
  • b2cConverter – the Bytes to Char Converter.
Throws: Exception
/** * Convert a URI using the specified encoding, using the specified * [@link B2CConverter} to decode the request. * @param uri - The bytes to decode * @param encoding the encoding value * @param b2cConverter the Bytes to Char Converter. * @throws java.lang.Exception */
private static void convertURI(final MessageBytes uri, final String encoding, B2CConverter b2cConverter) throws Exception { final ByteChunk bc = uri.getByteChunk(); final CharChunk cc = uri.getCharChunk(); cc.allocate(bc.getLength(), -1); if (encoding != null && encoding.trim().length() != 0 && !"ISO-8859-1".equalsIgnoreCase(encoding)) { try { if (b2cConverter == null) { b2cConverter = new B2CConverter(encoding); } } catch (IOException e) { // Ignore LOGGER.severe("Invalid URI encoding; using HTTP default"); } if (b2cConverter != null) { try { b2cConverter.convert(bc, cc); uri.setChars(cc.getBuffer(), cc.getStart(), cc.getLength()); return; } catch (IOException e) { LOGGER.severe("Invalid URI character encoding; trying ascii"); cc.recycle(); } } } // Default encoding: fast conversion final byte[] bbuf = bc.getBuffer(); final char[] cbuf = cc.getBuffer(); int start = bc.getStart(); for (int i = 0; i < bc.getLength(); i++) { cbuf[i] = (char) (bbuf[i + start] & 0xff); } uri.setChars(cbuf, 0, bc.getLength()); }
Normalize URI.

This method normalizes "\", "//", "/./" and "/../". This method will return false when trying to go above the root, or if the URI contains a null byte.

Params:
  • uriMB – URI to be normalized
Returns:true if normalization was successful, or false otherwise
/**
 * Normalize URI.
 * <p>
 * Dispatches on the content type of the given {@link MessageBytes}: character
 * content goes to {@link #normalizeChars(CharChunk)}, everything else to
 * {@link #normalizeBytes(ByteChunk)}.
 *
 * @param uriMB URI to be normalized
 * @return {@code true} if normalization was successful, or {@code false} otherwise
 */
public static boolean normalize(MessageBytes uriMB) {
    final boolean holdsChars = uriMB.getType() == MessageBytes.T_CHARS;
    return holdsChars
            ? normalizeChars(uriMB.getCharChunk())
            : normalizeBytes(uriMB.getByteChunk());
}
Normalize URI.

This method normalizes "\", "//", "/./" and "/../". This method will return false when trying to go above the root, or if the URI contains a null byte.

Params:
  • dataChunk – URI to be normalized
Returns:true if normalization was successful, or false otherwise
/** * Normalize URI. * <p> * This method normalizes "\", "//", "/./" and "/../". This method will * return false when trying to go above the root, or if the URI contains * a null byte. * * @param dataChunk URI to be normalized * @return <tt>true</tt> if normalization was successful, or <tt>false</tt> otherwise */
public static boolean normalize(final DataChunk dataChunk) { switch (dataChunk.getType()) { case Bytes: return normalizeBytes(dataChunk.getByteChunk()); case Buffer: return normalizeBuffer(dataChunk.getBufferChunk()); case String: try { dataChunk.toChars(null); } catch (CharConversionException unexpected) { // should never occur throw new IllegalStateException("Unexpected exception", unexpected); } // pass to Chars case case Chars: return normalizeChars(dataChunk.getCharChunk()); default: throw new NullPointerException(); } }
Check that the URI is normalized following character decoding.

This method checks for "\", 0, "//", "/./" and "/../". This method will return false if sequences that are supposed to be normalized are still present in the URI.

Params:
  • uriCC – URI to be checked (should be chars)
Returns:true if the uriCC represents a normalized URI, or false otherwise
/** * Check that the URI is normalized following character decoding. * <p> * This method checks for "\", 0, "//", "/./" and "/../". This method will * return false if sequences that are supposed to be normalized are still * present in the URI. * * @param uriCC URI to be checked (should be chars) * @return <tt>true</tt> if the uriCC represents a normalized URI, or <tt>false</tt> otherwise */
public static boolean checkNormalize(final CharChunk uriCC) { char[] c = uriCC.getChars(); int start = uriCC.getStart(); int end = uriCC.getEnd(); int pos; // Check for '\' and 0 for (pos = start; pos < end; pos++) { if (c[pos] == '\\') { return false; } if (c[pos] == 0) { return false; } } if (COLLAPSE_ADJACENT_SLASHES) { // Check for "//" for (pos = start; pos < (end - 1); pos++) { if (c[pos] == '/') { if (c[pos + 1] == '/') { return false; } } } } // Check for ending with "/." or "/.." if (((end - start) >= 2) && (c[end - 1] == '.')) { if ((c[end - 2] == '/') || ((c[end - 2] == '.') && (c[end - 3] == '/'))) { return false; } } // Check for "/./" return uriCC.indexOf("/./", 0, 3, 0) < 0; } public static boolean normalizeChars(final CharChunk uriCC) { char[] c = uriCC.getChars(); int start = uriCC.getStart(); int end = uriCC.getEnd(); // URL * is acceptable if ((end - start == 1) && c[start] == '*') { return true; } int pos; int index; // Replace '\' with '/' // Check for null char for (pos = start; pos < end; pos++) { if (c[pos] == '\\') { if (ALLOW_BACKSLASH) { c[pos] = '/'; } else { return false; } } if (c[pos] == (char) 0) { return false; } } // The URL must start with '/' if (c[start] != '/') { return false; } // Replace "//" with "/" if (COLLAPSE_ADJACENT_SLASHES) { for (pos = start; pos < (end - 1); pos++) { if (c[pos] == '/') { while ((pos + 1 < end) && (c[pos + 1] == '/')) { copyChars(c, pos, pos + 1, end - pos - 1); end--; } } } } // If the URI ends with "/." or "/..", then we append an extra "/" // Note: It is possible to extend the URI by 1 without any side effect // as the next character is a non-significant WS. 
if (((end - start) > 2) && (c[end - 1] == '.')) { if ((c[end - 2] == '/') || ((c[end - 2] == '.') && (c[end - 3] == '/'))) { c[end] = '/'; end++; } } uriCC.setEnd(end); index = 0; // Resolve occurrences of "/./" in the normalized path while (true) { index = uriCC.indexOf("/./", 0, 3, index); if (index < 0) { break; } copyChars(c, start + index, start + index + 2, end - start - index - 2); end = end - 2; uriCC.setEnd(end); } index = 0; // Resolve occurrences of "/../" in the normalized path while (true) { index = uriCC.indexOf("/../", 0, 4, index); if (index < 0) { break; } // Prevent from going outside our context if (index == 0) { return false; } int index2 = -1; for (pos = start + index - 1; (pos >= 0) && (index2 < 0); pos--) { if (c[pos] == '/') { index2 = pos; } } copyChars(c, start + index2, start + index + 3, end - start - index - 3); end = end + index2 - index - 3; uriCC.setEnd(end); index = index2; } uriCC.setChars(c, start, end); return true; } // ------------------------------------------------------ Protected Methods
Copy an array of bytes to a different position. Used during normalization.
/**
 * Copy a run of bytes to a different position inside the same array. Used
 * during normalization.
 *
 * @param b the array that is both source and destination
 * @param dest index the run is copied to
 * @param src index the run is copied from
 * @param len number of bytes to copy
 */
protected static void copyBytes(byte[] b, int dest, int src, int len) {
    // System.arraycopy copes with overlapping source and destination ranges
    System.arraycopy(b, src, b, dest, len);
}
Copy an array of chars to a different position. Used during normalization.
/**
 * Copy a run of chars to a different position inside the same array. Used
 * during normalization.
 *
 * @param c the array that is both source and destination
 * @param dest index the run is copied to
 * @param src index the run is copied from
 * @param len number of chars to copy
 */
private static void copyChars(char[] c, int dest, int src, int len) {
    // System.arraycopy copes with overlapping source and destination ranges
    System.arraycopy(c, src, c, dest, len);
}
Log a message on the Logger associated with our Container (if any)
Params:
  • message – Message to be logged
/**
 * Log a message at INFO level on this class's logger.
 *
 * @param message Message to be logged
 */
protected void log(String message) {
    LOGGER.info(message);
}
Log a message on the Logger associated with our Container (if any)
Params:
  • message – Message to be logged
  • throwable – Associated exception
/**
 * Log a message together with its associated exception at SEVERE level on
 * this class's logger.
 *
 * @param message Message to be logged
 * @param throwable Associated exception
 */
protected void log(String message, Throwable throwable) {
    LOGGER.log(Level.SEVERE, message, throwable);
}
Character conversion of a US-ASCII MessageBytes.
/**
 * Character conversion of a US-ASCII MessageBytes.
 * <p>
 * Does nothing unless the given {@link MessageBytes} currently holds bytes.
 * Otherwise every byte is widened to a char (treated as unsigned) and the
 * result is stored back as the chars of {@code mb}.
 *
 * @param mb the message bytes to convert
 */
protected void convertMB(MessageBytes mb) {
    // This is of course only meaningful for bytes
    if (mb.getType() != MessageBytes.T_BYTES) {
        return;
    }

    ByteChunk bc = mb.getByteChunk();
    CharChunk cc = mb.getCharChunk();
    cc.allocate(bc.getLength(), -1);

    // Default encoding: fast conversion
    byte[] bbuf = bc.getBuffer();
    char[] cbuf = cc.getBuffer();
    int start = bc.getStart();
    for (int i = 0; i < bc.getLength(); i++) {
        cbuf[i] = (char) (bbuf[i + start] & 0xff);
    }
    mb.setChars(cbuf, 0, bc.getLength());
}

// States of the single-pass normalizer used by normalizeBytes/normalizeBuffer.
// Last byte was an ordinary character (also the initial state).
private static final int STATE_CHAR = 0;
// Last byte was '/'.
private static final int STATE_SLASH = 1;
// Not used by the code in this class.
private static final int STATE_PERCENT = 2;
// Last bytes were "/.".
private static final int STATE_SLASHDOT = 3;
// Last bytes were "/..".
private static final int STATE_SLASHDOTDOT = 4;

/**
 * Normalize, in place, the URI held in the given {@link ByteChunk}.
 * <p>
 * A single pass copies the bytes towards the front of the chunk while
 * resolving "/./" and "/../", collapsing "//" (when adjacent-slash collapsing
 * is enabled) and rejecting the null byte and, unless
 * {@code ALLOW_BACKSLASH}, "\". The single-byte URI "*" is accepted unchanged.
 * On success the end of the chunk is moved to the end of the normalized URI.
 * A segment of three or more dots (e.g. "/.../") is an ordinary segment and
 * is preserved.
 *
 * @param bc URI to be normalized
 * @return {@code true} if normalization was successful; {@code false} if the
 *         URI is empty, contains a rejected byte, or tries to go above the root
 */
public static boolean normalizeBytes(final ByteChunk bc) {
    byte[] bs = bc.getBytes();
    int start = bc.getStart();
    int end = bc.getEnd();

    // An empty URL is not acceptable
    if (start == end) {
        return false;
    }

    // URL * is acceptable
    if ((end - start == 1) && bs[start] == (byte) '*') {
        return true;
    }

    // If the URI ends with "/." or "/..", then we append an extra "/"
    // Note: It is possible to extend the URI by 1 without any side effect
    // as the next character is a non-significant WS.
    if (((end - start) > 2) && (bs[end - 1] == (byte) '.')) {
        if ((bs[end - 2] == (byte) '/')
                || ((bs[end - 2] == (byte) '.') && (bs[end - 3] == (byte) '/'))) {
            bs[end] = (byte) '/';
            end++;
        }
    }

    int state = STATE_CHAR;
    int srcPos = start;      // next write position of the normalized output
    int lastSlash = -1;      // output position of the most recent '/'
    int parentSlash = -1;    // output position of the '/' before lastSlash

    for (int pos = start; pos < end; pos++) {
        if (bs[pos] == (byte) 0) {
            return false;
        }
        if (bs[pos] == (byte) '\\') {
            if (ALLOW_BACKSLASH) {
                bs[pos] = (byte) '/';
            } else {
                return false;
            }
        }

        if (bs[pos] == '/') {
            if (state == STATE_CHAR) {
                state = STATE_SLASH;
                bs[srcPos] = bs[pos];
                parentSlash = lastSlash;
                lastSlash = srcPos;
                srcPos++;
            } else if (state == STATE_SLASH) {
                // This is '//'. Ignore if COLLAPSE_ADJACENT_SLASHES is true.
                // What is the behavior for '/../' patterns if collapse is false.
                // Ignoring for now.
                if (!COLLAPSE_ADJACENT_SLASHES) {
                    // Write the slash: after earlier compaction the output
                    // position may hold a stale byte.
                    bs[srcPos++] = bs[pos];
                }
            } else if (state == STATE_SLASHDOT) {
                // This is '/./' ==> drop the '.' and continue as if only the
                // first '/' had been seen. Resetting the state is required so
                // that a following '.' or '/' is not interpreted as part of
                // the already-consumed "/." prefix.
                srcPos--;
                state = STATE_SLASH;
            } else if (state == STATE_SLASHDOTDOT) {
                // This is '/../' ==> search backward to reset lastSlash and parentSlash
                if (parentSlash == -1) {
                    // Going above the root is an error
                    return false;
                } else {
                    lastSlash = parentSlash;
                    srcPos = parentSlash;
                    // Find the parentSlash
                    parentSlash = -1;
                    for (int i = lastSlash - 1; i >= start; i--) {
                        if (bs[i] == '/') {
                            parentSlash = i;
                            break;
                        }
                    }
                }
                state = STATE_SLASH;
                bs[srcPos++] = bs[pos];
            }
        } else if (bs[pos] == '.') {
            if (state == STATE_SLASH) {
                state = STATE_SLASHDOT;
            } else if (state == STATE_SLASHDOT) {
                state = STATE_SLASHDOTDOT;
            } else {
                // Either a dot inside an ordinary segment, or a third dot
                // after "/..": "..." is an ordinary segment name, so the dot
                // must be kept and must not count as a parent reference.
                state = STATE_CHAR;
            }
            bs[srcPos++] = bs[pos];
        } else {
            state = STATE_CHAR;
            bs[srcPos++] = bs[pos];
        }
    }

    bc.setEnd(srcPos);
    return true;
}

/**
 * Normalize, in place, the URI held in the given {@link BufferChunk}.
 * <p>
 * Same algorithm and same rules as {@link #normalizeBytes(ByteChunk)}, applied
 * to the underlying {@link Buffer} through absolute get/put calls.
 *
 * @param bc URI to be normalized
 * @return {@code true} if normalization was successful; {@code false} if the
 *         URI is empty, contains a rejected byte, or tries to go above the root
 */
public static boolean normalizeBuffer(final BufferChunk bc) {
    final Buffer bs = bc.getBuffer();
    final int start = bc.getStart();
    int end = bc.getEnd();

    // An empty URL is not acceptable
    if (start == end) {
        return false;
    }

    // URL * is acceptable
    if ((end - start == 1) && bs.get(start) == (byte) '*') {
        return true;
    }

    // If the URI ends with "/." or "/..", then we append an extra "/"
    // Note: It is possible to extend the URI by 1 without any side effect
    // as the next character is a non-significant WS.
    if (((end - start) > 2) && (bs.get(end - 1) == (byte) '.')) {
        final byte b = bs.get(end - 2);
        if (b == (byte) '/' || (b == (byte) '.' && bs.get(end - 3) == (byte) '/')) {
            bs.put(end, (byte) '/');
            end++;
        }
    }

    int state = STATE_CHAR;
    int srcPos = start;      // next write position of the normalized output
    int lastSlash = -1;      // output position of the most recent '/'
    int parentSlash = -1;    // output position of the '/' before lastSlash

    for (int pos = start; pos < end; pos++) {
        byte b = bs.get(pos);
        if (b == (byte) 0) {
            return false;
        }
        if (b == (byte) '\\') {
            if (ALLOW_BACKSLASH) {
                bs.put(pos, (byte) '/');
                // Keep the local copy in sync so the replaced byte is then
                // handled as a slash, as normalizeBytes does.
                b = (byte) '/';
            } else {
                return false;
            }
        }

        if (b == '/') {
            if (state == STATE_CHAR) {
                state = STATE_SLASH;
                bs.put(srcPos, b);
                parentSlash = lastSlash;
                lastSlash = srcPos;
                srcPos++;
            } else if (state == STATE_SLASH) {
                // This is '//'. Ignore if COLLAPSE_ADJACENT_SLASHES is true.
                // What is the behavior for '/../' patterns if collapse is false.
                // Ignoring for now.
                if (!COLLAPSE_ADJACENT_SLASHES) {
                    // Write the slash: after earlier compaction the output
                    // position may hold a stale byte.
                    bs.put(srcPos++, b);
                }
            } else if (state == STATE_SLASHDOT) {
                // This is '/./' ==> drop the '.' and continue as if only the
                // first '/' had been seen (see normalizeBytes).
                srcPos--;
                state = STATE_SLASH;
            } else if (state == STATE_SLASHDOTDOT) {
                // This is '/../' ==> search backward to reset lastSlash and parentSlash
                if (parentSlash == -1) {
                    // Going above the root is an error
                    return false;
                } else {
                    lastSlash = parentSlash;
                    srcPos = parentSlash;
                    // Find the parentSlash
                    parentSlash = -1;
                    for (int i = lastSlash - 1; i >= start; i--) {
                        if (bs.get(i) == '/') {
                            parentSlash = i;
                            break;
                        }
                    }
                }
                state = STATE_SLASH;
                bs.put(srcPos++, b);
            }
        } else if (b == '.') {
            if (state == STATE_SLASH) {
                state = STATE_SLASHDOT;
            } else if (state == STATE_SLASHDOT) {
                state = STATE_SLASHDOTDOT;
            } else {
                // Dot inside an ordinary segment, or a third dot after "/..":
                // keep it and treat the segment as ordinary.
                state = STATE_CHAR;
            }
            bs.put(srcPos++, b);
        } else {
            state = STATE_CHAR;
            bs.put(srcPos++, b);
        }
    }

    bc.setEnd(srcPos);
    return true;
}

}