/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.tomcat.util.buf;

import java.io.ByteArrayOutputStream;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import org.apache.tomcat.util.res.StringManager;

All URL decoding happens here. This way we can reuse, review, optimize without adding complexity to the buffers. The conversion will modify the original buffer. @author Costin Manolache
/** * All URL decoding happens here. This way we can reuse, review, optimize * without adding complexity to the buffers. * * The conversion will modify the original buffer. * * @author Costin Manolache */
public final class UDecoder { private static final StringManager sm = StringManager.getManager(UDecoder.class); public static final boolean ALLOW_ENCODED_SLASH = Boolean.parseBoolean(System.getProperty("org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH", "false")); private static class DecodeException extends CharConversionException { private static final long serialVersionUID = 1L; public DecodeException(String s) { super(s); } @Override public synchronized Throwable fillInStackTrace() { // This class does not provide a stack trace return this; } }
Unexpected end of data.
/** Unexpected end of data. */
private static final IOException EXCEPTION_EOF = new DecodeException(sm.getString("uDecoder.eof"));
%xx with not-hex digit
/** %xx with not-hex digit */
private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException( "isHexDigit");
%-encoded slash is forbidden in resource path
/** %-encoded slash is forbidden in resource path */
private static final IOException EXCEPTION_SLASH = new DecodeException( "noSlash"); public UDecoder() { }
URLDecode, will modify the source.
Params:
  • mb – The URL encoded bytes
  • query – true if this is a query string
Throws:
/** * URLDecode, will modify the source. * @param mb The URL encoded bytes * @param query <code>true</code> if this is a query string * @throws IOException Invalid %xx URL encoding */
public void convert( ByteChunk mb, boolean query ) throws IOException { int start=mb.getOffset(); byte buff[]=mb.getBytes(); int end=mb.getEnd(); int idx= ByteChunk.findByte( buff, start, end, (byte) '%' ); int idx2=-1; if( query ) { idx2= ByteChunk.findByte( buff, start, (idx >= 0 ? idx : end), (byte) '+' ); } if( idx<0 && idx2<0 ) { return; } // idx will be the smallest positive index ( first % or + ) if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) { idx=idx2; } final boolean noSlash = !(ALLOW_ENCODED_SLASH || query); for( int j=idx; j<end; j++, idx++ ) { if( buff[ j ] == '+' && query) { buff[idx]= (byte)' ' ; } else if( buff[ j ] != '%' ) { buff[idx]= buff[j]; } else { // read next 2 digits if( j+2 >= end ) { throw EXCEPTION_EOF; } byte b1= buff[j+1]; byte b2=buff[j+2]; if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) { throw EXCEPTION_NOT_HEX_DIGIT; } j+=2; int res=x2c( b1, b2 ); if (noSlash && (res == '/')) { throw EXCEPTION_SLASH; } buff[idx]=(byte)res; } } mb.setEnd( idx ); } // -------------------- Additional methods -------------------- // XXX What do we do about charset ????
In-buffer processing - the buffer will be modified.
Params:
  • mb – The URL encoded chars
  • query – true if this is a query string
Throws:
/** * In-buffer processing - the buffer will be modified. * @param mb The URL encoded chars * @param query <code>true</code> if this is a query string * @throws IOException Invalid %xx URL encoding */
public void convert( CharChunk mb, boolean query ) throws IOException { // log( "Converting a char chunk "); int start=mb.getOffset(); char buff[]=mb.getBuffer(); int cend=mb.getEnd(); int idx= CharChunk.indexOf( buff, start, cend, '%' ); int idx2=-1; if( query ) { idx2= CharChunk.indexOf( buff, start, (idx >= 0 ? idx : cend), '+' ); } if( idx<0 && idx2<0 ) { return; } // idx will be the smallest positive index ( first % or + ) if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) { idx=idx2; } final boolean noSlash = !(ALLOW_ENCODED_SLASH || query); for( int j=idx; j<cend; j++, idx++ ) { if( buff[ j ] == '+' && query ) { buff[idx]=( ' ' ); } else if( buff[ j ] != '%' ) { buff[idx]=buff[j]; } else { // read next 2 digits if( j+2 >= cend ) { // invalid throw EXCEPTION_EOF; } char b1= buff[j+1]; char b2=buff[j+2]; if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) { throw EXCEPTION_NOT_HEX_DIGIT; } j+=2; int res=x2c( b1, b2 ); if (noSlash && (res == '/')) { throw EXCEPTION_SLASH; } buff[idx]=(char)res; } } mb.setEnd( idx ); }
URLDecode, will modify the source
Params:
  • mb – The URL encoded String, bytes or chars
  • query – true if this is a query string
Throws:
/** * URLDecode, will modify the source * @param mb The URL encoded String, bytes or chars * @param query <code>true</code> if this is a query string * @throws IOException Invalid %xx URL encoding */
public void convert(MessageBytes mb, boolean query) throws IOException { switch (mb.getType()) { case MessageBytes.T_STR: String strValue=mb.toString(); if( strValue==null ) { return; } try { mb.setString( convert( strValue, query )); } catch (RuntimeException ex) { throw new DecodeException(ex.getMessage()); } break; case MessageBytes.T_CHARS: CharChunk charC=mb.getCharChunk(); convert( charC, query ); break; case MessageBytes.T_BYTES: ByteChunk bytesC=mb.getByteChunk(); convert( bytesC, query ); break; } }
%xx decoding of a string. FIXME: this is inefficient.
Params:
  • str – The URL encoded string
  • query – true if this is a query string
Returns:the decoded string
/** * %xx decoding of a string. FIXME: this is inefficient. * @param str The URL encoded string * @param query <code>true</code> if this is a query string * @return the decoded string */
public final String convert(String str, boolean query) { if (str == null) { return null; } if( (!query || str.indexOf( '+' ) < 0) && str.indexOf( '%' ) < 0 ) { return str; } final boolean noSlash = !(ALLOW_ENCODED_SLASH || query); StringBuilder dec = new StringBuilder(); // decoded string output int strPos = 0; int strLen = str.length(); dec.ensureCapacity(str.length()); while (strPos < strLen) { int laPos; // lookahead position // look ahead to next URLencoded metacharacter, if any for (laPos = strPos; laPos < strLen; laPos++) { char laChar = str.charAt(laPos); if ((laChar == '+' && query) || (laChar == '%')) { break; } } // if there were non-metacharacters, copy them all as a block if (laPos > strPos) { dec.append(str.substring(strPos,laPos)); strPos = laPos; } // shortcut out of here if we're at the end of the string if (strPos >= strLen) { break; } // process next metacharacter char metaChar = str.charAt(strPos); if (metaChar == '+') { dec.append(' '); strPos++; continue; } else if (metaChar == '%') { // We throw the original exception - the super will deal with // it // try { char res = (char) Integer.parseInt( str.substring(strPos + 1, strPos + 3), 16); if (noSlash && (res == '/')) { throw new IllegalArgumentException(sm.getString("uDecoder.noSlash")); } dec.append(res); strPos += 3; } } return dec.toString(); }
Decode and return the specified URL-encoded String. When the byte array is converted to a string, UTF-8 is used. This may be different than some other servers. It is assumed the string is not a query string.
Params:
  • str – The url-encoded string
Throws:
Returns:the decoded string
/** * Decode and return the specified URL-encoded String. * When the byte array is converted to a string, UTF-8 is used. This may * be different than some other servers. It is assumed the string is not a * query string. * * @param str The url-encoded string * @return the decoded string * @exception IllegalArgumentException if a '%' character is not followed * by a valid 2-digit hexadecimal number */
public static String URLDecode(String str) { return URLDecode(str, StandardCharsets.UTF_8); }
Decode and return the specified URL-encoded String. It is assumed the string is not a query string.
Params:
  • str – The url-encoded string
  • charset – The character encoding to use; if null, UTF-8 is used.
Throws:
Returns:the decoded string
/** * Decode and return the specified URL-encoded String. It is assumed the * string is not a query string. * * @param str The url-encoded string * @param charset The character encoding to use; if null, UTF-8 is used. * @return the decoded string * @exception IllegalArgumentException if a '%' character is not followed * by a valid 2-digit hexadecimal number */
public static String URLDecode(String str, Charset charset) { if (str == null) { return null; } if (str.indexOf('%') == -1) { // No %nn sequences, so return string unchanged return str; } if (charset == null) { charset = StandardCharsets.UTF_8; } /* * Decoding is required. * * Potential complications: * - The source String may be partially decoded so it is not valid to * assume that the source String is ASCII. * - Have to process as characters since there is no guarantee that the * byte sequence for '%' is going to be the same in all character * sets. * - We don't know how many '%nn' sequences are required for a single * character. It varies between character sets and some use a variable * length. */ // This isn't perfect but it is a reasonable guess for the size of the // array required ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length() * 2); OutputStreamWriter osw = new OutputStreamWriter(baos, charset); char[] sourceChars = str.toCharArray(); int len = sourceChars.length; int ix = 0; try { while (ix < len) { char c = sourceChars[ix++]; if (c == '%') { osw.flush(); if (ix + 2 > len) { throw new IllegalArgumentException( sm.getString("uDecoder.urlDecode.missingDigit", str)); } char c1 = sourceChars[ix++]; char c2 = sourceChars[ix++]; if (isHexDigit(c1) && isHexDigit(c2)) { baos.write(x2c(c1, c2)); } else { throw new IllegalArgumentException( sm.getString("uDecoder.urlDecode.missingDigit", str)); } } else { osw.append(c); } } osw.flush(); return baos.toString(charset.name()); } catch (IOException ioe) { throw new IllegalArgumentException( sm.getString("uDecoder.urlDecode.conversionError", str, charset.name()), ioe); } } private static boolean isHexDigit( int c ) { return ( ( c>='0' && c<='9' ) || ( c>='a' && c<='f' ) || ( c>='A' && c<='F' )); } private static int x2c( byte b1, byte b2 ) { int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 : (b1 -'0'); digit*=16; digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 : (b2 -'0'); return digit; } private static int x2c( char b1, char b2 ) { int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 : (b1 -'0'); digit*=16; digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 : (b2 -'0'); return digit; } }