/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.coyote.http2;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.tomcat.util.res.StringManager;

public class HPackHuffman {

    protected static final StringManager sm = StringManager.getManager(HPackHuffman.class);

    private static final HuffmanCode[] HUFFMAN_CODES;

    
array based tree representation of a huffman code.

the high two bytes corresponds to the tree node if the bit is set, and the low two bytes for if it is clear if the high bit is set it is a terminal node, otherwise it contains the next node position.
/** * array based tree representation of a huffman code. * <p/> * the high two bytes corresponds to the tree node if the bit is set, and the low two bytes for if it is clear * if the high bit is set it is a terminal node, otherwise it contains the next node position. */
private static final int[] DECODING_TABLE; private static final int LOW_TERMINAL_BIT = (0b10000000) << 8; private static final int HIGH_TERMINAL_BIT = (0b10000000) << 24; private static final int LOW_MASK = 0b0111111111111111; static { HuffmanCode[] codes = new HuffmanCode[257]; codes[0] = new HuffmanCode(0x1ff8, 13); codes[1] = new HuffmanCode(0x7fffd8, 23); codes[2] = new HuffmanCode(0xfffffe2, 28); codes[3] = new HuffmanCode(0xfffffe3, 28); codes[4] = new HuffmanCode(0xfffffe4, 28); codes[5] = new HuffmanCode(0xfffffe5, 28); codes[6] = new HuffmanCode(0xfffffe6, 28); codes[7] = new HuffmanCode(0xfffffe7, 28); codes[8] = new HuffmanCode(0xfffffe8, 28); codes[9] = new HuffmanCode(0xffffea, 24); codes[10] = new HuffmanCode(0x3ffffffc, 30); codes[11] = new HuffmanCode(0xfffffe9, 28); codes[12] = new HuffmanCode(0xfffffea, 28); codes[13] = new HuffmanCode(0x3ffffffd, 30); codes[14] = new HuffmanCode(0xfffffeb, 28); codes[15] = new HuffmanCode(0xfffffec, 28); codes[16] = new HuffmanCode(0xfffffed, 28); codes[17] = new HuffmanCode(0xfffffee, 28); codes[18] = new HuffmanCode(0xfffffef, 28); codes[19] = new HuffmanCode(0xffffff0, 28); codes[20] = new HuffmanCode(0xffffff1, 28); codes[21] = new HuffmanCode(0xffffff2, 28); codes[22] = new HuffmanCode(0x3ffffffe, 30); codes[23] = new HuffmanCode(0xffffff3, 28); codes[24] = new HuffmanCode(0xffffff4, 28); codes[25] = new HuffmanCode(0xffffff5, 28); codes[26] = new HuffmanCode(0xffffff6, 28); codes[27] = new HuffmanCode(0xffffff7, 28); codes[28] = new HuffmanCode(0xffffff8, 28); codes[29] = new HuffmanCode(0xffffff9, 28); codes[30] = new HuffmanCode(0xffffffa, 28); codes[31] = new HuffmanCode(0xffffffb, 28); codes[32] = new HuffmanCode(0x14, 6); codes[33] = new HuffmanCode(0x3f8, 10); codes[34] = new HuffmanCode(0x3f9, 10); codes[35] = new HuffmanCode(0xffa, 12); codes[36] = new HuffmanCode(0x1ff9, 13); codes[37] = new HuffmanCode(0x15, 6); codes[38] = new HuffmanCode(0xf8, 8); codes[39] = new HuffmanCode(0x7fa, 11); codes[40] = new HuffmanCode(0x3fa, 10); codes[41] = new HuffmanCode(0x3fb, 10); codes[42] = new HuffmanCode(0xf9, 8); codes[43] = new HuffmanCode(0x7fb, 11); codes[44] = new HuffmanCode(0xfa, 8); codes[45] = new HuffmanCode(0x16, 6); codes[46] = new HuffmanCode(0x17, 6); codes[47] = new HuffmanCode(0x18, 6); codes[48] = new HuffmanCode(0x0, 5); codes[49] = new HuffmanCode(0x1, 5); codes[50] = new HuffmanCode(0x2, 5); codes[51] = new HuffmanCode(0x19, 6); codes[52] = new HuffmanCode(0x1a, 6); codes[53] = new HuffmanCode(0x1b, 6); codes[54] = new HuffmanCode(0x1c, 6); codes[55] = new HuffmanCode(0x1d, 6); codes[56] = new HuffmanCode(0x1e, 6); codes[57] = new HuffmanCode(0x1f, 6); codes[58] = new HuffmanCode(0x5c, 7); codes[59] = new HuffmanCode(0xfb, 8); codes[60] = new HuffmanCode(0x7ffc, 15); codes[61] = new HuffmanCode(0x20, 6); codes[62] = new HuffmanCode(0xffb, 12); codes[63] = new HuffmanCode(0x3fc, 10); codes[64] = new HuffmanCode(0x1ffa, 13); codes[65] = new HuffmanCode(0x21, 6); codes[66] = new HuffmanCode(0x5d, 7); codes[67] = new HuffmanCode(0x5e, 7); codes[68] = new HuffmanCode(0x5f, 7); codes[69] = new HuffmanCode(0x60, 7); codes[70] = new HuffmanCode(0x61, 7); codes[71] = new HuffmanCode(0x62, 7); codes[72] = new HuffmanCode(0x63, 7); codes[73] = new HuffmanCode(0x64, 7); codes[74] = new HuffmanCode(0x65, 7); codes[75] = new HuffmanCode(0x66, 7); codes[76] = new HuffmanCode(0x67, 7); codes[77] = new HuffmanCode(0x68, 7); codes[78] = new HuffmanCode(0x69, 7); codes[79] = new HuffmanCode(0x6a, 7); codes[80] = new HuffmanCode(0x6b, 7); codes[81] = new HuffmanCode(0x6c, 7); codes[82] = new HuffmanCode(0x6d, 7); codes[83] = new HuffmanCode(0x6e, 7); codes[84] = new HuffmanCode(0x6f, 7); codes[85] = new HuffmanCode(0x70, 7); codes[86] = new HuffmanCode(0x71, 7); codes[87] = new HuffmanCode(0x72, 7); codes[88] = new HuffmanCode(0xfc, 8); codes[89] = new HuffmanCode(0x73, 7); codes[90] = new HuffmanCode(0xfd, 8); codes[91] = new HuffmanCode(0x1ffb, 13); codes[92] = new HuffmanCode(0x7fff0, 19); codes[93] = new HuffmanCode(0x1ffc, 13); codes[94] = new HuffmanCode(0x3ffc, 14); codes[95] = new HuffmanCode(0x22, 6); codes[96] = new HuffmanCode(0x7ffd, 15); codes[97] = new HuffmanCode(0x3, 5); codes[98] = new HuffmanCode(0x23, 6); codes[99] = new HuffmanCode(0x4, 5); codes[100] = new HuffmanCode(0x24, 6); codes[101] = new HuffmanCode(0x5, 5); codes[102] = new HuffmanCode(0x25, 6); codes[103] = new HuffmanCode(0x26, 6); codes[104] = new HuffmanCode(0x27, 6); codes[105] = new HuffmanCode(0x6, 5); codes[106] = new HuffmanCode(0x74, 7); codes[107] = new HuffmanCode(0x75, 7); codes[108] = new HuffmanCode(0x28, 6); codes[109] = new HuffmanCode(0x29, 6); codes[110] = new HuffmanCode(0x2a, 6); codes[111] = new HuffmanCode(0x7, 5); codes[112] = new HuffmanCode(0x2b, 6); codes[113] = new HuffmanCode(0x76, 7); codes[114] = new HuffmanCode(0x2c, 6); codes[115] = new HuffmanCode(0x8, 5); codes[116] = new HuffmanCode(0x9, 5); codes[117] = new HuffmanCode(0x2d, 6); codes[118] = new HuffmanCode(0x77, 7); codes[119] = new HuffmanCode(0x78, 7); codes[120] = new HuffmanCode(0x79, 7); codes[121] = new HuffmanCode(0x7a, 7); codes[122] = new HuffmanCode(0x7b, 7); codes[123] = new HuffmanCode(0x7ffe, 15); codes[124] = new HuffmanCode(0x7fc, 11); codes[125] = new HuffmanCode(0x3ffd, 14); codes[126] = new HuffmanCode(0x1ffd, 13); codes[127] = new HuffmanCode(0xffffffc, 28); codes[128] = new HuffmanCode(0xfffe6, 20); codes[129] = new HuffmanCode(0x3fffd2, 22); codes[130] = new HuffmanCode(0xfffe7, 20); codes[131] = new HuffmanCode(0xfffe8, 20); codes[132] = new HuffmanCode(0x3fffd3, 22); codes[133] = new HuffmanCode(0x3fffd4, 22); codes[134] = new HuffmanCode(0x3fffd5, 22); codes[135] = new HuffmanCode(0x7fffd9, 23); codes[136] = new HuffmanCode(0x3fffd6, 22); codes[137] = new HuffmanCode(0x7fffda, 23); codes[138] = new HuffmanCode(0x7fffdb, 23); codes[139] = new HuffmanCode(0x7fffdc, 23); codes[140] = new HuffmanCode(0x7fffdd, 23); codes[141] = new HuffmanCode(0x7fffde, 23); codes[142] = new HuffmanCode(0xffffeb, 24); codes[143] = new HuffmanCode(0x7fffdf, 23); codes[144] = new HuffmanCode(0xffffec, 24); codes[145] = new HuffmanCode(0xffffed, 24); codes[146] = new HuffmanCode(0x3fffd7, 22); codes[147] = new HuffmanCode(0x7fffe0, 23); codes[148] = new HuffmanCode(0xffffee, 24); codes[149] = new HuffmanCode(0x7fffe1, 23); codes[150] = new HuffmanCode(0x7fffe2, 23); codes[151] = new HuffmanCode(0x7fffe3, 23); codes[152] = new HuffmanCode(0x7fffe4, 23); codes[153] = new HuffmanCode(0x1fffdc, 21); codes[154] = new HuffmanCode(0x3fffd8, 22); codes[155] = new HuffmanCode(0x7fffe5, 23); codes[156] = new HuffmanCode(0x3fffd9, 22); codes[157] = new HuffmanCode(0x7fffe6, 23); codes[158] = new HuffmanCode(0x7fffe7, 23); codes[159] = new HuffmanCode(0xffffef, 24); codes[160] = new HuffmanCode(0x3fffda, 22); codes[161] = new HuffmanCode(0x1fffdd, 21); codes[162] = new HuffmanCode(0xfffe9, 20); codes[163] = new HuffmanCode(0x3fffdb, 22); codes[164] = new HuffmanCode(0x3fffdc, 22); codes[165] = new HuffmanCode(0x7fffe8, 23); codes[166] = new HuffmanCode(0x7fffe9, 23); codes[167] = new HuffmanCode(0x1fffde, 21); codes[168] = new HuffmanCode(0x7fffea, 23); codes[169] = new HuffmanCode(0x3fffdd, 22); codes[170] = new HuffmanCode(0x3fffde, 22); codes[171] = new HuffmanCode(0xfffff0, 24); codes[172] = new HuffmanCode(0x1fffdf, 21); codes[173] = new HuffmanCode(0x3fffdf, 22); codes[174] = new HuffmanCode(0x7fffeb, 23); codes[175] = new HuffmanCode(0x7fffec, 23); codes[176] = new HuffmanCode(0x1fffe0, 21); codes[177] = new HuffmanCode(0x1fffe1, 21); codes[178] = new HuffmanCode(0x3fffe0, 22); codes[179] = new HuffmanCode(0x1fffe2, 21); codes[180] = new HuffmanCode(0x7fffed, 23); codes[181] = new HuffmanCode(0x3fffe1, 22); codes[182] = new HuffmanCode(0x7fffee, 23); codes[183] = new HuffmanCode(0x7fffef, 23); codes[184] = new HuffmanCode(0xfffea, 20); codes[185] = new HuffmanCode(0x3fffe2, 22); codes[186] = new HuffmanCode(0x3fffe3, 22); codes[187] = new HuffmanCode(0x3fffe4, 22); codes[188] = new HuffmanCode(0x7ffff0, 23); codes[189] = new HuffmanCode(0x3fffe5, 22); codes[190] = new HuffmanCode(0x3fffe6, 22); codes[191] = new HuffmanCode(0x7ffff1, 23); codes[192] = new HuffmanCode(0x3ffffe0, 26); codes[193] = new HuffmanCode(0x3ffffe1, 26); codes[194] = new HuffmanCode(0xfffeb, 20); codes[195] = new HuffmanCode(0x7fff1, 19); codes[196] = new HuffmanCode(0x3fffe7, 22); codes[197] = new HuffmanCode(0x7ffff2, 23); codes[198] = new HuffmanCode(0x3fffe8, 22); codes[199] = new HuffmanCode(0x1ffffec, 25); codes[200] = new HuffmanCode(0x3ffffe2, 26); codes[201] = new HuffmanCode(0x3ffffe3, 26); codes[202] = new HuffmanCode(0x3ffffe4, 26); codes[203] = new HuffmanCode(0x7ffffde, 27); codes[204] = new HuffmanCode(0x7ffffdf, 27); codes[205] = new HuffmanCode(0x3ffffe5, 26); codes[206] = new HuffmanCode(0xfffff1, 24); codes[207] = new HuffmanCode(0x1ffffed, 25); codes[208] = new HuffmanCode(0x7fff2, 19); codes[209] = new HuffmanCode(0x1fffe3, 21); codes[210] = new HuffmanCode(0x3ffffe6, 26); codes[211] = new HuffmanCode(0x7ffffe0, 27); codes[212] = new HuffmanCode(0x7ffffe1, 27); codes[213] = new HuffmanCode(0x3ffffe7, 26); codes[214] = new HuffmanCode(0x7ffffe2, 27); codes[215] = new HuffmanCode(0xfffff2, 24); codes[216] = new HuffmanCode(0x1fffe4, 21); codes[217] = new HuffmanCode(0x1fffe5, 21); codes[218] = new HuffmanCode(0x3ffffe8, 26); codes[219] = new HuffmanCode(0x3ffffe9, 26); codes[220] = new HuffmanCode(0xffffffd, 28); codes[221] = new HuffmanCode(0x7ffffe3, 27); codes[222] = new HuffmanCode(0x7ffffe4, 27); codes[223] = new HuffmanCode(0x7ffffe5, 27); codes[224] = new HuffmanCode(0xfffec, 20); codes[225] = new HuffmanCode(0xfffff3, 24); codes[226] = new HuffmanCode(0xfffed, 20); codes[227] = new HuffmanCode(0x1fffe6, 21); codes[228] = new HuffmanCode(0x3fffe9, 22); codes[229] = new HuffmanCode(0x1fffe7, 21); codes[230] = new HuffmanCode(0x1fffe8, 21); codes[231] = new HuffmanCode(0x7ffff3, 23); codes[232] = new HuffmanCode(0x3fffea, 22); codes[233] = new HuffmanCode(0x3fffeb, 22); codes[234] = new HuffmanCode(0x1ffffee, 25); codes[235] = new HuffmanCode(0x1ffffef, 25); codes[236] = new HuffmanCode(0xfffff4, 24); codes[237] = new HuffmanCode(0xfffff5, 24); codes[238] = new HuffmanCode(0x3ffffea, 26); codes[239] = new HuffmanCode(0x7ffff4, 23); codes[240] = new HuffmanCode(0x3ffffeb, 26); codes[241] = new HuffmanCode(0x7ffffe6, 27); codes[242] = new HuffmanCode(0x3ffffec, 26); codes[243] = new HuffmanCode(0x3ffffed, 26); codes[244] = new HuffmanCode(0x7ffffe7, 27); codes[245] = new HuffmanCode(0x7ffffe8, 27); codes[246] = new HuffmanCode(0x7ffffe9, 27); codes[247] = new HuffmanCode(0x7ffffea, 27); codes[248] = new HuffmanCode(0x7ffffeb, 27); codes[249] = new HuffmanCode(0xffffffe, 28); codes[250] = new HuffmanCode(0x7ffffec, 27); codes[251] = new HuffmanCode(0x7ffffed, 27); codes[252] = new HuffmanCode(0x7ffffee, 27); codes[253] = new HuffmanCode(0x7ffffef, 27); codes[254] = new HuffmanCode(0x7fffff0, 27); codes[255] = new HuffmanCode(0x3ffffee, 26); codes[256] = new HuffmanCode(0x3fffffff, 30); HUFFMAN_CODES = codes; //lengths determined by experimentation, just set it to something large then see how large it actually ends up int[] codingTree = new int[256]; //the current position in the tree int pos = 0; int allocated = 1; //the next position to allocate to //map of the current state at a given position //only used while building the tree HuffmanCode[] currentCode = new HuffmanCode[256]; currentCode[0] = new HuffmanCode(0, 0); final Set<HuffmanCode> allCodes = new HashSet<>(); allCodes.addAll(Arrays.asList(HUFFMAN_CODES)); while (!allCodes.isEmpty()) { int length = currentCode[pos].length; int code = currentCode[pos].value; int newLength = length + 1; HuffmanCode high = new HuffmanCode(code << 1 | 1, newLength); HuffmanCode low = new HuffmanCode(code << 1, newLength); int newVal = 0; boolean highTerminal = allCodes.remove(high); if (highTerminal) { //bah, linear search int i = 0; for (i = 0; i < codes.length; ++i) { if (codes[i].equals(high)) { break; } } newVal = LOW_TERMINAL_BIT | i; } else { int highPos = allocated++; currentCode[highPos] = high; newVal = highPos; } newVal <<= 16; boolean lowTerminal = allCodes.remove(low); if (lowTerminal) { //bah, linear search int i = 0; for (i = 0; i < codes.length; ++i) { if (codes[i].equals(low)) { break; } } newVal |= LOW_TERMINAL_BIT | i; } else { int lowPos = allocated++; currentCode[lowPos] = low; newVal |= lowPos; } codingTree[pos] = newVal; pos++; } DECODING_TABLE = codingTree; }
Decodes a huffman encoded string into the target StringBuilder. There must be enough space left in the buffer for this method to succeed.
Params:
  • data – The byte buffer
  • length – The length of data from the buffer to decode
  • target – The target for the decompressed data
Throws:
  • HpackException – If the Huffman encoded value in HPACK headers did not end with EOS padding
/** * Decodes a huffman encoded string into the target StringBuilder. There * must be enough space left in the buffer for this method to succeed. * * @param data The byte buffer * @param length The length of data from the buffer to decode * @param target The target for the decompressed data * * @throws HpackException If the Huffman encoded value in HPACK headers did * not end with EOS padding */
public static void decode(ByteBuffer data, int length, StringBuilder target) throws HpackException { assert data.remaining() >= length; int treePos = 0; boolean eosBits = true; int eosBitCount = 0; for (int i = 0; i < length; ++i) { byte b = data.get(); int bitPos = 7; while (bitPos >= 0) { int val = DECODING_TABLE[treePos]; if (((1 << bitPos) & b) == 0) { //bit not set, we want the lower part of the tree if ((val & LOW_TERMINAL_BIT) == 0) { treePos = val & LOW_MASK; eosBits = false; eosBitCount = 0; } else { target.append((char) (val & LOW_MASK)); treePos = 0; eosBits = true; } } else { if (eosBits) { eosBitCount++; } //bit not set, we want the lower part of the tree if ((val & HIGH_TERMINAL_BIT) == 0) { treePos = (val >> 16) & LOW_MASK; } else { if (eosBitCount != 0) { // This must be the EOS symbol which MUST be treated // as an error throw new HpackException(sm.getString("hpackhuffman.stringLiteralEOS")); } target.append((char) ((val >> 16) & LOW_MASK)); treePos = 0; eosBits = true; } } bitPos--; } } if (eosBitCount > 7) { throw new HpackException(sm.getString( "hpackhuffman.stringLiteralTooMuchPadding")); } if (!eosBits) { throw new HpackException(sm.getString( "hpackhuffman.huffmanEncodedHpackValueDidNotEndWithEOS")); } }
Encodes the given string into the buffer. If there is not enough space in the buffer, or the encoded version is bigger than the original it will return false and not modify the buffers position.
Params:
  • buffer – The buffer to encode into
  • toEncode – The string to encode
  • forceLowercase – If the string should be encoded in lower case
Returns:true if encoding succeeded
/** * Encodes the given string into the buffer. If there is not enough space in * the buffer, or the encoded version is bigger than the original it will * return false and not modify the buffers position. * * @param buffer The buffer to encode into * @param toEncode The string to encode * @param forceLowercase If the string should be encoded in lower case * @return true if encoding succeeded */
public static boolean encode(ByteBuffer buffer, String toEncode, boolean forceLowercase) { if (buffer.remaining() <= toEncode.length()) { return false; } int start = buffer.position(); //this sucks, but we need to put the length first //and we don't really have any option but to calculate it in advance to make sure we have left enough room //so we end up iterating twice int length = 0; for (int i = 0; i < toEncode.length(); ++i) { char c = toEncode.charAt(i); if (c > 255) { throw new IllegalArgumentException(sm.getString("hpack.invalidCharacter", Character.toString(c), Integer.valueOf(c))); } if(forceLowercase) { c = Hpack.toLower(c); } HuffmanCode code = HUFFMAN_CODES[c]; length += code.length; } int byteLength = length / 8 + (length % 8 == 0 ? 0 : 1); buffer.put((byte) (1 << 7)); Hpack.encodeInteger(buffer, byteLength, 7); int bytePos = 0; byte currentBufferByte = 0; for (int i = 0; i < toEncode.length(); ++i) { char c = toEncode.charAt(i); if(forceLowercase) { c = Hpack.toLower(c); } HuffmanCode code = HUFFMAN_CODES[c]; if (code.length + bytePos <= 8) { //it fits in the current byte currentBufferByte |= ((code.value & 0xFF) << 8 - (code.length + bytePos)); bytePos += code.length; } else { //it does not fit, it may need up to 4 bytes int val = code.value; int rem = code.length; while (rem > 0) { if (!buffer.hasRemaining()) { buffer.position(start); return false; } int remainingInByte = 8 - bytePos; if (rem > remainingInByte) { currentBufferByte |= (val >> (rem - remainingInByte)); } else { currentBufferByte |= (val << (remainingInByte - rem)); } if (rem > remainingInByte) { buffer.put(currentBufferByte); currentBufferByte = 0; bytePos = 0; } else { bytePos = rem; } rem -= remainingInByte; } } if (bytePos == 8) { if (!buffer.hasRemaining()) { buffer.position(start); return false; } buffer.put(currentBufferByte); currentBufferByte = 0; bytePos = 0; } if (buffer.position() - start > toEncode.length()) { //the encoded version is longer than the original //just return false buffer.position(start); return false; } } if (bytePos > 0) { //add the EOS bytes if we have not finished on a single byte if (!buffer.hasRemaining()) { buffer.position(start); return false; } buffer.put((byte) (currentBufferByte | ((0xFF) >> bytePos))); } return true; } protected static class HuffmanCode {
The value of the least significant bits of the code
/** * The value of the least significant bits of the code */
int value;
length of the code, in bits
/** * length of the code, in bits */
int length; public HuffmanCode(int value, int length) { this.value = value; this.length = length; } public int getValue() { return value; } public int getLength() { return length; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; HuffmanCode that = (HuffmanCode) o; if (length != that.length) return false; if (value != that.value) return false; return true; } @Override public int hashCode() { int result = value; result = 31 * result + length; return result; } @Override public String toString() { return "HuffmanCode{" + "value=" + value + ", length=" + length + '}'; } } }