1 /*
2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package jdk.internal.net.http.hpack;
27 
28 import jdk.internal.net.http.hpack.HPACK.BufferUpdateConsumer;
29 
30 import java.io.IOException;
31 import java.nio.ByteBuffer;
32 import java.util.List;
33 import java.util.Objects;
34 
35 import static jdk.internal.net.http.hpack.HPACK.bytesForBits;
36 
37 public final class QuickHuffman {
38 
39     /*
40      * Mapping of characters to their code information. Code information
41      * consists of a code value and a code length. Both components are packed in
42      * a single long as follows:
43      *
44      *     MSB                             LSB
45      *     +----------------+----------------+
46      *     |~code           |         length~|
47      *     +----------------+----------------+
48      *     |<----- 32 ----->|<----- 32 ----->|
49      *     |<------------- 64 -------------->|
50      *
51      * The leftmost 32 bits hold the code value. This value is aligned left (or
52      * MSB). The rightmost 32 bits hold the code length. This length is aligned
53      * right (or LSB). Such structure is possible thanks to codes being up to 30
54      * bits long and their lengths being up to 5 bits long (length = 5..30).
55      * This allows both components to fit into long and, thus, better locality.
56      *
57      * Input strings never contain EOS. Thus there's no need to provide EOS
58      * mapping. Hence, the length of the array is 256, not 257.
59      */
60     private static final long[] codes = new long[256];
61 
codeValueOf(char c)62     private static long codeValueOf(char c) {
63         return codes[c] & 0xffffffff00000000L;
64     }
65 
codeLengthOf(char c)66     private static long codeLengthOf(char c) {
67         return codes[c] & 0x00000000ffffffffL;
68     }
69 
70     private static final int EOS_LENGTH = 30;
71     private static final int EOS_LSB = 0x3fffffff;
72     // EOS_LSB is casted to long before the shift, to allow long shift (6 bits)
73     // instead of int shift (5 bits):
74     private static final long EOS_MSB = ((long) EOS_LSB) << (64 - EOS_LENGTH);
75 
76     /*
77      * Huffman trie.
78      *
79      * The root node leads to 257 descendant leaf nodes, of which 256 nodes
80      * correspond to characters and 1 node correspond to EOS.
81      */
82     private static final Node root = buildTrie();
83 
buildTrie()84     private static Node buildTrie() {
85         TemporaryNode tmpRoot = new TemporaryNode();
86         addChar(tmpRoot,   0, 0x1ff8,     13);
87         addChar(tmpRoot,   1, 0x7fffd8,   23);
88         addChar(tmpRoot,   2, 0xfffffe2,  28);
89         addChar(tmpRoot,   3, 0xfffffe3,  28);
90         addChar(tmpRoot,   4, 0xfffffe4,  28);
91         addChar(tmpRoot,   5, 0xfffffe5,  28);
92         addChar(tmpRoot,   6, 0xfffffe6,  28);
93         addChar(tmpRoot,   7, 0xfffffe7,  28);
94         addChar(tmpRoot,   8, 0xfffffe8,  28);
95         addChar(tmpRoot,   9, 0xffffea,   24);
96         addChar(tmpRoot,  10, 0x3ffffffc, 30);
97         addChar(tmpRoot,  11, 0xfffffe9,  28);
98         addChar(tmpRoot,  12, 0xfffffea,  28);
99         addChar(tmpRoot,  13, 0x3ffffffd, 30);
100         addChar(tmpRoot,  14, 0xfffffeb,  28);
101         addChar(tmpRoot,  15, 0xfffffec,  28);
102         addChar(tmpRoot,  16, 0xfffffed,  28);
103         addChar(tmpRoot,  17, 0xfffffee,  28);
104         addChar(tmpRoot,  18, 0xfffffef,  28);
105         addChar(tmpRoot,  19, 0xffffff0,  28);
106         addChar(tmpRoot,  20, 0xffffff1,  28);
107         addChar(tmpRoot,  21, 0xffffff2,  28);
108         addChar(tmpRoot,  22, 0x3ffffffe, 30);
109         addChar(tmpRoot,  23, 0xffffff3,  28);
110         addChar(tmpRoot,  24, 0xffffff4,  28);
111         addChar(tmpRoot,  25, 0xffffff5,  28);
112         addChar(tmpRoot,  26, 0xffffff6,  28);
113         addChar(tmpRoot,  27, 0xffffff7,  28);
114         addChar(tmpRoot,  28, 0xffffff8,  28);
115         addChar(tmpRoot,  29, 0xffffff9,  28);
116         addChar(tmpRoot,  30, 0xffffffa,  28);
117         addChar(tmpRoot,  31, 0xffffffb,  28);
118         addChar(tmpRoot,  32, 0x14,        6);
119         addChar(tmpRoot,  33, 0x3f8,      10);
120         addChar(tmpRoot,  34, 0x3f9,      10);
121         addChar(tmpRoot,  35, 0xffa,      12);
122         addChar(tmpRoot,  36, 0x1ff9,     13);
123         addChar(tmpRoot,  37, 0x15,        6);
124         addChar(tmpRoot,  38, 0xf8,        8);
125         addChar(tmpRoot,  39, 0x7fa,      11);
126         addChar(tmpRoot,  40, 0x3fa,      10);
127         addChar(tmpRoot,  41, 0x3fb,      10);
128         addChar(tmpRoot,  42, 0xf9,        8);
129         addChar(tmpRoot,  43, 0x7fb,      11);
130         addChar(tmpRoot,  44, 0xfa,        8);
131         addChar(tmpRoot,  45, 0x16,        6);
132         addChar(tmpRoot,  46, 0x17,        6);
133         addChar(tmpRoot,  47, 0x18,        6);
134         addChar(tmpRoot,  48, 0x0,         5);
135         addChar(tmpRoot,  49, 0x1,         5);
136         addChar(tmpRoot,  50, 0x2,         5);
137         addChar(tmpRoot,  51, 0x19,        6);
138         addChar(tmpRoot,  52, 0x1a,        6);
139         addChar(tmpRoot,  53, 0x1b,        6);
140         addChar(tmpRoot,  54, 0x1c,        6);
141         addChar(tmpRoot,  55, 0x1d,        6);
142         addChar(tmpRoot,  56, 0x1e,        6);
143         addChar(tmpRoot,  57, 0x1f,        6);
144         addChar(tmpRoot,  58, 0x5c,        7);
145         addChar(tmpRoot,  59, 0xfb,        8);
146         addChar(tmpRoot,  60, 0x7ffc,     15);
147         addChar(tmpRoot,  61, 0x20,        6);
148         addChar(tmpRoot,  62, 0xffb,      12);
149         addChar(tmpRoot,  63, 0x3fc,      10);
150         addChar(tmpRoot,  64, 0x1ffa,     13);
151         addChar(tmpRoot,  65, 0x21,        6);
152         addChar(tmpRoot,  66, 0x5d,        7);
153         addChar(tmpRoot,  67, 0x5e,        7);
154         addChar(tmpRoot,  68, 0x5f,        7);
155         addChar(tmpRoot,  69, 0x60,        7);
156         addChar(tmpRoot,  70, 0x61,        7);
157         addChar(tmpRoot,  71, 0x62,        7);
158         addChar(tmpRoot,  72, 0x63,        7);
159         addChar(tmpRoot,  73, 0x64,        7);
160         addChar(tmpRoot,  74, 0x65,        7);
161         addChar(tmpRoot,  75, 0x66,        7);
162         addChar(tmpRoot,  76, 0x67,        7);
163         addChar(tmpRoot,  77, 0x68,        7);
164         addChar(tmpRoot,  78, 0x69,        7);
165         addChar(tmpRoot,  79, 0x6a,        7);
166         addChar(tmpRoot,  80, 0x6b,        7);
167         addChar(tmpRoot,  81, 0x6c,        7);
168         addChar(tmpRoot,  82, 0x6d,        7);
169         addChar(tmpRoot,  83, 0x6e,        7);
170         addChar(tmpRoot,  84, 0x6f,        7);
171         addChar(tmpRoot,  85, 0x70,        7);
172         addChar(tmpRoot,  86, 0x71,        7);
173         addChar(tmpRoot,  87, 0x72,        7);
174         addChar(tmpRoot,  88, 0xfc,        8);
175         addChar(tmpRoot,  89, 0x73,        7);
176         addChar(tmpRoot,  90, 0xfd,        8);
177         addChar(tmpRoot,  91, 0x1ffb,     13);
178         addChar(tmpRoot,  92, 0x7fff0,    19);
179         addChar(tmpRoot,  93, 0x1ffc,     13);
180         addChar(tmpRoot,  94, 0x3ffc,     14);
181         addChar(tmpRoot,  95, 0x22,        6);
182         addChar(tmpRoot,  96, 0x7ffd,     15);
183         addChar(tmpRoot,  97, 0x3,         5);
184         addChar(tmpRoot,  98, 0x23,        6);
185         addChar(tmpRoot,  99, 0x4,         5);
186         addChar(tmpRoot, 100, 0x24,        6);
187         addChar(tmpRoot, 101, 0x5,         5);
188         addChar(tmpRoot, 102, 0x25,        6);
189         addChar(tmpRoot, 103, 0x26,        6);
190         addChar(tmpRoot, 104, 0x27,        6);
191         addChar(tmpRoot, 105, 0x6,         5);
192         addChar(tmpRoot, 106, 0x74,        7);
193         addChar(tmpRoot, 107, 0x75,        7);
194         addChar(tmpRoot, 108, 0x28,        6);
195         addChar(tmpRoot, 109, 0x29,        6);
196         addChar(tmpRoot, 110, 0x2a,        6);
197         addChar(tmpRoot, 111, 0x7,         5);
198         addChar(tmpRoot, 112, 0x2b,        6);
199         addChar(tmpRoot, 113, 0x76,        7);
200         addChar(tmpRoot, 114, 0x2c,        6);
201         addChar(tmpRoot, 115, 0x8,         5);
202         addChar(tmpRoot, 116, 0x9,         5);
203         addChar(tmpRoot, 117, 0x2d,        6);
204         addChar(tmpRoot, 118, 0x77,        7);
205         addChar(tmpRoot, 119, 0x78,        7);
206         addChar(tmpRoot, 120, 0x79,        7);
207         addChar(tmpRoot, 121, 0x7a,        7);
208         addChar(tmpRoot, 122, 0x7b,        7);
209         addChar(tmpRoot, 123, 0x7ffe,     15);
210         addChar(tmpRoot, 124, 0x7fc,      11);
211         addChar(tmpRoot, 125, 0x3ffd,     14);
212         addChar(tmpRoot, 126, 0x1ffd,     13);
213         addChar(tmpRoot, 127, 0xffffffc,  28);
214         addChar(tmpRoot, 128, 0xfffe6,    20);
215         addChar(tmpRoot, 129, 0x3fffd2,   22);
216         addChar(tmpRoot, 130, 0xfffe7,    20);
217         addChar(tmpRoot, 131, 0xfffe8,    20);
218         addChar(tmpRoot, 132, 0x3fffd3,   22);
219         addChar(tmpRoot, 133, 0x3fffd4,   22);
220         addChar(tmpRoot, 134, 0x3fffd5,   22);
221         addChar(tmpRoot, 135, 0x7fffd9,   23);
222         addChar(tmpRoot, 136, 0x3fffd6,   22);
223         addChar(tmpRoot, 137, 0x7fffda,   23);
224         addChar(tmpRoot, 138, 0x7fffdb,   23);
225         addChar(tmpRoot, 139, 0x7fffdc,   23);
226         addChar(tmpRoot, 140, 0x7fffdd,   23);
227         addChar(tmpRoot, 141, 0x7fffde,   23);
228         addChar(tmpRoot, 142, 0xffffeb,   24);
229         addChar(tmpRoot, 143, 0x7fffdf,   23);
230         addChar(tmpRoot, 144, 0xffffec,   24);
231         addChar(tmpRoot, 145, 0xffffed,   24);
232         addChar(tmpRoot, 146, 0x3fffd7,   22);
233         addChar(tmpRoot, 147, 0x7fffe0,   23);
234         addChar(tmpRoot, 148, 0xffffee,   24);
235         addChar(tmpRoot, 149, 0x7fffe1,   23);
236         addChar(tmpRoot, 150, 0x7fffe2,   23);
237         addChar(tmpRoot, 151, 0x7fffe3,   23);
238         addChar(tmpRoot, 152, 0x7fffe4,   23);
239         addChar(tmpRoot, 153, 0x1fffdc,   21);
240         addChar(tmpRoot, 154, 0x3fffd8,   22);
241         addChar(tmpRoot, 155, 0x7fffe5,   23);
242         addChar(tmpRoot, 156, 0x3fffd9,   22);
243         addChar(tmpRoot, 157, 0x7fffe6,   23);
244         addChar(tmpRoot, 158, 0x7fffe7,   23);
245         addChar(tmpRoot, 159, 0xffffef,   24);
246         addChar(tmpRoot, 160, 0x3fffda,   22);
247         addChar(tmpRoot, 161, 0x1fffdd,   21);
248         addChar(tmpRoot, 162, 0xfffe9,    20);
249         addChar(tmpRoot, 163, 0x3fffdb,   22);
250         addChar(tmpRoot, 164, 0x3fffdc,   22);
251         addChar(tmpRoot, 165, 0x7fffe8,   23);
252         addChar(tmpRoot, 166, 0x7fffe9,   23);
253         addChar(tmpRoot, 167, 0x1fffde,   21);
254         addChar(tmpRoot, 168, 0x7fffea,   23);
255         addChar(tmpRoot, 169, 0x3fffdd,   22);
256         addChar(tmpRoot, 170, 0x3fffde,   22);
257         addChar(tmpRoot, 171, 0xfffff0,   24);
258         addChar(tmpRoot, 172, 0x1fffdf,   21);
259         addChar(tmpRoot, 173, 0x3fffdf,   22);
260         addChar(tmpRoot, 174, 0x7fffeb,   23);
261         addChar(tmpRoot, 175, 0x7fffec,   23);
262         addChar(tmpRoot, 176, 0x1fffe0,   21);
263         addChar(tmpRoot, 177, 0x1fffe1,   21);
264         addChar(tmpRoot, 178, 0x3fffe0,   22);
265         addChar(tmpRoot, 179, 0x1fffe2,   21);
266         addChar(tmpRoot, 180, 0x7fffed,   23);
267         addChar(tmpRoot, 181, 0x3fffe1,   22);
268         addChar(tmpRoot, 182, 0x7fffee,   23);
269         addChar(tmpRoot, 183, 0x7fffef,   23);
270         addChar(tmpRoot, 184, 0xfffea,    20);
271         addChar(tmpRoot, 185, 0x3fffe2,   22);
272         addChar(tmpRoot, 186, 0x3fffe3,   22);
273         addChar(tmpRoot, 187, 0x3fffe4,   22);
274         addChar(tmpRoot, 188, 0x7ffff0,   23);
275         addChar(tmpRoot, 189, 0x3fffe5,   22);
276         addChar(tmpRoot, 190, 0x3fffe6,   22);
277         addChar(tmpRoot, 191, 0x7ffff1,   23);
278         addChar(tmpRoot, 192, 0x3ffffe0,  26);
279         addChar(tmpRoot, 193, 0x3ffffe1,  26);
280         addChar(tmpRoot, 194, 0xfffeb,    20);
281         addChar(tmpRoot, 195, 0x7fff1,    19);
282         addChar(tmpRoot, 196, 0x3fffe7,   22);
283         addChar(tmpRoot, 197, 0x7ffff2,   23);
284         addChar(tmpRoot, 198, 0x3fffe8,   22);
285         addChar(tmpRoot, 199, 0x1ffffec,  25);
286         addChar(tmpRoot, 200, 0x3ffffe2,  26);
287         addChar(tmpRoot, 201, 0x3ffffe3,  26);
288         addChar(tmpRoot, 202, 0x3ffffe4,  26);
289         addChar(tmpRoot, 203, 0x7ffffde,  27);
290         addChar(tmpRoot, 204, 0x7ffffdf,  27);
291         addChar(tmpRoot, 205, 0x3ffffe5,  26);
292         addChar(tmpRoot, 206, 0xfffff1,   24);
293         addChar(tmpRoot, 207, 0x1ffffed,  25);
294         addChar(tmpRoot, 208, 0x7fff2,    19);
295         addChar(tmpRoot, 209, 0x1fffe3,   21);
296         addChar(tmpRoot, 210, 0x3ffffe6,  26);
297         addChar(tmpRoot, 211, 0x7ffffe0,  27);
298         addChar(tmpRoot, 212, 0x7ffffe1,  27);
299         addChar(tmpRoot, 213, 0x3ffffe7,  26);
300         addChar(tmpRoot, 214, 0x7ffffe2,  27);
301         addChar(tmpRoot, 215, 0xfffff2,   24);
302         addChar(tmpRoot, 216, 0x1fffe4,   21);
303         addChar(tmpRoot, 217, 0x1fffe5,   21);
304         addChar(tmpRoot, 218, 0x3ffffe8,  26);
305         addChar(tmpRoot, 219, 0x3ffffe9,  26);
306         addChar(tmpRoot, 220, 0xffffffd,  28);
307         addChar(tmpRoot, 221, 0x7ffffe3,  27);
308         addChar(tmpRoot, 222, 0x7ffffe4,  27);
309         addChar(tmpRoot, 223, 0x7ffffe5,  27);
310         addChar(tmpRoot, 224, 0xfffec,    20);
311         addChar(tmpRoot, 225, 0xfffff3,   24);
312         addChar(tmpRoot, 226, 0xfffed,    20);
313         addChar(tmpRoot, 227, 0x1fffe6,   21);
314         addChar(tmpRoot, 228, 0x3fffe9,   22);
315         addChar(tmpRoot, 229, 0x1fffe7,   21);
316         addChar(tmpRoot, 230, 0x1fffe8,   21);
317         addChar(tmpRoot, 231, 0x7ffff3,   23);
318         addChar(tmpRoot, 232, 0x3fffea,   22);
319         addChar(tmpRoot, 233, 0x3fffeb,   22);
320         addChar(tmpRoot, 234, 0x1ffffee,  25);
321         addChar(tmpRoot, 235, 0x1ffffef,  25);
322         addChar(tmpRoot, 236, 0xfffff4,   24);
323         addChar(tmpRoot, 237, 0xfffff5,   24);
324         addChar(tmpRoot, 238, 0x3ffffea,  26);
325         addChar(tmpRoot, 239, 0x7ffff4,   23);
326         addChar(tmpRoot, 240, 0x3ffffeb,  26);
327         addChar(tmpRoot, 241, 0x7ffffe6,  27);
328         addChar(tmpRoot, 242, 0x3ffffec,  26);
329         addChar(tmpRoot, 243, 0x3ffffed,  26);
330         addChar(tmpRoot, 244, 0x7ffffe7,  27);
331         addChar(tmpRoot, 245, 0x7ffffe8,  27);
332         addChar(tmpRoot, 246, 0x7ffffe9,  27);
333         addChar(tmpRoot, 247, 0x7ffffea,  27);
334         addChar(tmpRoot, 248, 0x7ffffeb,  27);
335         addChar(tmpRoot, 249, 0xffffffe,  28);
336         addChar(tmpRoot, 250, 0x7ffffec,  27);
337         addChar(tmpRoot, 251, 0x7ffffed,  27);
338         addChar(tmpRoot, 252, 0x7ffffee,  27);
339         addChar(tmpRoot, 253, 0x7ffffef,  27);
340         addChar(tmpRoot, 254, 0x7fffff0,  27);
341         addChar(tmpRoot, 255, 0x3ffffee,  26);
342         addEOS (tmpRoot, 256, EOS_LSB, EOS_LENGTH);
343 
344         // The difference in performance can always be checked by not using
345         // the immutable trie:
346         //     return tmpRoot;
347         return ImmutableNode.copyOf(tmpRoot);
348     }
349 
QuickHuffman()350     private QuickHuffman() { }
351 
addChar(Node root, int symbol, int code, int bitLength)352     private static void addChar(Node root, int symbol, int code, int bitLength)
353     {
354         addLeaf(root, (char) symbol, code, bitLength, false);
355         long value = ((long) code) << (64 - bitLength); // re-align MSB <- LSB
356         codes[symbol] = value | bitLength;
357     }
358 
addEOS(Node root, int symbol, int code, int bitLength)359     private static void addEOS(Node root, int symbol, int code, int bitLength)
360     {
361         addLeaf(root, (char) symbol, code, bitLength, true);
362     }
363 
addLeaf(Node root, char symbol, int code, int bitLength, boolean isEOS)364     private static void addLeaf(Node root,
365                                 char symbol,
366                                 int code,
367                                 int bitLength,
368                                 boolean isEOS)
369     {
370         assert 0 < bitLength && bitLength <= 32 : bitLength;
371         Node curr = root;
372         int nBytes = bytesForBits(bitLength);
373         // The number of bits the code needs to be shifted to the left in order
374         // to align with the byte #nBytes boundary:
375         int align = (nBytes << 3) - bitLength;
376         code <<= align;
377         // descend the trie until the last element
378         int l = 0;
379         for (int i = 0, probe = 0xff << ((nBytes - 1) << 3);
380              i < nBytes - 1;
381              i++, probe >>>= 8)
382         {
383             curr.setEOSPath(curr.isEOSPath() | isEOS);
384             int idx = (code & probe) >>> ((nBytes - 1 - i) << 3);
385             curr = curr.getOrCreateChild(idx);
386             curr.setLength(8);
387             l += 8;
388         }
389         // Assign the same char to all byte variants. For example, if the code
390         // and its length are 00011b and 5 respectively (letter 'a') then, in
391         // order to be able to match any byte starting with 00011b prefix,
392         // the following nodes need to be created:
393         //
394         //     00011000b, 00011001b, 00011010b, 00011011b, 00011100b, 00011101b,
395         //     00011110b and 00011111b
396         int idx = code & 0xff;
397         curr.setEOSPath(curr.isEOSPath() | isEOS);
398         for (int i = 0; i < (1 << align); i++) {
399             Node child = curr.getOrCreateChild(idx | i);
400             child.setSymbol(symbol);
401             child.setEOSPath(child.isEOSPath() | isEOS);
402             child.setLength(bitLength - l);
403         }
404     }
405 
406     /*
407      * A node in the Huffman trie.
408      */
409     interface Node {
410 
411         boolean isEOSPath();
412 
413         void setEOSPath(boolean value);
414 
415         boolean isLeaf();
416 
417         Node getChild(int index);
418 
419         Node getOrCreateChild(int index);
420 
421         Node[] getChildren();
422 
423         char getSymbol();
424 
425         void setSymbol(char symbol);
426 
427         int getLength();
428 
429         void setLength(int value);
430     }
431 
432     /*
433      * Mutable nodes used for initial construction of the Huffman trie.
434      * (These nodes are perfectly ok to be used after that.)
435      */
436     static final class TemporaryNode implements Node {
437 
438         private char symbol;
439         private boolean eosPath;
440         private TemporaryNode[] children;
441         private int length;
442 
443         @Override
444         public TemporaryNode getOrCreateChild(int index) {
445             ensureChildrenExist();
446             if (children[index] == null) {
447                 children[index] = new TemporaryNode();
448             }
449             return children[index];
450         }
451 
452         private void ensureChildrenExist() {
453             if (children == null) {
454                 children = new TemporaryNode[256];
455             }
456         }
457 
458         @Override
459         public boolean isLeaf() {
460             return children == null;
461         }
462 
463         @Override
464         public boolean isEOSPath() {
465             return eosPath;
466         }
467 
468         @Override
469         public void setEOSPath(boolean value) {
470             eosPath = value;
471         }
472 
473         @Override
474         public TemporaryNode getChild(int index) {
475             ensureChildrenExist();
476             return children[index];
477         }
478 
479         @Override
480         public Node[] getChildren() {
481             if (children == null) {
482                 return new Node[0];
483             }
484             return children;
485         }
486 
487         @Override
488         public char getSymbol() {
489             return symbol;
490         }
491 
492         @Override
493         public int getLength() {
494             return length;
495         }
496 
497         @Override
498         public void setSymbol(char value) {
499             this.symbol = value;
500         }
501 
502         @Override
503         public void setLength(int value) {
504             this.length = value;
505         }
506     }
507 
508     /*
509      * Immutable node used to construct traversal-only Huffman trie.
510      *
511      * Once the trie has been built, the support of modifications is no longer
512      * required. An immutable trie should be used. Not only it will help to
513      * catch possible bugs, but hopefully speedup the traversal operations.
514      */
515     static final class ImmutableNode implements Node {
516 
517         private final char symbol;
518         private final boolean eosPath;
519         private final int length;
520         private final List<ImmutableNode> children;
521 
522         public static ImmutableNode copyOf(Node node) {
523             if (node.isLeaf()) {
524                 return new ImmutableNode(node.getSymbol(), node.isEOSPath(),
525                                          node.getLength());
526             }
527             Node[] children = node.getChildren();
528             ImmutableNode[] immutableChildren = new ImmutableNode[children.length];
529             for (int i = 0; i < children.length; i++) {
530                 immutableChildren[i] = copyOf(children[i]);
531             }
532             return new ImmutableNode(node.isEOSPath(), node.getLength(),
533                                      immutableChildren);
534         }
535 
536         /* Creates a leaf node */
537         private ImmutableNode(char symbol,
538                               boolean eosPath,
539                               int length) {
540             this.symbol = symbol;
541             this.eosPath = eosPath;
542             this.length = length;
543             this.children = List.of();
544         }
545 
546         /* Creates a node with children */
547         private ImmutableNode(boolean eosPath,
548                               int length,
549                               ImmutableNode[] children)
550         {
551             this.symbol = 0;
552             this.eosPath = eosPath;
553             this.length = length;
554             if (children.length == 0) {
555                 throw new IllegalArgumentException();
556             }
557             // A list produced by List.of should not be slower than array for
558             // accessing elements by index, and hopefully use additional
559             // optimizations (e.g. jdk.internal.vm.annotation.Stable)
560             this.children = List.of(children);
561         }
562 
563         @Override
564         public boolean isLeaf() {
565             return children.isEmpty();
566         }
567 
568         @Override
569         public boolean isEOSPath() {
570             return eosPath;
571         }
572 
573         @Override
574         public void setEOSPath(boolean value) {
575             throw new UnsupportedOperationException();
576         }
577 
578         @Override
579         public ImmutableNode getChild(int index) {
580             return children.get(index);
581         }
582 
583         @Override
584         public ImmutableNode getOrCreateChild(int index) {
585             throw new UnsupportedOperationException();
586         }
587 
588         @Override
589         public ImmutableNode[] getChildren() {
590             // This method is not expected to be called on an immutable node.
591             // If it is called, it requires some investigation.
592             throw new UnsupportedOperationException();
593         }
594 
595         @Override
596         public char getSymbol() {
597             return symbol;
598         }
599 
600         @Override
601         public void setSymbol(char symbol) {
602             throw new UnsupportedOperationException();
603         }
604 
605         @Override
606         public int getLength() {
607             return length;
608         }
609 
610         @Override
611         public void setLength(int value) {
612             throw new UnsupportedOperationException();
613         }
614     }
615 
616     static final class Reader implements Huffman.Reader {
617 
618         private final BufferUpdateConsumer UPDATER =
619                 (buf, bufLen) -> {
620                     buffer = buf;
621                     bufferLen = bufLen;
622                 };
623 
624         private Node curr = root;  // current position in the trie
625         private long buffer;       // bits left from the previous match (aligned to the left, or MSB)
626         private int bufferLen;     // number of bits in the buffer
627         private int len;           // length (in bits) of path to curr
628         private boolean done;
629 
630         @Override
631         public void read(ByteBuffer source,
632                          Appendable destination,
633                          boolean isLast) throws IOException
634         {
635             while (!done) {
636                 int remaining = source.remaining();
637                 int nBytes = HPACK.read(source, buffer, bufferLen, UPDATER);
638                 // write as much as possible
639                 while (true) {
640                     if (bufferLen < 8) {
641                         if (nBytes < remaining) { // read again
642                             break;
643                         } else if (!isLast) { // exit the method to accept more input
644                             return;
645                         } else if (bufferLen > 0) { // no more data is expected, pad
646                                                     // (this padding may be done more than once)
647                             buffer |= ((0xff00000000000000L >>> bufferLen)
648                                     & 0xff00000000000000L);
649                             // do not update bufferLen, since all those ones are
650                             // synthetic and are appended merely for the sake of
651                             // lookup
652                         } else {
653                             done = true;
654                             break;
655                         }
656                     }
657                     int idx = (int) (buffer >>> 56);
658                     Node node = curr.getChild(idx);
659                     if (node == null) { // TODO: TEST
660                         throw new IOException("Unexpected byte");
661                     }
662                     if (node.isLeaf()) {
663                         if (node.getLength() > bufferLen) { // matched more than we actually could (because of padding)
664                             throw new IOException(
665                                     "Not a EOS prefix padding or unexpected end of data");
666                         }
667                         if (node.isEOSPath()) {
668                             throw new IOException("Encountered EOS");
669                         }
670                         destination.append(node.getSymbol());
671                         curr = root;
672                         len = 0;
673                     } else {
674                         curr = node;
675                         // because of the padding, we can't match more bits than
676                         // there are currently in the buffer
677                         len += Math.min(bufferLen, node.getLength());
678                     }
679                     buffer <<= node.getLength();
680                     bufferLen -= node.getLength();
681                 }
682                 if (done && (curr.isEOSPath() && len > 7)) {
683                     throw new IOException(
684                             "Padding is too long (len=" + len + ") "
685                                     + "or unexpected end of data");
686                 }
687             }
688         }
689 
690         @Override
691         public void reset() {
692             curr = root;
693             len = 0;
694             buffer = 0;
695             bufferLen = 0;
696             done = false;
697         }
698     }
699 
700     static final class Writer implements Huffman.Writer {
701 
702         private final BufferUpdateConsumer UPDATER =
703                 (buf, bufLen) -> {
704                     buffer = buf;
705                     bufferLen = bufLen;
706                 };
707 
708         private CharSequence source;
709         private boolean padded;
710         private int pos;
711         private int end;
712         private long buffer;
713         private int bufferLen;
714 
715         @Override
716         public QuickHuffman.Writer from(CharSequence input, int start, int end) {
717             Objects.checkFromToIndex(start, end, input.length());
718             this.pos = start;
719             this.end = end;
720             this.source = input;
721             return this;
722         }
723 
724         @Override
725         public boolean write(ByteBuffer destination) {
726             while (true) {
727                 while (true) { // stuff codes into long
728                     if (pos >= end) {
729                         break;
730                     }
731                     char c = source.charAt(pos);
732                     if (c > 255) {
733                         throw new IllegalArgumentException("char=" + ((int) c));
734                     }
735                     long len = codeLengthOf(c);
736                     if (bufferLen + len <= 64) {
737                         buffer |= (codeValueOf(c) >>> bufferLen); // append
738                         bufferLen += len;
739                         pos++;
740                     } else {
741                         break;
742                     }
743                 }
744                 if (bufferLen == 0) {
745                     return true;
746                 }
747                 if (pos >= end && !padded) { // no more input chars are expected, pad
748                     padded = true;
749                     // A long shift to 64 will result in the same long, not
750                     // necessarily 0L. In which case padding will be performed
751                     // incorrectly. If bufferLen is equal to 64, the shift (and
752                     // padding) in not performed.
753                     // (see https://docs.oracle.com/javase/specs/jls/se10/html/jls-15.html#jls-15.19)
754                     if (bufferLen != 64) {
755                         buffer |= (EOS_MSB >>> bufferLen);
756                         bufferLen = bytesForBits(bufferLen) << 3;
757                     }
758                 }
759                 int nBytes = HPACK.write(buffer, bufferLen, UPDATER, destination);
760                 if (nBytes == 0) {
761                     return false;
762                 }
763             }
764         }
765 
766         @Override
767         public QuickHuffman.Writer reset() {
768             source = null;
769             buffer = 0;
770             bufferLen = 0;
771             end = 0;
772             pos = 0;
773             padded = false;
774             return this;
775         }
776 
777         @Override
778         public int lengthOf(CharSequence value, int start, int end) {
779             int len = 0;
780             for (int i = start; i < end; i++) {
781                 char c = value.charAt(i);
782                 len += codeLengthOf(c);
783             }
784             return bytesForBits(len);
785         }
786     }
787 }
788