1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtNetwork module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include "bitstreams_p.h"
41 #include "huffman_p.h"
42 
43 #include <QtCore/qbytearray.h>
44 
45 #include <algorithm>
46 #include <limits>
47 
48 QT_BEGIN_NAMESPACE
49 
50 namespace HPack
51 {
52 
53 /*
54     The static Huffman code used here was extracted from:
55     https://http2.github.io/http2-spec/compression.html#huffman.code
56 
57     This code was generated from statistics obtained on a large
58     sample of HTTP headers. It is a canonical Huffman code
59     with some tweaking to ensure that no symbol has a unique
60     code length. All codes were left-aligned - for implementation
61     convenience.
62 
63     Using binary trees to implement decoding would be prohibitively
64     expensive (both memory and time-wise). Instead we use a table-based
65     approach and any given code itself works as an index into such table(s).
66     We have 256 possible byte values and code lengths in
67     a range [5, 26]. This would require a huge table (and most of entries
68     would be 'wasted', since we only have to encode 256 elements).
69     Instead we use multi-level tables. The first level table
70     is using 9-bit length index; some entries in this table are 'terminal',
71     some reference the next level table(s).
72 
73     For example, bytes with values 48 and 49 (ASCII codes for '0' and '1')
74     both have code length 5, Huffman codes are: 00000 and 00001. They
75     both are placed in the 'root' table,
76     the 'root' table has index length == 9:
77     [00000 | 4 remaining bits]
78      ...
79     [00001 | 4 remaining bits]
80 
81     All entires with indices between these two will 'point' to value 48
82     with bitLength == 5 so that bit stream (for example) 000001010 will be
83     decoded as: 48 + "put 1010 back into bitstream".
84 
85     A good description can be found here:
86     http://commandlinefanatic.com/cgi-bin/showarticle.cgi?article=art007
87     or just google "Efficient Huffman Decoding".
88     Also see comments below about 'filling holes'.
89 */
90 
91 namespace
92 {
93 
94 const CodeEntry staticHuffmanCodeTable[]
95 {
96     {  0, 0xffc00000ul, 13},  //     11111111|11000
97     {  1, 0xffffb000ul, 23},  //     11111111|11111111|1011000
98     {  2, 0xfffffe20ul, 28},  //     11111111|11111111|11111110|0010
99     {  3, 0xfffffe30ul, 28},  //     11111111|11111111|11111110|0011
100     {  4, 0xfffffe40ul, 28},  //     11111111|11111111|11111110|0100
101     {  5, 0xfffffe50ul, 28},  //     11111111|11111111|11111110|0101
102     {  6, 0xfffffe60ul, 28},  //     11111111|11111111|11111110|0110
103     {  7, 0xfffffe70ul, 28},  //     11111111|11111111|11111110|0111
104     {  8, 0xfffffe80ul, 28},  //     11111111|11111111|11111110|1000
105     {  9, 0xffffea00ul, 24},  //     11111111|11111111|11101010
106     { 10, 0xfffffff0ul, 30},  //     11111111|11111111|11111111|111100
107     { 11, 0xfffffe90ul, 28},  //     11111111|11111111|11111110|1001
108     { 12, 0xfffffea0ul, 28},  //     11111111|11111111|11111110|1010
109     { 13, 0xfffffff4ul, 30},  //     11111111|11111111|11111111|111101
110     { 14, 0xfffffeb0ul, 28},  //     11111111|11111111|11111110|1011
111     { 15, 0xfffffec0ul, 28},  //     11111111|11111111|11111110|1100
112     { 16, 0xfffffed0ul, 28},  //     11111111|11111111|11111110|1101
113     { 17, 0xfffffee0ul, 28},  //     11111111|11111111|11111110|1110
114     { 18, 0xfffffef0ul, 28},  //     11111111|11111111|11111110|1111
115     { 19, 0xffffff00ul, 28},  //     11111111|11111111|11111111|0000
116     { 20, 0xffffff10ul, 28},  //     11111111|11111111|11111111|0001
117     { 21, 0xffffff20ul, 28},  //     11111111|11111111|11111111|0010
118     { 22, 0xfffffff8ul, 30},  //     11111111|11111111|11111111|111110
119     { 23, 0xffffff30ul, 28},  //     11111111|11111111|11111111|0011
120     { 24, 0xffffff40ul, 28},  //     11111111|11111111|11111111|0100
121     { 25, 0xffffff50ul, 28},  //     11111111|11111111|11111111|0101
122     { 26, 0xffffff60ul, 28},  //     11111111|11111111|11111111|0110
123     { 27, 0xffffff70ul, 28},  //     11111111|11111111|11111111|0111
124     { 28, 0xffffff80ul, 28},  //     11111111|11111111|11111111|1000
125     { 29, 0xffffff90ul, 28},  //     11111111|11111111|11111111|1001
126     { 30, 0xffffffa0ul, 28},  //     11111111|11111111|11111111|1010
127     { 31, 0xffffffb0ul, 28},  //     11111111|11111111|11111111|1011
128     { 32, 0x50000000ul,  6},  // ' ' 010100
129     { 33, 0xfe000000ul, 10},  // '!' 11111110|00
130     { 34, 0xfe400000ul, 10},  // '"' 11111110|01
131     { 35, 0xffa00000ul, 12},  // '#' 11111111|1010
132     { 36, 0xffc80000ul, 13},  // '$' 11111111|11001
133     { 37, 0x54000000ul,  6},  // '%' 010101
134     { 38, 0xf8000000ul,  8},  // '&' 11111000
135     { 39, 0xff400000ul, 11},  // ''' 11111111|010
136     { 40, 0xfe800000ul, 10},  // '(' 11111110|10
137     { 41, 0xfec00000ul, 10},  // ')' 11111110|11
138     { 42, 0xf9000000ul,  8},  // '*' 11111001
139     { 43, 0xff600000ul, 11},  // '+' 11111111|011
140     { 44, 0xfa000000ul,  8},  // ',' 11111010
141     { 45, 0x58000000ul,  6},  // '-' 010110
142     { 46, 0x5c000000ul,  6},  // '.' 010111
143     { 47, 0x60000000ul,  6},  // '/' 011000
144     { 48, 0x00000000ul,  5},  // '0' 00000
145     { 49, 0x08000000ul,  5},  // '1' 00001
146     { 50, 0x10000000ul,  5},  // '2' 00010
147     { 51, 0x64000000ul,  6},  // '3' 011001
148     { 52, 0x68000000ul,  6},  // '4' 011010
149     { 53, 0x6c000000ul,  6},  // '5' 011011
150     { 54, 0x70000000ul,  6},  // '6' 011100
151     { 55, 0x74000000ul,  6},  // '7' 011101
152     { 56, 0x78000000ul,  6},  // '8' 011110
153     { 57, 0x7c000000ul,  6},  // '9' 011111
154     { 58, 0xb8000000ul,  7},  // ':' 1011100
155     { 59, 0xfb000000ul,  8},  // ';' 11111011
156     { 60, 0xfff80000ul, 15},  // '<' 11111111|1111100
157     { 61, 0x80000000ul,  6},  // '=' 100000
158     { 62, 0xffb00000ul, 12},  // '>' 11111111|1011
159     { 63, 0xff000000ul, 10},  // '?' 11111111|00
160     { 64, 0xffd00000ul, 13},  // '@' 11111111|11010
161     { 65, 0x84000000ul,  6},  // 'A' 100001
162     { 66, 0xba000000ul,  7},  // 'B' 1011101
163     { 67, 0xbc000000ul,  7},  // 'C' 1011110
164     { 68, 0xbe000000ul,  7},  // 'D' 1011111
165     { 69, 0xc0000000ul,  7},  // 'E' 1100000
166     { 70, 0xc2000000ul,  7},  // 'F' 1100001
167     { 71, 0xc4000000ul,  7},  // 'G' 1100010
168     { 72, 0xc6000000ul,  7},  // 'H' 1100011
169     { 73, 0xc8000000ul,  7},  // 'I' 1100100
170     { 74, 0xca000000ul,  7},  // 'J' 1100101
171     { 75, 0xcc000000ul,  7},  // 'K' 1100110
172     { 76, 0xce000000ul,  7},  // 'L' 1100111
173     { 77, 0xd0000000ul,  7},  // 'M' 1101000
174     { 78, 0xd2000000ul,  7},  // 'N' 1101001
175     { 79, 0xd4000000ul,  7},  // 'O' 1101010
176     { 80, 0xd6000000ul,  7},  // 'P' 1101011
177     { 81, 0xd8000000ul,  7},  // 'Q' 1101100
178     { 82, 0xda000000ul,  7},  // 'R' 1101101
179     { 83, 0xdc000000ul,  7},  // 'S' 1101110
180     { 84, 0xde000000ul,  7},  // 'T' 1101111
181     { 85, 0xe0000000ul,  7},  // 'U' 1110000
182     { 86, 0xe2000000ul,  7},  // 'V' 1110001
183     { 87, 0xe4000000ul,  7},  // 'W' 1110010
184     { 88, 0xfc000000ul,  8},  // 'X' 11111100
185     { 89, 0xe6000000ul,  7},  // 'Y' 1110011
186     { 90, 0xfd000000ul,  8},  // 'Z' 11111101
187     { 91, 0xffd80000ul, 13},  // '[' 11111111|11011
188     { 92, 0xfffe0000ul, 19},  // '\' 11111111|11111110|000
189     { 93, 0xffe00000ul, 13},  // ']' 11111111|11100
190     { 94, 0xfff00000ul, 14},  // '^' 11111111|111100
191     { 95, 0x88000000ul,  6},  // '_' 100010
192     { 96, 0xfffa0000ul, 15},  // '`' 11111111|1111101
193     { 97, 0x18000000ul,  5},  // 'a' 00011
194     { 98, 0x8c000000ul,  6},  // 'b' 100011
195     { 99, 0x20000000ul,  5},  // 'c' 00100
196     {100, 0x90000000ul,  6},  // 'd' 100100
197     {101, 0x28000000ul,  5},  // 'e' 00101
198     {102, 0x94000000ul,  6},  // 'f' 100101
199     {103, 0x98000000ul,  6},  // 'g' 100110
200     {104, 0x9c000000ul,  6},  // 'h' 100111
201     {105, 0x30000000ul,  5},  // 'i' 00110
202     {106, 0xe8000000ul,  7},  // 'j' 1110100
203     {107, 0xea000000ul,  7},  // 'k' 1110101
204     {108, 0xa0000000ul,  6},  // 'l' 101000
205     {109, 0xa4000000ul,  6},  // 'm' 101001
206     {110, 0xa8000000ul,  6},  // 'n' 101010
207     {111, 0x38000000ul,  5},  // 'o' 00111
208     {112, 0xac000000ul,  6},  // 'p' 101011
209     {113, 0xec000000ul,  7},  // 'q' 1110110
210     {114, 0xb0000000ul,  6},  // 'r' 101100
211     {115, 0x40000000ul,  5},  // 's' 01000
212     {116, 0x48000000ul,  5},  // 't' 01001
213     {117, 0xb4000000ul,  6},  // 'u' 101101
214     {118, 0xee000000ul,  7},  // 'v' 1110111
215     {119, 0xf0000000ul,  7},  // 'w' 1111000
216     {120, 0xf2000000ul,  7},  // 'x' 1111001
217     {121, 0xf4000000ul,  7},  // 'y' 1111010
218     {122, 0xf6000000ul,  7},  // 'z' 1111011
219     {123, 0xfffc0000ul, 15},  // '{' 11111111|1111110
220     {124, 0xff800000ul, 11},  // '|' 11111111|100
221     {125, 0xfff40000ul, 14},  // '}' 11111111|111101
222     {126, 0xffe80000ul, 13},  // '~' 11111111|11101
223     {127, 0xffffffc0ul, 28},  //     11111111|11111111|11111111|1100
224     {128, 0xfffe6000ul, 20},  //     11111111|11111110|0110
225     {129, 0xffff4800ul, 22},  //     11111111|11111111|010010
226     {130, 0xfffe7000ul, 20},  //     11111111|11111110|0111
227     {131, 0xfffe8000ul, 20},  //     11111111|11111110|1000
228     {132, 0xffff4c00ul, 22},  //     11111111|11111111|010011
229     {133, 0xffff5000ul, 22},  //     11111111|11111111|010100
230     {134, 0xffff5400ul, 22},  //     11111111|11111111|010101
231     {135, 0xffffb200ul, 23},  //     11111111|11111111|1011001
232     {136, 0xffff5800ul, 22},  //     11111111|11111111|010110
233     {137, 0xffffb400ul, 23},  //     11111111|11111111|1011010
234     {138, 0xffffb600ul, 23},  //     11111111|11111111|1011011
235     {139, 0xffffb800ul, 23},  //     11111111|11111111|1011100
236     {140, 0xffffba00ul, 23},  //     11111111|11111111|1011101
237     {141, 0xffffbc00ul, 23},  //     11111111|11111111|1011110
238     {142, 0xffffeb00ul, 24},  //     11111111|11111111|11101011
239     {143, 0xffffbe00ul, 23},  //     11111111|11111111|1011111
240     {144, 0xffffec00ul, 24},  //     11111111|11111111|11101100
241     {145, 0xffffed00ul, 24},  //     11111111|11111111|11101101
242     {146, 0xffff5c00ul, 22},  //     11111111|11111111|010111
243     {147, 0xffffc000ul, 23},  //     11111111|11111111|1100000
244     {148, 0xffffee00ul, 24},  //     11111111|11111111|11101110
245     {149, 0xffffc200ul, 23},  //     11111111|11111111|1100001
246     {150, 0xffffc400ul, 23},  //     11111111|11111111|1100010
247     {151, 0xffffc600ul, 23},  //     11111111|11111111|1100011
248     {152, 0xffffc800ul, 23},  //     11111111|11111111|1100100
249     {153, 0xfffee000ul, 21},  //     11111111|11111110|11100
250     {154, 0xffff6000ul, 22},  //     11111111|11111111|011000
251     {155, 0xffffca00ul, 23},  //     11111111|11111111|1100101
252     {156, 0xffff6400ul, 22},  //     11111111|11111111|011001
253     {157, 0xffffcc00ul, 23},  //     11111111|11111111|1100110
254     {158, 0xffffce00ul, 23},  //     11111111|11111111|1100111
255     {159, 0xffffef00ul, 24},  //     11111111|11111111|11101111
256     {160, 0xffff6800ul, 22},  //     11111111|11111111|011010
257     {161, 0xfffee800ul, 21},  //     11111111|11111110|11101
258     {162, 0xfffe9000ul, 20},  //     11111111|11111110|1001
259     {163, 0xffff6c00ul, 22},  //     11111111|11111111|011011
260     {164, 0xffff7000ul, 22},  //     11111111|11111111|011100
261     {165, 0xffffd000ul, 23},  //     11111111|11111111|1101000
262     {166, 0xffffd200ul, 23},  //     11111111|11111111|1101001
263     {167, 0xfffef000ul, 21},  //     11111111|11111110|11110
264     {168, 0xffffd400ul, 23},  //     11111111|11111111|1101010
265     {169, 0xffff7400ul, 22},  //     11111111|11111111|011101
266     {170, 0xffff7800ul, 22},  //     11111111|11111111|011110
267     {171, 0xfffff000ul, 24},  //     11111111|11111111|11110000
268     {172, 0xfffef800ul, 21},  //     11111111|11111110|11111
269     {173, 0xffff7c00ul, 22},  //     11111111|11111111|011111
270     {174, 0xffffd600ul, 23},  //     11111111|11111111|1101011
271     {175, 0xffffd800ul, 23},  //     11111111|11111111|1101100
272     {176, 0xffff0000ul, 21},  //     11111111|11111111|00000
273     {177, 0xffff0800ul, 21},  //     11111111|11111111|00001
274     {178, 0xffff8000ul, 22},  //     11111111|11111111|100000
275     {179, 0xffff1000ul, 21},  //     11111111|11111111|00010
276     {180, 0xffffda00ul, 23},  //     11111111|11111111|1101101
277     {181, 0xffff8400ul, 22},  //     11111111|11111111|100001
278     {182, 0xffffdc00ul, 23},  //     11111111|11111111|1101110
279     {183, 0xffffde00ul, 23},  //     11111111|11111111|1101111
280     {184, 0xfffea000ul, 20},  //     11111111|11111110|1010
281     {185, 0xffff8800ul, 22},  //     11111111|11111111|100010
282     {186, 0xffff8c00ul, 22},  //     11111111|11111111|100011
283     {187, 0xffff9000ul, 22},  //     11111111|11111111|100100
284     {188, 0xffffe000ul, 23},  //     11111111|11111111|1110000
285     {189, 0xffff9400ul, 22},  //     11111111|11111111|100101
286     {190, 0xffff9800ul, 22},  //     11111111|11111111|100110
287     {191, 0xffffe200ul, 23},  //     11111111|11111111|1110001
288     {192, 0xfffff800ul, 26},  //     11111111|11111111|11111000|00
289     {193, 0xfffff840ul, 26},  //     11111111|11111111|11111000|01
290     {194, 0xfffeb000ul, 20},  //     11111111|11111110|1011
291     {195, 0xfffe2000ul, 19},  //     11111111|11111110|001
292     {196, 0xffff9c00ul, 22},  //     11111111|11111111|100111
293     {197, 0xffffe400ul, 23},  //     11111111|11111111|1110010
294     {198, 0xffffa000ul, 22},  //     11111111|11111111|101000
295     {199, 0xfffff600ul, 25},  //     11111111|11111111|11110110|0
296     {200, 0xfffff880ul, 26},  //     11111111|11111111|11111000|10
297     {201, 0xfffff8c0ul, 26},  //     11111111|11111111|11111000|11
298     {202, 0xfffff900ul, 26},  //     11111111|11111111|11111001|00
299     {203, 0xfffffbc0ul, 27},  //     11111111|11111111|11111011|110
300     {204, 0xfffffbe0ul, 27},  //     11111111|11111111|11111011|111
301     {205, 0xfffff940ul, 26},  //     11111111|11111111|11111001|01
302     {206, 0xfffff100ul, 24},  //     11111111|11111111|11110001
303     {207, 0xfffff680ul, 25},  //     11111111|11111111|11110110|1
304     {208, 0xfffe4000ul, 19},  //     11111111|11111110|010
305     {209, 0xffff1800ul, 21},  //     11111111|11111111|00011
306     {210, 0xfffff980ul, 26},  //     11111111|11111111|11111001|10
307     {211, 0xfffffc00ul, 27},  //     11111111|11111111|11111100|000
308     {212, 0xfffffc20ul, 27},  //     11111111|11111111|11111100|001
309     {213, 0xfffff9c0ul, 26},  //     11111111|11111111|11111001|11
310     {214, 0xfffffc40ul, 27},  //     11111111|11111111|11111100|010
311     {215, 0xfffff200ul, 24},  //     11111111|11111111|11110010
312     {216, 0xffff2000ul, 21},  //     11111111|11111111|00100
313     {217, 0xffff2800ul, 21},  //     11111111|11111111|00101
314     {218, 0xfffffa00ul, 26},  //     11111111|11111111|11111010|00
315     {219, 0xfffffa40ul, 26},  //     11111111|11111111|11111010|01
316     {220, 0xffffffd0ul, 28},  //     11111111|11111111|11111111|1101
317     {221, 0xfffffc60ul, 27},  //     11111111|11111111|11111100|011
318     {222, 0xfffffc80ul, 27},  //     11111111|11111111|11111100|100
319     {223, 0xfffffca0ul, 27},  //     11111111|11111111|11111100|101
320     {224, 0xfffec000ul, 20},  //     11111111|11111110|1100
321     {225, 0xfffff300ul, 24},  //     11111111|11111111|11110011
322     {226, 0xfffed000ul, 20},  //     11111111|11111110|1101
323     {227, 0xffff3000ul, 21},  //     11111111|11111111|00110
324     {228, 0xffffa400ul, 22},  //     11111111|11111111|101001
325     {229, 0xffff3800ul, 21},  //     11111111|11111111|00111
326     {230, 0xffff4000ul, 21},  //     11111111|11111111|01000
327     {231, 0xffffe600ul, 23},  //     11111111|11111111|1110011
328     {232, 0xffffa800ul, 22},  //     11111111|11111111|101010
329     {233, 0xffffac00ul, 22},  //     11111111|11111111|101011
330     {234, 0xfffff700ul, 25},  //     11111111|11111111|11110111|0
331     {235, 0xfffff780ul, 25},  //     11111111|11111111|11110111|1
332     {236, 0xfffff400ul, 24},  //     11111111|11111111|11110100
333     {237, 0xfffff500ul, 24},  //     11111111|11111111|11110101
334     {238, 0xfffffa80ul, 26},  //     11111111|11111111|11111010|10
335     {239, 0xffffe800ul, 23},  //     11111111|11111111|1110100
336     {240, 0xfffffac0ul, 26},  //     11111111|11111111|11111010|11
337     {241, 0xfffffcc0ul, 27},  //     11111111|11111111|11111100|110
338     {242, 0xfffffb00ul, 26},  //     11111111|11111111|11111011|00
339     {243, 0xfffffb40ul, 26},  //     11111111|11111111|11111011|01
340     {244, 0xfffffce0ul, 27},  //     11111111|11111111|11111100|111
341     {245, 0xfffffd00ul, 27},  //     11111111|11111111|11111101|000
342     {246, 0xfffffd20ul, 27},  //     11111111|11111111|11111101|001
343     {247, 0xfffffd40ul, 27},  //     11111111|11111111|11111101|010
344     {248, 0xfffffd60ul, 27},  //     11111111|11111111|11111101|011
345     {249, 0xffffffe0ul, 28},  //     11111111|11111111|11111111|1110
346     {250, 0xfffffd80ul, 27},  //     11111111|11111111|11111101|100
347     {251, 0xfffffda0ul, 27},  //     11111111|11111111|11111101|101
348     {252, 0xfffffdc0ul, 27},  //     11111111|11111111|11111101|110
349     {253, 0xfffffde0ul, 27},  //     11111111|11111111|11111101|111
350     {254, 0xfffffe00ul, 27},  //     11111111|11111111|11111110|000
351     {255, 0xfffffb80ul, 26},  //     11111111|11111111|11111011|10
352     {256, 0xfffffffcul, 30}   // EOS 11111111|11111111|11111111|111111
353 };
354 
write_huffman_code(BitOStream & outputStream,const CodeEntry & code)355 void write_huffman_code(BitOStream &outputStream, const CodeEntry &code)
356 {
357     // Append octet by octet.
358     auto bitLength = code.bitLength;
359     const auto hc = code.huffmanCode >> (32 - bitLength);
360 
361     if (bitLength > 24) {
362         outputStream.writeBits(uchar(hc >> 24), bitLength - 24);
363         bitLength = 24;
364     }
365 
366     if (bitLength > 16) {
367         outputStream.writeBits(uchar(hc >> 16), bitLength - 16);
368         bitLength = 16;
369     }
370 
371     if (bitLength > 8) {
372         outputStream.writeBits(uchar(hc >> 8), bitLength - 8);
373         bitLength = 8;
374     }
375 
376     outputStream.writeBits(uchar(hc), bitLength);
377 }
378 
379 }
380 
381 // That's from HPACK's specs - we deal with octets.
382 static_assert(std::numeric_limits<uchar>::digits == 8, "octets expected");
383 
huffman_encoded_bit_length(const QByteArray & inputData)384 quint64 huffman_encoded_bit_length(const QByteArray &inputData)
385 {
386     quint64 bitLength = 0;
387     for (int i = 0, e = inputData.size(); i < e; ++i)
388         bitLength += staticHuffmanCodeTable[int(inputData[i])].bitLength;
389 
390     return bitLength;
391 }
392 
huffman_encode_string(const QByteArray & inputData,BitOStream & outputStream)393 void huffman_encode_string(const QByteArray &inputData, BitOStream &outputStream)
394 {
395     for (int i = 0, e = inputData.size(); i < e; ++i) {
396         const auto value = uchar(inputData[i]);
397         write_huffman_code(outputStream, staticHuffmanCodeTable[value]);
398     }
399 
400     // Pad bits ...
401     if (outputStream.bitLength() % 8)
402         outputStream.writeBits(0xff, 8 - outputStream.bitLength() % 8);
403 }
404 
padding_is_valid(quint32 chunk,quint32 nBits)405 bool padding_is_valid(quint32 chunk, quint32 nBits)
406 {
407     Q_ASSERT(nBits);
408 
409     // HPACK, 5.2: "A padding strictly longer than 7 bits MUST be
410     // treated as a decoding error."
411     if (nBits > 7)
412         return false;
413     // HPACK, 5.2:
414     // "A padding not corresponding to the most significant bits
415     // of the code for the EOS symbol MUST be treated as a decoding error."
416     return (chunk >> (32 - nBits)) == quint32((1 << nBits) - 1);
417 }
418 
HuffmanDecoder()419 HuffmanDecoder::HuffmanDecoder()
420     : minCodeLength()
421 {
422     const auto nCodes = sizeof staticHuffmanCodeTable / sizeof staticHuffmanCodeTable[0];
423 
424     std::vector<CodeEntry> symbols(staticHuffmanCodeTable, staticHuffmanCodeTable + nCodes);
425     // Now we sort: by bit length first (in the descending order) and by the symbol
426     // value (descending). Descending order: to make sure we do not create prefix tables with
427     // short 'indexLength' first and having longer codes that do not fit into such tables later.
428     std::sort(symbols.begin(), symbols.end(), [](const CodeEntry &code1, const CodeEntry &code2) {
429         if (code1.bitLength == code2.bitLength)
430             return code1.byteValue > code2.byteValue;
431         return code1.bitLength > code2.bitLength;
432     });
433 
434     minCodeLength = symbols.back().bitLength; // The shortest one, currently it's 5.
435 
436     // TODO: add a verification - Huffman codes
437     // within a given bit length range also
438     // should be in descending order.
439 
440     // Initialize 'prefixTables' and 'tableData'.
441     addTable(0, quint32(BitConstants::rootPrefix)); // 'root' table.
442 
443     for (const auto &s : symbols) {
444         quint32 tableIndex = 0;
445         while (true) {
446             Q_ASSERT(tableIndex < prefixTables.size());
447             // Note, by value - since prefixTables will be updated in between.
448             const auto table = prefixTables[tableIndex];
449             // We skip prefixed bits (if any) and use indexed bits only:
450             const auto entryIndex = s.huffmanCode << table.prefixLength >> (32 - table.indexLength);
451             // Again, by value.
452             PrefixTableEntry entry = tableEntry(table, entryIndex);
453             // How many bits were coded by previous tables and this table:
454             const auto codedLength = table.prefixLength + table.indexLength;
455             if (codedLength < s.bitLength) {
456                 // We have to add a new prefix table ... (if it's not done yet).
457                 if (!entry.bitLength) {
458                     entry.nextTable = addTable(codedLength, std::min<quint32>(quint32(BitConstants::childPrefix),
459                                                                               s.bitLength - codedLength));
460                     entry.bitLength = s.bitLength;
461                     entry.byteValue = s.byteValue;
462                     setTableEntry(table, entryIndex, entry);
463                 }
464                 tableIndex = entry.nextTable;
465             } else {
466                 // We found the slot for our code (terminal entry):
467                 entry.byteValue = s.byteValue;
468                 entry.bitLength = s.bitLength;
469                 // Refer to our own table as 'nextTable':
470                 entry.nextTable = tableIndex;
471                 setTableEntry(table, entryIndex, entry);
472                 break;
473             }
474         }
475     }
476 
477     // Now, we have a table(s) and have to fill 'holes' to
478     // 'fix' terminal entries.
479     for (const auto &table : prefixTables) {
480         const quint32 codedLength = table.prefixLength + table.indexLength;
481         for (quint32 j = 0; j < table.size();) {
482             const PrefixTableEntry &entry = tableEntry(table, j);
483             if (entry.bitLength && entry.bitLength < codedLength) {
484                 const quint32 range = 1 << (codedLength - entry.bitLength);
485                 for (quint32 k = 1; k < range; ++k)
486                     setTableEntry(table, j + k, entry);
487                 j += range;
488             } else {
489                 ++j;
490             }
491         }
492     }
493 }
494 
decodeStream(BitIStream & inputStream,QByteArray & outputBuffer)495 bool HuffmanDecoder::decodeStream(BitIStream &inputStream, QByteArray &outputBuffer)
496 {
497     while (true) {
498         quint32 chunk = 0;
499         const quint32 readBits = inputStream.peekBits(inputStream.streamOffset(), 32, &chunk);
500         if (!readBits)
501             return !inputStream.hasMoreBits();
502 
503         if (readBits < minCodeLength) {
504             inputStream.skipBits(readBits);
505             return padding_is_valid(chunk, readBits);
506         }
507 
508         quint32 tableIndex = 0;
509         const PrefixTable *table = &prefixTables[tableIndex];
510         quint32 entryIndex = chunk >> (32 - table->indexLength);
511         PrefixTableEntry entry = tableEntry(*table, entryIndex);
512 
513         while (true) {
514             if (entry.nextTable == tableIndex)
515                 break;
516 
517             tableIndex = entry.nextTable;
518             table = &prefixTables[tableIndex];
519             entryIndex = chunk << table->prefixLength >> (32 - table->indexLength);
520             entry = tableEntry(*table, entryIndex);
521         }
522 
523         if (entry.bitLength > readBits) {
524             inputStream.skipBits(readBits);
525             return padding_is_valid(chunk, readBits);
526         }
527 
528         if (!entry.bitLength || entry.byteValue == 256) {
529             //EOS (256) == compression error (HPACK).
530             inputStream.skipBits(readBits);
531             return false;
532         }
533 
534         outputBuffer.append(entry.byteValue);
535         inputStream.skipBits(entry.bitLength);
536     }
537 
538     return false;
539 }
540 
addTable(quint32 prefix,quint32 index)541 quint32 HuffmanDecoder::addTable(quint32 prefix, quint32 index)
542 {
543     PrefixTable newTable{prefix, index};
544     newTable.offset = quint32(tableData.size());
545     prefixTables.push_back(newTable);
546     // Add entries for this table:
547     tableData.resize(tableData.size() + newTable.size());
548 
549     return quint32(prefixTables.size() - 1);
550 }
551 
tableEntry(const PrefixTable & table,quint32 index)552 PrefixTableEntry HuffmanDecoder::tableEntry(const PrefixTable &table, quint32 index)
553 {
554     Q_ASSERT(index < table.size());
555     return tableData[table.offset + index];
556 }
557 
setTableEntry(const PrefixTable & table,quint32 index,const PrefixTableEntry & entry)558 void HuffmanDecoder::setTableEntry(const PrefixTable &table, quint32 index,
559                                    const PrefixTableEntry &entry)
560 {
561     Q_ASSERT(index < table.size());
562     tableData[table.offset + index] = entry;
563 }
564 
huffman_decode_string(BitIStream & inputStream,QByteArray * outputBuffer)565 bool huffman_decode_string(BitIStream &inputStream, QByteArray *outputBuffer)
566 {
567     Q_ASSERT(outputBuffer);
568 
569     static HuffmanDecoder decoder;
570     return decoder.decodeStream(inputStream, *outputBuffer);
571 }
572 
573 }
574 
575 QT_END_NAMESPACE
576