1 /* Copyright 2015 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* Brotli state for partial streaming decoding. */
8 
9 #ifndef BROTLI_DEC_STATE_H_
10 #define BROTLI_DEC_STATE_H_
11 
12 #include "../common/constants.h"
13 #include "../common/dictionary.h"
14 #include "../common/platform.h"
15 #include "../common/transform.h"
16 #include <brotli/types.h>
17 #include "./bit_reader.h"
18 #include "./huffman.h"
19 
20 #if defined(__cplusplus) || defined(c_plusplus)
21 extern "C" {
22 #endif
23 
24 /* Graphviz diagram that describes state transitions:
25 
26 digraph States {
27   graph [compound=true]
28   concentrate=true
29   node [shape="box"]
30 
31   UNINITED -> {LARGE_WINDOW_BITS -> INITIALIZE}
32   subgraph cluster_metablock_workflow {
33     style="rounded"
34     label=< <B>METABLOCK CYCLE</B> >
35     METABLOCK_BEGIN -> METABLOCK_HEADER
36     METABLOCK_HEADER:sw -> METADATA
37     METABLOCK_HEADER:s -> UNCOMPRESSED
38     METABLOCK_HEADER:se -> METABLOCK_DONE:ne
39     METADATA:s -> METABLOCK_DONE:w
40     UNCOMPRESSED:s -> METABLOCK_DONE:n
41     METABLOCK_DONE:e -> METABLOCK_BEGIN:e [constraint="false"]
42   }
43   INITIALIZE -> METABLOCK_BEGIN
44   METABLOCK_DONE -> DONE
45 
46   subgraph cluster_compressed_metablock {
47     style="rounded"
48     label=< <B>COMPRESSED METABLOCK</B> >
49 
50     subgraph cluster_command {
51       style="rounded"
52       label=< <B>HOT LOOP</B> >
53 
54       _METABLOCK_DONE_PORT_ [shape=point style=invis]
55 
56       {
57         // Set different shape for nodes returning from "compressed metablock".
58         node [shape=invhouse]; CMD_INNER CMD_POST_DECODE_LITERALS;
59         CMD_POST_WRAP_COPY; CMD_INNER_WRITE; CMD_POST_WRITE_1;
60       }
61 
62       CMD_BEGIN -> CMD_INNER -> CMD_POST_DECODE_LITERALS -> CMD_POST_WRAP_COPY
63 
64       // IO ("write") nodes are not in the hot loop!
65       CMD_INNER_WRITE [style=dashed]
66       CMD_INNER -> CMD_INNER_WRITE
67       CMD_POST_WRITE_1 [style=dashed]
68       CMD_POST_DECODE_LITERALS -> CMD_POST_WRITE_1
69       CMD_POST_WRITE_2 [style=dashed]
70       CMD_POST_WRAP_COPY -> CMD_POST_WRITE_2
71 
72       CMD_POST_WRITE_1 -> CMD_BEGIN:s [constraint="false"]
73       CMD_INNER_WRITE -> {CMD_INNER CMD_POST_DECODE_LITERALS}
74           [constraint="false"]
75       CMD_BEGIN:ne -> CMD_POST_DECODE_LITERALS [constraint="false"]
76       CMD_POST_WRAP_COPY -> CMD_BEGIN [constraint="false"]
77       CMD_POST_DECODE_LITERALS -> CMD_BEGIN:ne [constraint="false"]
78       CMD_POST_WRITE_2 -> CMD_POST_WRAP_COPY [constraint="false"]
79       {rank=same; CMD_BEGIN; CMD_INNER; CMD_POST_DECODE_LITERALS;
80           CMD_POST_WRAP_COPY}
81       {rank=same; CMD_INNER_WRITE; CMD_POST_WRITE_1; CMD_POST_WRITE_2}
82 
83       {CMD_INNER CMD_POST_DECODE_LITERALS CMD_POST_WRAP_COPY} ->
84           _METABLOCK_DONE_PORT_ [style=invis]
85       {CMD_INNER_WRITE CMD_POST_WRITE_1} -> _METABLOCK_DONE_PORT_
86           [constraint="false" style=invis]
87     }
88 
89     BEFORE_COMPRESSED_METABLOCK_HEADER:s -> HUFFMAN_CODE_0:n
90     HUFFMAN_CODE_0 -> HUFFMAN_CODE_1 -> HUFFMAN_CODE_2 -> HUFFMAN_CODE_3
91     HUFFMAN_CODE_0 -> METABLOCK_HEADER_2 -> CONTEXT_MODES -> CONTEXT_MAP_1
92     CONTEXT_MAP_1 -> CONTEXT_MAP_2 -> TREE_GROUP
93     TREE_GROUP -> BEFORE_COMPRESSED_METABLOCK_BODY:e
94     BEFORE_COMPRESSED_METABLOCK_BODY:s -> CMD_BEGIN:n
95 
96     HUFFMAN_CODE_3:e -> HUFFMAN_CODE_0:ne [constraint="false"]
97     {rank=same; HUFFMAN_CODE_0; HUFFMAN_CODE_1; HUFFMAN_CODE_2; HUFFMAN_CODE_3}
98     {rank=same; METABLOCK_HEADER_2; CONTEXT_MODES; CONTEXT_MAP_1; CONTEXT_MAP_2;
99         TREE_GROUP}
100   }
101   METABLOCK_HEADER:e -> BEFORE_COMPRESSED_METABLOCK_HEADER:n
102 
103   _METABLOCK_DONE_PORT_ -> METABLOCK_DONE:se
104       [constraint="false" ltail=cluster_command]
105 
106   UNINITED [shape=Mdiamond];
107   DONE [shape=Msquare];
108 }
109 
110 
111  */
112 
113 typedef enum {
114   BROTLI_STATE_UNINITED,
115   BROTLI_STATE_LARGE_WINDOW_BITS,
116   BROTLI_STATE_INITIALIZE,
117   BROTLI_STATE_METABLOCK_BEGIN,
118   BROTLI_STATE_METABLOCK_HEADER,
119   BROTLI_STATE_METABLOCK_HEADER_2,
120   BROTLI_STATE_CONTEXT_MODES,
121   BROTLI_STATE_COMMAND_BEGIN,
122   BROTLI_STATE_COMMAND_INNER,
123   BROTLI_STATE_COMMAND_POST_DECODE_LITERALS,
124   BROTLI_STATE_COMMAND_POST_WRAP_COPY,
125   BROTLI_STATE_UNCOMPRESSED,
126   BROTLI_STATE_METADATA,
127   BROTLI_STATE_COMMAND_INNER_WRITE,
128   BROTLI_STATE_METABLOCK_DONE,
129   BROTLI_STATE_COMMAND_POST_WRITE_1,
130   BROTLI_STATE_COMMAND_POST_WRITE_2,
131   BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER,
132   BROTLI_STATE_HUFFMAN_CODE_0,
133   BROTLI_STATE_HUFFMAN_CODE_1,
134   BROTLI_STATE_HUFFMAN_CODE_2,
135   BROTLI_STATE_HUFFMAN_CODE_3,
136   BROTLI_STATE_CONTEXT_MAP_1,
137   BROTLI_STATE_CONTEXT_MAP_2,
138   BROTLI_STATE_TREE_GROUP,
139   BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY,
140   BROTLI_STATE_DONE
141 } BrotliRunningState;
142 
143 typedef enum {
144   BROTLI_STATE_METABLOCK_HEADER_NONE,
145   BROTLI_STATE_METABLOCK_HEADER_EMPTY,
146   BROTLI_STATE_METABLOCK_HEADER_NIBBLES,
147   BROTLI_STATE_METABLOCK_HEADER_SIZE,
148   BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED,
149   BROTLI_STATE_METABLOCK_HEADER_RESERVED,
150   BROTLI_STATE_METABLOCK_HEADER_BYTES,
151   BROTLI_STATE_METABLOCK_HEADER_METADATA
152 } BrotliRunningMetablockHeaderState;
153 
154 typedef enum {
155   BROTLI_STATE_UNCOMPRESSED_NONE,
156   BROTLI_STATE_UNCOMPRESSED_WRITE
157 } BrotliRunningUncompressedState;
158 
159 typedef enum {
160   BROTLI_STATE_TREE_GROUP_NONE,
161   BROTLI_STATE_TREE_GROUP_LOOP
162 } BrotliRunningTreeGroupState;
163 
164 typedef enum {
165   BROTLI_STATE_CONTEXT_MAP_NONE,
166   BROTLI_STATE_CONTEXT_MAP_READ_PREFIX,
167   BROTLI_STATE_CONTEXT_MAP_HUFFMAN,
168   BROTLI_STATE_CONTEXT_MAP_DECODE,
169   BROTLI_STATE_CONTEXT_MAP_TRANSFORM
170 } BrotliRunningContextMapState;
171 
172 typedef enum {
173   BROTLI_STATE_HUFFMAN_NONE,
174   BROTLI_STATE_HUFFMAN_SIMPLE_SIZE,
175   BROTLI_STATE_HUFFMAN_SIMPLE_READ,
176   BROTLI_STATE_HUFFMAN_SIMPLE_BUILD,
177   BROTLI_STATE_HUFFMAN_COMPLEX,
178   BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS
179 } BrotliRunningHuffmanState;
180 
181 typedef enum {
182   BROTLI_STATE_DECODE_UINT8_NONE,
183   BROTLI_STATE_DECODE_UINT8_SHORT,
184   BROTLI_STATE_DECODE_UINT8_LONG
185 } BrotliRunningDecodeUint8State;
186 
187 typedef enum {
188   BROTLI_STATE_READ_BLOCK_LENGTH_NONE,
189   BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
190 } BrotliRunningReadBlockLengthState;
191 
192 typedef struct BrotliMetablockHeaderArena {
193   BrotliRunningTreeGroupState substate_tree_group;
194   BrotliRunningContextMapState substate_context_map;
195   BrotliRunningHuffmanState substate_huffman;
196 
197   uint32_t sub_loop_counter;
198 
199   uint32_t repeat_code_len;
200   uint32_t prev_code_len;
201 
202   /* For ReadHuffmanCode. */
203   uint32_t symbol;
204   uint32_t repeat;
205   uint32_t space;
206 
207   /* Huffman table for "histograms". */
208   HuffmanCode table[32];
209   /* List of heads of symbol chains. */
210   uint16_t* symbol_lists;
211   /* Storage from symbol_lists. */
212   uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 +
213                                BROTLI_NUM_COMMAND_SYMBOLS];
214   /* Tails of symbol chains. */
215   int next_symbol[32];
216   uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
217   /* Population counts for the code lengths. */
218   uint16_t code_length_histo[16];
219 
220   /* For HuffmanTreeGroupDecode. */
221   int htree_index;
222   HuffmanCode* next;
223 
224   /* For DecodeContextMap. */
225   uint32_t context_index;
226   uint32_t max_run_length_prefix;
227   uint32_t code;
228   HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
229 } BrotliMetablockHeaderArena;
230 
231 typedef struct BrotliMetablockBodyArena {
232   uint8_t dist_extra_bits[544];
233   uint32_t dist_offset[544];
234 } BrotliMetablockBodyArena;
235 
236 struct BrotliDecoderStateStruct {
237   BrotliRunningState state;
238 
239   /* This counter is reused for several disjoint loops. */
240   int loop_counter;
241 
242   BrotliBitReader br;
243 
244   brotli_alloc_func alloc_func;
245   brotli_free_func free_func;
246   void* memory_manager_opaque;
247 
248   /* Temporary storage for remaining input. Brotli stream format is designed in
249      a way, that 64 bits are enough to make progress in decoding. */
250   union {
251     uint64_t u64;
252     uint8_t u8[8];
253   } buffer;
254   uint32_t buffer_length;
255 
256   int pos;
257   int max_backward_distance;
258   int max_distance;
259   int ringbuffer_size;
260   int ringbuffer_mask;
261   int dist_rb_idx;
262   int dist_rb[4];
263   int error_code;
264   uint8_t* ringbuffer;
265   uint8_t* ringbuffer_end;
266   HuffmanCode* htree_command;
267   const uint8_t* context_lookup;
268   uint8_t* context_map_slice;
269   uint8_t* dist_context_map_slice;
270 
271   /* This ring buffer holds a few past copy distances that will be used by
272      some special distance codes. */
273   HuffmanTreeGroup literal_hgroup;
274   HuffmanTreeGroup insert_copy_hgroup;
275   HuffmanTreeGroup distance_hgroup;
276   HuffmanCode* block_type_trees;
277   HuffmanCode* block_len_trees;
278   /* This is true if the literal context map histogram type always matches the
279      block type. It is then not needed to keep the context (faster decoding). */
280   int trivial_literal_context;
281   /* Distance context is actual after command is decoded and before distance is
282      computed. After distance computation it is used as a temporary variable. */
283   int distance_context;
284   int meta_block_remaining_len;
285   uint32_t block_length_index;
286   uint32_t block_length[3];
287   uint32_t num_block_types[3];
288   uint32_t block_type_rb[6];
289   uint32_t distance_postfix_bits;
290   uint32_t num_direct_distance_codes;
291   uint32_t num_dist_htrees;
292   uint8_t* dist_context_map;
293   HuffmanCode* literal_htree;
294   uint8_t dist_htree_index;
295 
296   int copy_length;
297   int distance_code;
298 
299   /* For partial write operations. */
300   size_t rb_roundtrips;  /* how many times we went around the ring-buffer */
301   size_t partial_pos_out;  /* how much output to the user in total */
302 
303   /* For InverseMoveToFrontTransform. */
304   uint32_t mtf_upper_bound;
305   uint32_t mtf[64 + 1];
306 
307   /* Less used attributes are at the end of this struct. */
308 
309   /* States inside function calls. */
310   BrotliRunningMetablockHeaderState substate_metablock_header;
311   BrotliRunningUncompressedState substate_uncompressed;
312   BrotliRunningDecodeUint8State substate_decode_uint8;
313   BrotliRunningReadBlockLengthState substate_read_block_length;
314 
315   unsigned int is_last_metablock : 1;
316   unsigned int is_uncompressed : 1;
317   unsigned int is_metadata : 1;
318   unsigned int should_wrap_ringbuffer : 1;
319   unsigned int canny_ringbuffer_allocation : 1;
320   unsigned int large_window : 1;
321   unsigned int size_nibbles : 8;
322   uint32_t window_bits;
323 
324   int new_ringbuffer_size;
325 
326   uint32_t num_literal_htrees;
327   uint8_t* context_map;
328   uint8_t* context_modes;
329 
330   const BrotliDictionary* dictionary;
331   const BrotliTransforms* transforms;
332 
333   uint32_t trivial_literal_contexts[8];  /* 256 bits */
334 
335   union {
336     BrotliMetablockHeaderArena header;
337     BrotliMetablockBodyArena body;
338   } arena;
339 };
340 
341 typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal;
342 #define BrotliDecoderState BrotliDecoderStateInternal
343 
344 BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
345     brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
346 BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s);
347 BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s);
348 BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock(
349     BrotliDecoderState* s);
350 BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(
351     BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size_max,
352     uint32_t alphabet_size_limit, uint32_t ntrees);
353 
354 #define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L)
355 
356 #define BROTLI_DECODER_FREE(S, X) {          \
357   S->free_func(S->memory_manager_opaque, X); \
358   X = NULL;                                  \
359 }
360 
361 #if defined(__cplusplus) || defined(c_plusplus)
362 }  /* extern "C" */
363 #endif
364 
365 #endif  /* BROTLI_DEC_STATE_H_ */
366