1// Copyright 2015, Joe Tsai. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5package flate
6
7import (
8	"bytes"
9	"compress/flate"
10	"flag"
11	"io"
12	"io/ioutil"
13	"os/exec"
14	"strings"
15	"testing"
16
17	"github.com/dsnet/compress/internal/errors"
18	"github.com/dsnet/compress/internal/testutil"
19)
20
21var zcheck = flag.Bool("zcheck", false, "verify reader test vectors with C zlib library")
22
23// pyDecompress decompresses the input by using the Python wrapper library
24// over the C zlib library:
25//
26//	>>> hex_string = "010100feff11"
27//	>>> import zlib
28//	>>> zlib.decompress(hex_string.decode("hex"), -15) # Negative means raw DEFLATE
29//	'\x11'
30//
31func pyDecompress(input []byte) ([]byte, error) {
32	var buf bytes.Buffer
33	cmd := exec.Command("python", "-c", "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read(), -15))")
34	cmd.Stdin = bytes.NewReader(input)
35	cmd.Stdout = &buf
36	err := cmd.Run()
37	return buf.Bytes(), err
38}
39
40func TestReader(t *testing.T) {
41	db := testutil.MustDecodeBitGen
42	dh := testutil.MustDecodeHex
43
44	errFuncs := map[string]func(error) bool{
45		"IsUnexpectedEOF": func(err error) bool { return err == io.ErrUnexpectedEOF },
46		"IsCorrupted":     errors.IsCorrupted,
47	}
48	vectors := []struct {
49		name   string // Sub-test name
50		input  []byte // Test input string
51		output []byte // Expected output string
52		inIdx  int64  // Expected input offset after reading
53		outIdx int64  // Expected output offset after reading
54		errf   string // Name of error checking callback
55	}{{
56		name: "EmptyString",
57		errf: "IsUnexpectedEOF",
58	}, {
59		name: "RawBlock",
60		input: db(`<<<
61			< 0 00 0*5          # Non-last, raw block, padding
62			< H16:000c H16:fff3 # RawSize: 12
63			"hello, world"      # Raw data
64
65			< 1 10    # Last, fixed block
66			> 0000000 # EOB marker
67		`),
68		output: []byte("hello, world"),
69		inIdx:  19,
70		outIdx: 12,
71		errf:   "IsUnexpectedEOF",
72	}, {
73		name: "RawBlockNonZeroPadding",
74		input: db(`<<<
75			< 1 00 10101        # Last, raw block, non-zero padding
76			< H16:0001 H16:fffe # RawSize: 1
77			X:11                # Raw data
78		`),
79		output: dh("11"),
80		inIdx:  6,
81		outIdx: 1,
82	}, {
83		name: "RawBlockShortest",
84		input: db(`<<<
85			< 1 00 0*5          # Last, raw block, padding
86			< H16:0000 H16:ffff # RawSize: 0
87		`),
88		inIdx: 5,
89	}, {
90		name: "RawBlockLongest",
91		input: db(`<<<
92			< 1 00 0*5          # Last, raw block, padding
93			< H16:ffff H16:0000 # RawSize: 65535
94			X:7a*65535
95		`),
96		output: db("<<< X:7a*65535"),
97		inIdx:  65540,
98		outIdx: 65535,
99	}, {
100		name: "RawBlockBadSize",
101		input: db(`<<<
102			< 1 00 0*5          # Last, raw block, padding
103			< H16:0001 H16:fffd # RawSize: 1
104			X:11                # Raw data
105		`),
106		inIdx: 5,
107		errf:  "IsCorrupted",
108	}, {
109		// Truncated after block header.
110		name: "RawBlockTruncated0",
111		input: db(`<<<
112			< 0 00 0*5 # Non-last, raw block, padding
113		`),
114		inIdx: 1,
115		errf:  "IsUnexpectedEOF",
116	}, {
117		// Truncated inside size field.
118		name: "RawBlockTruncated1",
119		input: db(`<<<
120			< 0 00 0*5 # Non-last, raw block, padding
121			< H8:0c    # RawSize: 12
122		`),
123		inIdx: 1,
124		errf:  "IsUnexpectedEOF",
125	}, {
126		// Truncated after size field.
127		name: "RawBlockTruncated2",
128		input: db(`<<<
129			< 0 00 0*5 # Non-last, raw block, padding
130			< H16:000c # RawSize: 12
131		`),
132		inIdx: 3,
133		errf:  "IsUnexpectedEOF",
134	}, {
135		// Truncated before raw data.
136		name: "RawBlockTruncated3",
137		input: db(`<<<
138			< 0 00 0*5          # Non-last, raw block, padding
139			< H16:000c H16:fff3 # RawSize: 12
140		`),
141		inIdx: 5,
142		errf:  "IsUnexpectedEOF",
143	}, {
144		// Truncated inside raw data.
145		name: "RawBlockTruncated4",
146		input: db(`<<<
147			< 0 00 0*5          # Non-last, raw block, padding
148			< H16:000c H16:fff3 # RawSize: 12
149			"hello"             # Raw data
150		`),
151		output: []byte("hello"),
152		inIdx:  10,
153		outIdx: 5,
154		errf:   "IsUnexpectedEOF",
155	}, {
156		// Truncated before next block.
157		name: "RawBlockTruncated5",
158		input: db(`<<<
159			< 0 00 0*5          # Non-last, raw block, padding
160			< H16:000c H16:fff3 # RawSize: 12
161			"hello, world"      # Raw data
162		`),
163		output: []byte("hello, world"),
164		inIdx:  17,
165		outIdx: 12,
166		errf:   "IsUnexpectedEOF",
167	}, {
168		// Truncated after fixed block header.
169		name: "FixedBlockTruncated0",
170		input: db(`<<<
171			< 0 01 # Non-last, fixed block
172		`),
173		inIdx:  1,
174		outIdx: 0,
175		errf:   "IsUnexpectedEOF",
176	}, {
177		// Truncated after mid-block and mid-symbol.
178		name: "FixedBlockTruncated1",
179		input: db(`<<<
180			< 0 01 # Non-last, fixed block
181			> 01111000 10010101 10011 # Truncate 100 from last symbol
182		`),
183		output: []byte("He"),
184		inIdx:  3,
185		outIdx: 2,
186		errf:   "IsUnexpectedEOF",
187	}, {
188		// Truncated after mid-block and post-symbol.
189		name: "FixedBlockTruncated2",
190		input: db(`<<<
191			< 0 01 # Non-last, fixed block
192			> 01111000 10010101 10011100 110010000*5
193		`),
194		output: []byte("Hel\x90\x90\x90\x90\x90"),
195		inIdx:  9,
196		outIdx: 8,
197		errf:   "IsUnexpectedEOF",
198	}, {
199		// Truncated after mid-block and post-EOB.
200		name: "FixedBlockTruncated3",
201		input: db(`<<<
202			< 0 01 # Non-last, fixed block
203			> 01111000 10010101 10011100 110010000*5
204			> 0000000 # EOB marker
205		`),
206		output: []byte("Hel\x90\x90\x90\x90\x90"),
207		inIdx:  10,
208		outIdx: 8,
209		errf:   "IsUnexpectedEOF",
210	}, {
211		name: "FixedBlockShortest",
212		input: db(`<<<
213			< 1 01    # Last, fixed block
214			> 0000000 # EOB marker
215		`),
216		inIdx: 2,
217	}, {
218		name: "FixedBlockHelloWorld",
219		input: db(`<<<
220			< 1 01    # Last, fixed block
221
222			> 01111000 10010101 10011100 10011100 10011111 01011100 01010000
223			  10100111 10011111 10100010 10011100 10010100 01010001
224			> 0000000 # EOB marker
225		`),
226		output: []byte("Hello, world!"),
227		inIdx:  15,
228		outIdx: 13,
229	}, {
230		// Make sure the use of a dynamic block, following a fixed block does
231		// not alter the global Decoder tables.
232		name: "FixedDynamicFixedDynamicBlocks",
233		input: db(`<<<
234			< 0 01               # Non-last, fixed block
235			> 00110000 0000000   # Compressed data
236
237			< 0 10               # Non-last, dynamic block
238			< D5:0 D5:3 D4:15    # HLit: 257, HDist: 4, HCLen: 19
239			< 000*3 001*2 000*14 # HCLens: {0:1, 8:1}
240			> 0 1*256 0*4        # HLits: {*:8}, HDists: {}
241			> 00000000 11111111  # Compressed data
242
243			< 0 01               # Non-last, fixed block
244			> 00110000 0000000   # Compressed data
245
246			< 1 10               # Last, dynamic block
247			< D5:0 D5:3 D4:15    # HLit: 257, HDist: 4, HCLen: 19
248			< 000*3 001*2 000*14 # HCLens: {0:1, 8:1}
249			> 0 1*256 0*4        # HLits: {*:8}, HDists: {}
250			> 00000000 11111111  # Compressed data
251		`),
252		output: dh("00010001"),
253		inIdx:  93,
254		outIdx: 4,
255	}, {
256		name: "ReservedBlock",
257		input: db(`<<<
258			< 1 11 0*5 # Last, reserved block, padding
259			X:deadcafe # ???
260		`),
261		inIdx: 1,
262		errf:  "IsCorrupted",
263	}, {
264		// Use reserved HLit symbol 287 in fixed block.
265		name: "ReservedHLitSymbol",
266		input: db(`<<<
267			< 1 01              # Last, fixed block
268			> 01100000 11000111 # Use invalid symbol 287
269		`),
270		output: dh("30"),
271		inIdx:  3,
272		outIdx: 1,
273		errf:   "IsCorrupted",
274	}, {
275		// Use reserved HDist symbol 30 in fixed block.
276		name: "ReservedHDistSymbol",
277		input: db(`<<<
278			< 1 01                   # Last, fixed block
279			> 00110000 0000001 D5:30 # Use invalid HDist symbol 30
280			> 0000000                # EOB marker
281		`),
282		output: dh("00"),
283		inIdx:  3,
284		outIdx: 1,
285		errf:   "IsCorrupted",
286	}, {
287		// Degenerate HCLenTree.
288		name: "HuffmanTree00",
289		input: db(`<<<
290			< 1 10            # Last, dynamic block
291			< D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19
292			< 000*17 001 000  # HCLens: {1:1}
293			> 0*256 1         # Use invalid HCLen code 1
294		`),
295		inIdx: 42,
296		errf:  "IsCorrupted",
297	}, {
298		// Degenerate HCLenTree, empty HLitTree, empty HDistTree.
299		name: "HuffmanTree01",
300		input: db(`<<<
301			< 1 10             # Last, dynamic block
302			< D5:0 D5:0 D4:15  # HLit: 257, HDist: 1, HCLen: 19
303			< 000*3 001 000*15 # HCLens: {0:1}
304			> 0*258            # HLits: {}, HDists: {}
305		`),
306		inIdx: 42,
307		errf:  "IsCorrupted",
308	}, {
309		// Empty HCLenTree.
310		name: "HuffmanTree02",
311		input: db(`<<<
312			< 1 10            # Last, dynamic block
313			< D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19
314			< 000*19          # HCLens: {}
315			> 0*258           # Use invalid HCLen code 0
316		`),
317		inIdx: 10,
318		errf:  "IsCorrupted",
319	}, {
320		// Complete HCLenTree, complete HLitTree, empty HDistTree,
321		// use missing HDist symbol.
322		name: "HuffmanTree03",
323		input: db(`<<<
324			< 0 00 0*5                 # Non-last, raw block, padding
325			< H16:0001 H16:fffe        # RawSize: 1
326			X:7a                       # Raw data
327
328			< 1 10                     # Last, dynamic block
329			< D5:1 D5:0 D4:15          # HLit: 258, HDist: 1, HCLen: 19
330			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
331			> 0*256 1*2                # HLits: {256:1, 257:1}
332			> 0                        # HDists: {}
333			> 1 0                      # Use invalid HDist code 0
334		`),
335		output: dh("7a"),
336		inIdx:  48,
337		outIdx: 1,
338		errf:   "IsCorrupted",
339	}, {
340		// Complete HCLenTree, degenerate HLitTree, empty HDistTree.
341		name: "HuffmanTree04",
342		input: db(`<<<
343			< 1 10                     # Last, dynamic block
344			< D5:0 D5:0 D4:15          # HLit: 257, HDist: 1, HCLen: 19
345			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
346			> 1 0*257                  # HLits: {0:1}, HDists: {}
347			> 0*31 1                   # Use invalid HLit code 1
348		`),
349		output: db("<<< X:00*31"),
350		inIdx:  46,
351		outIdx: 31,
352		errf:   "IsCorrupted",
353	}, {
354		// Complete HCLenTree, degenerate HLitTree, degenerate HDistTree.
355		name: "HuffmanTree05",
356		input: db(`<<<
357			< 1 10                     # Last, dynamic block
358			< D5:0 D5:0 D4:15          # HLit: 257, HDist: 1, HCLen: 19
359			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
360			> 1 0*256 1                # HLits: {0:1}, HDists: {0:1}
361			> 0*31 1                   # Use invalid HLit code 1
362		`),
363		output: db("<<< X:00*31"),
364		inIdx:  46,
365		outIdx: 31,
366		errf:   "IsCorrupted",
367	}, {
368		// Complete HCLenTree, degenerate HLitTree, degenerate HDistTree,
369		// use missing HLit symbol.
370		name: "HuffmanTree06",
371		input: db(`<<<
372			< 1 10                     # Last, dynamic block
373			< D5:0 D5:0 D4:15          # HLit: 257, HDist: 1, HCLen: 19
374			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
375			> 0*256 1*2                # HLits: {256:1}, HDists: {0:1}
376			> 1                        # Use invalid HLit code 1
377		`),
378		inIdx: 42,
379		errf:  "IsCorrupted",
380	}, {
381		// Complete HCLenTree, complete HLitTree, too large HDistTree.
382		name: "HuffmanTree07",
383		input: db(`<<<
384			< 1 10              # Last, dynamic block
385			< D5:29 D5:31 D4:15 # HLit: 286, HDist: 32, HCLen: 19
386			<1000011 X:05000000002004 X:00*39 X:04 # ???
387		`),
388		inIdx: 3,
389		errf:  "IsCorrupted",
390	}, {
391		// Complete HCLenTree, complete HLitTree, empty HDistTree,
392		// excessive repeater symbol.
393		name: "HuffmanTree08",
394		input: db(`<<<
395			< 1 10                           # Last, dynamic block
396			< D5:29 D5:29 D4:15              # HLit: 286, HDist: 30, HCLen: 19
397			< 011 000 011 001 000*13 010 000 # HCLens: {0:0, 1:2, 16:3, 18:3}
398			> 10 0*255 10 111 <D7:49 1       # Excessive repeater symbol
399		`),
400		inIdx: 43,
401		errf:  "IsCorrupted",
402	}, {
403		// Complete HCLenTree, complete HLitTree, empty HDistTree of length 30.
404		name: "HuffmanTree09",
405		input: db(`<<<
406			< 1 10               # Last, dynamic block
407			< D5:0 D5:29 D4:15   # HLit: 257, HDist: 30, HCLen: 19
408			< 000*3 001*2 000*14 # HCLens: {0:1, 8:1}
409			> 0 1*256 0*30       # HLits: {*:8}, HDists: {}
410			> 11111111           # Compressed data (has only EOB)
411		`),
412		inIdx: 47,
413	}, {
414		// Complete HCLenTree, complete HLitTree, under-subscribed HDistTree.
415		name: "HuffmanTree10",
416		input: db(`<<<
417			< 1 10               # Last, dynamic block
418			< D5:0 D5:29 D4:15   # HLit: 257, HDist: 30, HCLen: 19
419			< 000*3 001*2 000*14 # HCLens: {0:1, 8:1}
420			> 0 1*256 0*28 1*2   # HLits: {*:8}, HDists: {28:8, 29:8}
421		`),
422		inIdx: 46,
423		errf:  "IsCorrupted",
424	}, {
425		// HDistTree of excessive length 31.
426		name: "HuffmanTree11",
427		input: db(`<<<
428			< 1 10             # Last, dynamic block
429			< D5:0 D5:30 D4:15 # HLit: 257, HDist: 31, HCLen: 19
430			<0*7 X:240000000000f8 X:ff*31 X:07000000fc03 # ???
431		`),
432		inIdx: 3,
433		errf:  "IsCorrupted",
434	}, {
435		// Complete HCLenTree, over-subscribed HLitTree.
436		name: "HuffmanTree12",
437		input: db(`<<<
438			< 1 10               # Last, dynamic block
439			< D5:0 D5:0 D4:15    # HLit: 257, HDist: 1, HCLen: 19
440			< 000*3 001*2 000*14 # HCLens: {0:1, 8:1}
441			> 1*257 0            # HLits: {*:8}
442			<0*4 X:f00f          # ???
443		`),
444		inIdx: 42,
445		errf:  "IsCorrupted",
446	}, {
447		// Complete HCLenTree, under-subscribed HLitTree.
448		name: "HuffmanTree13",
449		input: db(`<<<
450			< 1 10               # Last, dynamic block
451			< D5:0 D5:0 D4:15    # HLit: 257, HDist: 1, HCLen: 19
452			< 000*3 001*2 000*14 # HCLens: {0:1, 8:1}
453			> 1*214 0*2 1*41 0   # HLits: {*:8}
454			<0*4 X:f00f          # ???
455		`),
456		inIdx: 42,
457		errf:  "IsCorrupted",
458	}, {
459		// Complete HCLenTree, complete HLitTree, empty HDistTree,
460		// no EOB symbol.
461		name: "HuffmanTree14",
462		input: db(`<<<
463			< 1 10               # Last, dynamic block
464			< D5:0 D5:0 D4:15    # HLit: 257, HDist: 1, HCLen: 19
465			< 000*3 001*2 000*14 # HCLens: {0:1, 8:1}
466			> 1*256 0*2          # HLits: {*:8}, HDists: {}
467			> 00000000 11111111  # Compressed data
468		`),
469		output: dh("00ff"),
470		inIdx:  44,
471		outIdx: 2,
472		errf:   "IsUnexpectedEOF",
473	}, {
474		// Complete HCLenTree, complete HLitTree, empty HDistTree.
475		name: "HuffmanTree15",
476		input: db(`<<<
477			< 1 10               # Last, dynamic block
478			< D5:0 D5:3 D4:15    # HLit: 257, HDist: 4, HCLen: 19
479			< 000*3 001*2 000*14 # HCLens: {0:1, 8:1}
480			> 0 1*256 0*4        # HLits: {*:8}, HDists: {}
481			> 00000000 11111111  # Compressed data
482		`),
483		output: dh("01"),
484		inIdx:  44,
485		outIdx: 1,
486	}, {
487		// Complete HCLenTree, complete HLitTree, degenerate HDistTree,
488		// use valid HDist symbol.
489		name: "HuffmanTree16",
490		input: db(`<<<
491			< 0 00 0*5                 # Non-last, raw block, padding
492			< H16:0001 H16:fffe        # RawSize: 1
493			X:7a                       # Raw data
494
495			< 1 10                     # Last, dynamic block
496			< D5:1 D5:0 D4:15          # HLit: 258, HDist: 1, HCLen: 19
497			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
498			> 0*256 1*3                # HLits: {256:1, 257:1}, HDists: {0:1}
499			> 1 0*2                    # Compressed data
500		`),
501		output: dh("7a7a7a7a"),
502		inIdx:  48,
503		outIdx: 4,
504	}, {
505		// Complete HCLenTree, degenerate HLitTree, degenerate HDistTree.
506		name: "HuffmanTree17",
507		input: db(`<<<
508			< 1 10                     # Last, dynamic block
509			< D5:0 D5:0 D4:15          # HLit: 257, HDist: 1, HCLen: 19
510			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
511			> 0*256 1*2                # HLits: {256:1}, HDists: {0:1}
512			> 0                        # Compressed data
513		`),
514		inIdx: 42,
515	}, {
516		// Complete HCLenTree, degenerate HLitTree, empty HDistTree.
517		name: "HuffmanTree18",
518		input: db(`<<<
519			< 1 10                     # Last, dynamic block
520			< D5:0 D5:0 D4:15          # HLit: 257, HDist: 1, HCLen: 19
521			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
522			> 0*256 1 0                # HLits: {256:1}, # HDists: {}
523			> 0                        # Compressed data
524		`),
525		inIdx: 42,
526	}, {
527		// Complete HCLenTree, complete HLitTree, empty HDistTree,
528		// spanning zero repeater symbol.
529		name: "HuffmanTree19",
530		input: db(`<<<
531			< 1 10                           # Last, dynamic block
532			< D5:29 D5:29 D4:15              # HLit: 286, HDist: 30, HCLen: 19
533			< 011 000 011 001 000*13 010 000 # HCLens: {0:1, 1:2, 16:3, 18:3}
534			> 10 0*255 10 111 <D7:48         # HLits: {0:1, 256:1}, HDists: {}
535			> 1                              # Compressed data
536		`),
537		inIdx: 43,
538	}, {
539		// Complete HCLenTree, use last repeater on non-zero code.
540		name: "HuffmanTree20",
541		input: db(`<<<
542			< 1 10           # Last, dynamic block
543			< D5:0 D5:0 D4:8 # HLit: 257, HDist: 1, HClen: 12
544			# HCLens: {0:2, 4:2, 16:2, 18:2}
545			< 010 000 010*2 000*7 010
546			# HLits: {0-14:4, 256:4}, HDists: {}
547			> 01*12 10 <D2:0 11 <D7:127 11 <D7:92 01 00
548			# Compressed data
549			> 0000 0001 0010 1111
550		`),
551		output: dh("000102"),
552		inIdx:  15,
553		outIdx: 3,
554	}, {
555		// Complete HCLenTree, use last repeater on zero code.
556		name: "HuffmanTree21",
557		input: db(`<<<
558			< 1 10           # Last, dynamic block
559			< D5:0 D5:0 D4:8 # HLit: 257, HDist: 1, HClen: 12
560			# HCLens: {0:2, 4:2, 16:2, 18:2}
561			< 010 000 010*2 000*7 010
562			# HLits: {241-256:4}, HDists: {}
563			> 00 10 <D2:3 11 <D7:127 11 <D7:85 01*16 00
564			# Compressed data
565			> 0000 0001 0010 1111
566		`),
567		output: dh("f1f2f3"),
568		inIdx:  16,
569		outIdx: 3,
570	}, {
571		// Complete HCLenTree, use last repeater without first code.
572		name: "HuffmanTree22",
573		input: db(`<<<
574			< 1 10           # Last, dynamic block
575			< D5:0 D5:0 D4:8 # HLit: 257, HDist: 1, HClen: 12
576			# HCLens: {0:2, 4:2, 16:2, 18:2}
577			< 010 000 010*2 000*7 010
578			# HLits: {???}, HDists: {???}
579			> 10 <D2:3 11 <D7:127 11 <D7:86 01*16 00
580			# ???
581			> 0000 0001 0010 1111
582		`),
583		inIdx: 7,
584		errf:  "IsCorrupted",
585	}, {
586		// Complete HCLenTree with length codes, complete HLitTree,
587		// empty HDistTree.
588		name: "HuffmanTree23",
589		input: db(`<<<
590			< 1 10                     # Last, dynamic block
591			< D5:29 D5:0 D4:15         # HLit: 286, HDist: 1, HCLen: 19
592			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
593			> 0*256 1 0*27 1 0*2       # HLits: {256:1, 284:1}, HDists: {}
594			> 0                        # Compressed data
595		`),
596		inIdx: 46,
597	}, {
598		// Complete HCLenTree, complete HLitTree, degenerate HDistTree,
599		// use valid HLit symbol 284 with count 31.
600		name: "HuffmanTree24",
601		input: db(`<<<
602			< 0 00 0*5                 # Non-last, raw block, padding
603			< H16:0001 H16:fffe        # RawSize: 1
604			X:00                       # Raw data
605
606			< 1 10                     # Last, dynamic block
607			< D5:29 D5:0 D4:15         # HLit: 286, HDist: 1, HCLen: 19
608			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
609			> 0*256 1 0*27 1 0 1       # HLits: {256:1, 284:1}, HDists: {0:1}
610			> 1 <D5:31 0*2             # Compressed data
611		`),
612		output: db("<<< X:00*259"),
613		inIdx:  53,
614		outIdx: 259,
615	}, {
616		// Complete HCLenTree, complete HLitTree, degenerate HDistTree,
617		// use valid HLit symbol 285.
618		name: "HuffmanTree25",
619		input: db(`<<<
620			< 0 00 0*5                 # Non-last, raw block, padding
621			< H16:0001 H16:fffe        # RawSize: 1
622			X:00                       # Raw data
623
624			< 1 10                     # Last, dynamic block
625			< D5:29 D5:0 D4:15         # HLit: 286, HDist: 1, HCLen: 19
626			< 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1}
627			> 0*256 1 0*28 1*2         # HLits: {256:1, 285:1}, HDists: {0:1}
628			> 1 0*2                    # Compressed data
629		`),
630		output: db("<<< X:00*259"),
631		inIdx:  52,
632		outIdx: 259,
633	}, {
634		// Complete HCLenTree, complete HLitTree, degenerate HDistTree,
635		// use valid HLit and HDist symbols.
636		name: "HuffmanTree26",
637		input: db(`<<<
638			< 0 10            # Non-last, dynamic block
639			< D5:1 D5:2 D4:14 # HLit: 258, HDist: 3, HCLen: 18
640			# HCLens: {0:3, 1:3, 2:2, 3:2, 18:2}
641			< 000*2 010 011 000*9 010 000 010 000 011
642			# HLits: {97:3, 98:3, 99:2, 256:2, 257:2}, HDists: {2:1}
643			> 10 <D7:86 01 01 00 10 <D7:127 10 <D7:7 00 00 110 110 111
644			# Compressed data
645			> 110 111 00 10 0 01
646
647			< 1 00 0*3          # Last, raw block, padding
648			< H16:0000 H16:ffff # RawSize: 0
649		`),
650		output: []byte("abcabc"),
651		inIdx:  21,
652		outIdx: 6,
653	}, {
654		// Valid short distance match.
655		name: "DistanceMatch0",
656		input: db(`<<<
657			< 0 00 0*5          # Non-last, raw block, padding
658			< H16:0001 H16:fffe # RawSize: 1
659			X:0f                # Raw data
660
661			< 1 01         # Last, fixed block
662			> 0000001 D5:0 # Length: 3, Distance: 1
663			> 0000000      # EOB marker
664		`),
665		output: db("<<< X:0f0f0f0f"),
666		inIdx:  9,
667		outIdx: 4,
668	}, {
669		// Valid long distance match.
670		name: "DistanceMatch1",
671		input: db(`<<<
672			< 0 00 0*5                              # Non-last, raw block, padding
673			< H16:8000 H16:7fff                     # RawSize: 32768
674			X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*2048 # Raw data
675
676			< 1 01                     # Last, fixed block
677			> 0000001 D5:29 <H13:1fff  # Length: 3, Distance: 32768
678			> 11000101 D5:29 <H13:1fff # Length: 258, Distance: 32768
679			> 0000000                  # EOB marker
680		`),
681		output: db(`<<<
682			X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*2048
683			X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*16
684			X:0f1e2d3c4b
685		`),
686		inIdx:  32781,
687		outIdx: 33029,
688	}, {
689		// Invalid long distance match with not enough data.
690		name: "DistanceMatch2",
691		input: db(`<<<
692			< 0 00 0*5                              # Non-last, raw block, padding
693			< H16:7fff H16:8000                     # RawSize: 32767
694			X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*2047 # Raw data
695			X:0f1e2d3c4b5a69788796a5b4c3d2e1
696
697			< 1 01                     # Last, fixed block
698			> 0000001 D5:29 <H13:1fff  # Length: 3, Distance: 32768
699			> 0000000                  # EOB marker
700		`),
701		output: db(`<<<
702			X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*2047
703			X:0f1e2d3c4b5a69788796a5b4c3d2e1
704		`),
705		inIdx:  32776,
706		outIdx: 32767,
707		errf:   "IsCorrupted",
708	}, {
709		// Invalid short distance match with no data.
710		name: "DistanceMatch3",
711		input: db(`<<<
712			< 1 01         # Last, fixed block
713			> 0000001 D5:0 # Length: 3, Distance: 1
714			> 0000000      # EOB marker
715		`),
716		inIdx: 2,
717		errf:  "IsCorrupted",
718	}, {
719		// Invalid long distance match with no data.
720		name: "DistanceMatch4",
721		input: db(`<<<
722			< 1 01                    # Last, fixed block
723			> 0000001 D5:29 <H13:1fff # Length: 3, Distance: 32768
724			> 0000000                 # EOB marker
725		`),
726		inIdx: 4,
727		errf:  "IsCorrupted",
728	}, {
729		// Large HLitTree caused a panic.
730		name: "Issue3815",
731		input: db(`<<<
732			< 0 10             # Non-last, dynamic block
733			< D5:31 D5:30 D4:7 # HLit: 288, HDist: 31, HCLen: 11
734
735			# ???
736			<0011011
737			X:e75e1cefb3555877b656b543f46ff2d2e63d99a0858c48ebf8da83042a75c4f8
738			X:0f1211b9b44b09a0be8b914c
739		`),
740		inIdx: 3,
741		errf:  "IsCorrupted",
742	}, {
743		// Incomplete HCLenTree causes a panic.
744		name: "Issue5915",
745		input: db(`<<<
746			< 1 10             # Last, dynamic block
747			< 10000 11001 1010 # HLit: 273, HDist: 26, HCLen: 14
748
749			# HCLens: {...}, this is valid
750			< 101 101 110 100 011 011 100 010 100 011 101 100 000 110
751
752			# HLits: {...}, this is invalid
753			> 11110 <D3:7 11100 111111 <D7:10 011 100 00 1010 11100 1010 100
754			  1010 00 00 1101 100 00 1100 00 011 010 011 011 1100 100
755			  11101 <D2:0 1101 100 00 011 1101 00 1101 1100 1010 1100 1100 100
756			  1100 11101 <D2:1 100 1010 1010 1100 010 11101 <D2:3 00 1101
757			  11101 <D00:0 1100 1101 11110 <D3:2 00 011 100 100 1010 1100 00 100
758			  100 1100 00 010 011 011 00 00 011 011 00 00 00 010 00 1100 00 010
759			  010 00 011 011 100 011 100 00 011 011 111111 <D7:119 1101 1011
760			  1011 010 010 010 00 011 011 00 011 00
761
762			# HDists: {...}, this is invalid
763			> 100 00 100 100 11100 100 11110 <D3:0 100 010 010 010 1011 1011
764			  111110 010 1011 010 1011 010 1011 010 1011 010
765		`),
766		inIdx: 61,
767		errf:  "IsCorrupted",
768	}, {
769		// Incomplete HCLenTree causes a panic.
770		name: "Issue5962",
771		input: db(`<<<
772			< 0 10             # Non-last, dynamic block
773			< 10101 10011 1000 # HLit: 278, HDist: 20, HCLen: 12
774			# HCLens: {0:7, 5:7, 6:6, 9:3, 10:7, 11:1, 16:5, 18:6}
775			< 101 000 110 111 000*2 011 110 111*2 001 000
776		`),
777		inIdx: 7,
778		errf:  "IsCorrupted",
779	}, {
780		// Over-subscribed HCLenTree caused a hang.
781		name: "Issue10426",
782		input: db(`<<<
783			< 0 10                  # Non-last, dynamic block
784			< D5:6 D5:12 D4:2       # HLit: 263, HDist: 13, HCLen: 6
785			< 101 100*2 011 010 001 # HCLens: {0:3, 7:1, 8:2, 16:5, 17:4, 18:4}, invalid
786			<01001 X:4d4b070000ff2e2eff2e2e2e2e2eff # ???
787		`),
788		inIdx: 5,
789		errf:  "IsCorrupted",
790	}, {
791		// Empty HDistTree unexpectedly led to an error.
792		name: "Issue11030",
793		input: db(`<<<
794			< 1 10            # Last, dynamic block
795			< D5:0 D5:0 D4:14 # HLit: 257, HDist: 1, HCLen: 18
796			# HCLens: {0:1, 1:4, 2:2, 16:3, 18:4}
797			< 011 000 100 001 000*11 010 000 100
798			# HLits: {253:2, 254:2, 255:2, 256:2}
799			> 0 1111 <D7:112 1111 <D7:111 0 0 0 0 0 0 0 10 10 10 10
800			# HDists: {}
801			> 0
802			# Compressed data
803			> 11
804		`),
805		inIdx: 14,
806	}, {
807		// Empty HDistTree unexpectedly led to an error.
808		name: "Issue11033",
809		input: db(`<<<
810			< 1 10           # Last, dynamic block
811			< D5:0 D5:0 D4:8 # HLit: 257, HDist: 1, HCLen: 12
812			# HCLens: {0:2, 4:3, 5:2, 6:3, 17:3, 18:3}
813			< 000 011*2 010 000*3 011 000 010 000 011
814			# HLits: {...}
815			> 01 110 100 101 00 00 101 111 1010000 01 110 110 01 111 0100000
816			  101 00 100 01 00 00 100 01 01 111 0001000 01 111 1000000 01 110
817			  010 100 00 01 110 010 01 00 00 100 110 001 100 111 0100000 01
818			  111 0110000 01 00 01 111 0001010 100 110 011 01 110 110 101 00
819			  101 110 011 101 110 001 101 111 0001000 101 100
820			# HDists: {}
821			> 00
822			# Compressed data
823			> 10001 0000 0000 10011 0001 0001 10000 0011 10111 111010 0100
824			  0011 0100 01110 0010 111000 10010 10110 11000 111100 10101
825			  111111 111001 10100 11001 11010 0010 01111 111101 111110 0101
826			  11011 0101 111011 0110
827		`),
828		output: dh("" +
829			"3130303634342068652e706870005d05355f7ed957ff084a90925d19e3ebc6d0" +
830			"c6d7",
831		),
832		inIdx:  57,
833		outIdx: 34,
834	}}
835
836	for _, v := range vectors {
837		t.Run(v.name, func(t *testing.T) {
838			rd, err := NewReader(bytes.NewReader(v.input), nil)
839			if err != nil {
840				t.Fatalf("unexpected NewReader error: %v", err)
841			}
842			output, err := ioutil.ReadAll(rd)
843			if cerr := rd.Close(); cerr != nil {
844				err = cerr
845			}
846
847			if got, want, ok := testutil.BytesCompare(output, v.output); !ok {
848				t.Errorf("output mismatch:\ngot  %s\nwant %s", got, want)
849			}
850			if rd.InputOffset != v.inIdx {
851				t.Errorf("input offset mismatch: got %d, want %d", rd.InputOffset, v.inIdx)
852			}
853			if rd.OutputOffset != v.outIdx {
854				t.Errorf("output offset mismatch: got %d, want %d", rd.OutputOffset, v.outIdx)
855			}
856			if v.errf != "" && !errFuncs[v.errf](err) {
857				t.Errorf("mismatching error:\ngot %v\nwant %s(err) == true", err, v.errf)
858			} else if v.errf == "" && err != nil {
859				t.Errorf("unexpected error: got %v", err)
860			}
861
862			// If the zcheck flag is set, then we verify that the test vectors
863			// themselves are consistent with what the C zlib library outputs.
864			// To do that, we use the python wrapper around the library.
865			if *zcheck {
866				output, err := pyDecompress(v.input)
867				if got, want := bool(v.errf == ""), bool(err == nil); got != want {
868					t.Errorf("pass mismatch: got %v, want %v", got, want)
869				}
870				if got, want, ok := testutil.BytesCompare(output, v.output); !ok && err == nil {
871					t.Errorf("output mismatch:\ngot  %s\nwant %s", got, want)
872				}
873			}
874		})
875	}
876}
877
878// TestReaderEarlyEOF tests that Reader returns io.EOF eagerly when possible.
879// There are two situations when it is unable to do so:
880//	* There is an non-last, empty, raw block at the end of the stream.
881//	Flushing semantics dictate that we must return at that point in the stream
882//	prior to knowing whether the end of the stream has been hit or not.
883//	* We previously returned from Read because the internal dictionary was full
884//	and it so happens that there is no more data to read. This is rare.
885func TestReaderEarlyEOF(t *testing.T) {
886	const maxSize = 1 << 18
887	const dampRatio = 32 // Higher value means more trials
888
889	data := make([]byte, maxSize)
890	for i := range data {
891		data[i] = byte(i)
892	}
893
894	// generateStream generates a DEFLATE stream that decompresses to n bytes
895	// of arbitrary data. If flush is set, then a final Flush is called at the
896	// very end of the stream.
897	var wrBuf bytes.Buffer
898	wr, _ := flate.NewWriter(nil, flate.BestSpeed)
899	generateStream := func(n int, flush bool) []byte {
900		wrBuf.Reset()
901		wr.Reset(&wrBuf)
902		wr.Write(data[:n])
903		if flush {
904			wr.Flush()
905		}
906		wr.Close()
907		return wrBuf.Bytes()
908	}
909
910	// readStream reads all the data and reports whether an early EOF occurred.
911	var rd Reader
912	rdBuf := make([]byte, 2111)
913	readStream := func(data []byte) (bool, error) {
914		rd.Reset(bytes.NewReader(data))
915		for {
916			n, err := rd.Read(rdBuf)
917			if err != nil {
918				if err == io.EOF {
919					err = nil
920				}
921				return n > 0, err
922			}
923		}
924	}
925
926	// There is no guarantee that early io.EOF occurs for all DEFLATE streams,
927	// but it should occur for most cases.
928	var numEarly, numTotal int
929	for i := 0; i < maxSize; i += 1 + i/dampRatio {
930		earlyEOF, err := readStream(generateStream(i, false))
931		if err != nil {
932			t.Errorf("unexpected Read error: %v", err)
933		}
934		if earlyEOF {
935			numEarly++
936		}
937		numTotal++
938	}
939	got := 100 * float64(numEarly) / float64(numTotal)
940	if want := 95.0; got < want {
941		t.Errorf("too few early EOFs: %0.1f%% < %0.1f%%", got, want)
942	}
943
944	// If there is a flush block at the end of all the data, an early io.EOF
945	// is never possible. Check for this case.
946	for i := 0; i < maxSize; i += 1 + i/dampRatio {
947		earlyEOF, err := readStream(generateStream(i, true))
948		if err != nil {
949			t.Errorf("unexpected Read error: %v", err)
950		}
951		if earlyEOF {
952			t.Errorf("size: %d, unexpected early EOF with terminating flush", i)
953		}
954	}
955}
956
957func TestReaderReset(t *testing.T) {
958	const data = "\x00\x0c\x00\xf3\xffhello, world\x01\x00\x00\xff\xff"
959
960	var rd Reader
961	if err := rd.Close(); err != nil {
962		t.Errorf("unexpected Close error: %v", err)
963	}
964
965	rd.Reset(strings.NewReader("garbage"))
966	if _, err := ioutil.ReadAll(&rd); !errors.IsCorrupted(err) {
967		t.Errorf("mismatching Read error: got %v, want IsCorrupted(err) == true", err)
968	}
969	if err := rd.Close(); !errors.IsCorrupted(err) {
970		t.Errorf("mismatching Close error: got %v, want IsCorrupted(err) == true", err)
971	}
972
973	rd.Reset(strings.NewReader(data))
974	if _, err := ioutil.ReadAll(&rd); err != nil {
975		t.Errorf("unexpected Read error: %v", err)
976	}
977	if err := rd.Close(); err != nil {
978		t.Errorf("unexpected Close error: %v", err)
979	}
980}
981
982func BenchmarkDecode(b *testing.B) {
983	runBenchmarks(b, func(b *testing.B, data []byte, lvl int) {
984		b.StopTimer()
985		b.ReportAllocs()
986
987		buf := new(bytes.Buffer)
988		wr, _ := flate.NewWriter(buf, lvl)
989		wr.Write(data)
990		wr.Close()
991
992		br := new(bytes.Reader)
993		rd := new(Reader)
994
995		b.SetBytes(int64(len(data)))
996		b.StartTimer()
997		for i := 0; i < b.N; i++ {
998			br.Reset(buf.Bytes())
999			rd.Reset(br)
1000
1001			n, err := io.Copy(ioutil.Discard, rd)
1002			if n != int64(len(data)) || err != nil {
1003				b.Fatalf("Copy() = (%d, %v), want (%d, nil)", n, err, len(data))
1004			}
1005			if err := rd.Close(); err != nil {
1006				b.Fatalf("Close() = %v, want nil", err)
1007			}
1008		}
1009	})
1010}
1011