1// Copyright 2015, Joe Tsai. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5package flate 6 7import ( 8 "bytes" 9 "compress/flate" 10 "flag" 11 "io" 12 "io/ioutil" 13 "os/exec" 14 "strings" 15 "testing" 16 17 "github.com/dsnet/compress/internal/errors" 18 "github.com/dsnet/compress/internal/testutil" 19) 20 21var zcheck = flag.Bool("zcheck", false, "verify reader test vectors with C zlib library") 22 23// pyDecompress decompresses the input by using the Python wrapper library 24// over the C zlib library: 25// 26// >>> hex_string = "010100feff11" 27// >>> import zlib 28// >>> zlib.decompress(hex_string.decode("hex"), -15) # Negative means raw DEFLATE 29// '\x11' 30// 31func pyDecompress(input []byte) ([]byte, error) { 32 var buf bytes.Buffer 33 cmd := exec.Command("python", "-c", "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read(), -15))") 34 cmd.Stdin = bytes.NewReader(input) 35 cmd.Stdout = &buf 36 err := cmd.Run() 37 return buf.Bytes(), err 38} 39 40func TestReader(t *testing.T) { 41 db := testutil.MustDecodeBitGen 42 dh := testutil.MustDecodeHex 43 44 errFuncs := map[string]func(error) bool{ 45 "IsUnexpectedEOF": func(err error) bool { return err == io.ErrUnexpectedEOF }, 46 "IsCorrupted": errors.IsCorrupted, 47 } 48 vectors := []struct { 49 name string // Sub-test name 50 input []byte // Test input string 51 output []byte // Expected output string 52 inIdx int64 // Expected input offset after reading 53 outIdx int64 // Expected output offset after reading 54 errf string // Name of error checking callback 55 }{{ 56 name: "EmptyString", 57 errf: "IsUnexpectedEOF", 58 }, { 59 name: "RawBlock", 60 input: db(`<<< 61 < 0 00 0*5 # Non-last, raw block, padding 62 < H16:000c H16:fff3 # RawSize: 12 63 "hello, world" # Raw data 64 65 < 1 10 # Last, fixed block 66 > 0000000 # EOB marker 67 `), 68 output: []byte("hello, world"), 69 inIdx: 19, 70 outIdx: 12, 71 errf: "IsUnexpectedEOF", 72 }, { 73 name: "RawBlockNonZeroPadding", 74 input: db(`<<< 75 < 1 00 10101 # Last, raw block, non-zero padding 76 < H16:0001 H16:fffe # RawSize: 1 77 X:11 # Raw data 78 `), 79 output: dh("11"), 80 inIdx: 6, 81 outIdx: 1, 82 }, { 83 name: "RawBlockShortest", 84 input: db(`<<< 85 < 1 00 0*5 # Last, raw block, padding 86 < H16:0000 H16:ffff # RawSize: 0 87 `), 88 inIdx: 5, 89 }, { 90 name: "RawBlockLongest", 91 input: db(`<<< 92 < 1 00 0*5 # Last, raw block, padding 93 < H16:ffff H16:0000 # RawSize: 65535 94 X:7a*65535 95 `), 96 output: db("<<< X:7a*65535"), 97 inIdx: 65540, 98 outIdx: 65535, 99 }, { 100 name: "RawBlockBadSize", 101 input: db(`<<< 102 < 1 00 0*5 # Last, raw block, padding 103 < H16:0001 H16:fffd # RawSize: 1 104 X:11 # Raw data 105 `), 106 inIdx: 5, 107 errf: "IsCorrupted", 108 }, { 109 // Truncated after block header. 110 name: "RawBlockTruncated0", 111 input: db(`<<< 112 < 0 00 0*5 # Non-last, raw block, padding 113 `), 114 inIdx: 1, 115 errf: "IsUnexpectedEOF", 116 }, { 117 // Truncated inside size field. 118 name: "RawBlockTruncated1", 119 input: db(`<<< 120 < 0 00 0*5 # Non-last, raw block, padding 121 < H8:0c # RawSize: 12 122 `), 123 inIdx: 1, 124 errf: "IsUnexpectedEOF", 125 }, { 126 // Truncated after size field. 127 name: "RawBlockTruncated2", 128 input: db(`<<< 129 < 0 00 0*5 # Non-last, raw block, padding 130 < H16:000c # RawSize: 12 131 `), 132 inIdx: 3, 133 errf: "IsUnexpectedEOF", 134 }, { 135 // Truncated before raw data. 136 name: "RawBlockTruncated3", 137 input: db(`<<< 138 < 0 00 0*5 # Non-last, raw block, padding 139 < H16:000c H16:fff3 # RawSize: 12 140 `), 141 inIdx: 5, 142 errf: "IsUnexpectedEOF", 143 }, { 144 // Truncated inside raw data. 145 name: "RawBlockTruncated4", 146 input: db(`<<< 147 < 0 00 0*5 # Non-last, raw block, padding 148 < H16:000c H16:fff3 # RawSize: 12 149 "hello" # Raw data 150 `), 151 output: []byte("hello"), 152 inIdx: 10, 153 outIdx: 5, 154 errf: "IsUnexpectedEOF", 155 }, { 156 // Truncated before next block. 157 name: "RawBlockTruncated5", 158 input: db(`<<< 159 < 0 00 0*5 # Non-last, raw block, padding 160 < H16:000c H16:fff3 # RawSize: 12 161 "hello, world" # Raw data 162 `), 163 output: []byte("hello, world"), 164 inIdx: 17, 165 outIdx: 12, 166 errf: "IsUnexpectedEOF", 167 }, { 168 // Truncated after fixed block header. 169 name: "FixedBlockTruncated0", 170 input: db(`<<< 171 < 0 01 # Non-last, fixed block 172 `), 173 inIdx: 1, 174 outIdx: 0, 175 errf: "IsUnexpectedEOF", 176 }, { 177 // Truncated after mid-block and mid-symbol. 178 name: "FixedBlockTruncated1", 179 input: db(`<<< 180 < 0 01 # Non-last, fixed block 181 > 01111000 10010101 10011 # Truncate 100 from last symbol 182 `), 183 output: []byte("He"), 184 inIdx: 3, 185 outIdx: 2, 186 errf: "IsUnexpectedEOF", 187 }, { 188 // Truncated after mid-block and post-symbol. 189 name: "FixedBlockTruncated2", 190 input: db(`<<< 191 < 0 01 # Non-last, fixed block 192 > 01111000 10010101 10011100 110010000*5 193 `), 194 output: []byte("Hel\x90\x90\x90\x90\x90"), 195 inIdx: 9, 196 outIdx: 8, 197 errf: "IsUnexpectedEOF", 198 }, { 199 // Truncated after mid-block and post-EOB. 200 name: "FixedBlockTruncated3", 201 input: db(`<<< 202 < 0 01 # Non-last, fixed block 203 > 01111000 10010101 10011100 110010000*5 204 > 0000000 # EOB marker 205 `), 206 output: []byte("Hel\x90\x90\x90\x90\x90"), 207 inIdx: 10, 208 outIdx: 8, 209 errf: "IsUnexpectedEOF", 210 }, { 211 name: "FixedBlockShortest", 212 input: db(`<<< 213 < 1 01 # Last, fixed block 214 > 0000000 # EOB marker 215 `), 216 inIdx: 2, 217 }, { 218 name: "FixedBlockHelloWorld", 219 input: db(`<<< 220 < 1 01 # Last, fixed block 221 222 > 01111000 10010101 10011100 10011100 10011111 01011100 01010000 223 10100111 10011111 10100010 10011100 10010100 01010001 224 > 0000000 # EOB marker 225 `), 226 output: []byte("Hello, world!"), 227 inIdx: 15, 228 outIdx: 13, 229 }, { 230 // Make sure the use of a dynamic block, following a fixed block does 231 // not alter the global Decoder tables. 232 name: "FixedDynamicFixedDynamicBlocks", 233 input: db(`<<< 234 < 0 01 # Non-last, fixed block 235 > 00110000 0000000 # Compressed data 236 237 < 0 10 # Non-last, dynamic block 238 < D5:0 D5:3 D4:15 # HLit: 257, HDist: 4, HCLen: 19 239 < 000*3 001*2 000*14 # HCLens: {0:1, 8:1} 240 > 0 1*256 0*4 # HLits: {*:8}, HDists: {} 241 > 00000000 11111111 # Compressed data 242 243 < 0 01 # Non-last, fixed block 244 > 00110000 0000000 # Compressed data 245 246 < 1 10 # Last, dynamic block 247 < D5:0 D5:3 D4:15 # HLit: 257, HDist: 4, HCLen: 19 248 < 000*3 001*2 000*14 # HCLens: {0:1, 8:1} 249 > 0 1*256 0*4 # HLits: {*:8}, HDists: {} 250 > 00000000 11111111 # Compressed data 251 `), 252 output: dh("00010001"), 253 inIdx: 93, 254 outIdx: 4, 255 }, { 256 name: "ReservedBlock", 257 input: db(`<<< 258 < 1 11 0*5 # Last, reserved block, padding 259 X:deadcafe # ??? 260 `), 261 inIdx: 1, 262 errf: "IsCorrupted", 263 }, { 264 // Use reserved HLit symbol 287 in fixed block. 265 name: "ReservedHLitSymbol", 266 input: db(`<<< 267 < 1 01 # Last, fixed block 268 > 01100000 11000111 # Use invalid symbol 287 269 `), 270 output: dh("30"), 271 inIdx: 3, 272 outIdx: 1, 273 errf: "IsCorrupted", 274 }, { 275 // Use reserved HDist symbol 30 in fixed block. 276 name: "ReservedHDistSymbol", 277 input: db(`<<< 278 < 1 01 # Last, fixed block 279 > 00110000 0000001 D5:30 # Use invalid HDist symbol 30 280 > 0000000 # EOB marker 281 `), 282 output: dh("00"), 283 inIdx: 3, 284 outIdx: 1, 285 errf: "IsCorrupted", 286 }, { 287 // Degenerate HCLenTree. 288 name: "HuffmanTree00", 289 input: db(`<<< 290 < 1 10 # Last, dynamic block 291 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 292 < 000*17 001 000 # HCLens: {1:1} 293 > 0*256 1 # Use invalid HCLen code 1 294 `), 295 inIdx: 42, 296 errf: "IsCorrupted", 297 }, { 298 // Degenerate HCLenTree, empty HLitTree, empty HDistTree. 299 name: "HuffmanTree01", 300 input: db(`<<< 301 < 1 10 # Last, dynamic block 302 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 303 < 000*3 001 000*15 # HCLens: {0:1} 304 > 0*258 # HLits: {}, HDists: {} 305 `), 306 inIdx: 42, 307 errf: "IsCorrupted", 308 }, { 309 // Empty HCLenTree. 310 name: "HuffmanTree02", 311 input: db(`<<< 312 < 1 10 # Last, dynamic block 313 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 314 < 000*19 # HCLens: {} 315 > 0*258 # Use invalid HCLen code 0 316 `), 317 inIdx: 10, 318 errf: "IsCorrupted", 319 }, { 320 // Complete HCLenTree, complete HLitTree, empty HDistTree, 321 // use missing HDist symbol. 322 name: "HuffmanTree03", 323 input: db(`<<< 324 < 0 00 0*5 # Non-last, raw block, padding 325 < H16:0001 H16:fffe # RawSize: 1 326 X:7a # Raw data 327 328 < 1 10 # Last, dynamic block 329 < D5:1 D5:0 D4:15 # HLit: 258, HDist: 1, HCLen: 19 330 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 331 > 0*256 1*2 # HLits: {256:1, 257:1} 332 > 0 # HDists: {} 333 > 1 0 # Use invalid HDist code 0 334 `), 335 output: dh("7a"), 336 inIdx: 48, 337 outIdx: 1, 338 errf: "IsCorrupted", 339 }, { 340 // Complete HCLenTree, degenerate HLitTree, empty HDistTree. 341 name: "HuffmanTree04", 342 input: db(`<<< 343 < 1 10 # Last, dynamic block 344 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 345 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 346 > 1 0*257 # HLits: {0:1}, HDists: {} 347 > 0*31 1 # Use invalid HLit code 1 348 `), 349 output: db("<<< X:00*31"), 350 inIdx: 46, 351 outIdx: 31, 352 errf: "IsCorrupted", 353 }, { 354 // Complete HCLenTree, degenerate HLitTree, degenerate HDistTree. 355 name: "HuffmanTree05", 356 input: db(`<<< 357 < 1 10 # Last, dynamic block 358 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 359 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 360 > 1 0*256 1 # HLits: {0:1}, HDists: {0:1} 361 > 0*31 1 # Use invalid HLit code 1 362 `), 363 output: db("<<< X:00*31"), 364 inIdx: 46, 365 outIdx: 31, 366 errf: "IsCorrupted", 367 }, { 368 // Complete HCLenTree, degenerate HLitTree, degenerate HDistTree, 369 // use missing HLit symbol. 370 name: "HuffmanTree06", 371 input: db(`<<< 372 < 1 10 # Last, dynamic block 373 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 374 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 375 > 0*256 1*2 # HLits: {256:1}, HDists: {0:1} 376 > 1 # Use invalid HLit code 1 377 `), 378 inIdx: 42, 379 errf: "IsCorrupted", 380 }, { 381 // Complete HCLenTree, complete HLitTree, too large HDistTree. 382 name: "HuffmanTree07", 383 input: db(`<<< 384 < 1 10 # Last, dynamic block 385 < D5:29 D5:31 D4:15 # HLit: 286, HDist: 32, HCLen: 19 386 <1000011 X:05000000002004 X:00*39 X:04 # ??? 387 `), 388 inIdx: 3, 389 errf: "IsCorrupted", 390 }, { 391 // Complete HCLenTree, complete HLitTree, empty HDistTree, 392 // excessive repeater symbol. 393 name: "HuffmanTree08", 394 input: db(`<<< 395 < 1 10 # Last, dynamic block 396 < D5:29 D5:29 D4:15 # HLit: 286, HDist: 30, HCLen: 19 397 < 011 000 011 001 000*13 010 000 # HCLens: {0:0, 1:2, 16:3, 18:3} 398 > 10 0*255 10 111 <D7:49 1 # Excessive repeater symbol 399 `), 400 inIdx: 43, 401 errf: "IsCorrupted", 402 }, { 403 // Complete HCLenTree, complete HLitTree, empty HDistTree of length 30. 404 name: "HuffmanTree09", 405 input: db(`<<< 406 < 1 10 # Last, dynamic block 407 < D5:0 D5:29 D4:15 # HLit: 257, HDist: 30, HCLen: 19 408 < 000*3 001*2 000*14 # HCLens: {0:1, 8:1} 409 > 0 1*256 0*30 # HLits: {*:8}, HDists: {} 410 > 11111111 # Compressed data (has only EOB) 411 `), 412 inIdx: 47, 413 }, { 414 // Complete HCLenTree, complete HLitTree, under-subscribed HDistTree. 415 name: "HuffmanTree10", 416 input: db(`<<< 417 < 1 10 # Last, dynamic block 418 < D5:0 D5:29 D4:15 # HLit: 257, HDist: 30, HCLen: 19 419 < 000*3 001*2 000*14 # HCLens: {0:1, 8:1} 420 > 0 1*256 0*28 1*2 # HLits: {*:8}, HDists: {28:8, 29:8} 421 `), 422 inIdx: 46, 423 errf: "IsCorrupted", 424 }, { 425 // HDistTree of excessive length 31. 426 name: "HuffmanTree11", 427 input: db(`<<< 428 < 1 10 # Last, dynamic block 429 < D5:0 D5:30 D4:15 # HLit: 257, HDist: 31, HCLen: 19 430 <0*7 X:240000000000f8 X:ff*31 X:07000000fc03 # ??? 431 `), 432 inIdx: 3, 433 errf: "IsCorrupted", 434 }, { 435 // Complete HCLenTree, over-subscribed HLitTree. 436 name: "HuffmanTree12", 437 input: db(`<<< 438 < 1 10 # Last, dynamic block 439 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 440 < 000*3 001*2 000*14 # HCLens: {0:1, 8:1} 441 > 1*257 0 # HLits: {*:8} 442 <0*4 X:f00f # ??? 443 `), 444 inIdx: 42, 445 errf: "IsCorrupted", 446 }, { 447 // Complete HCLenTree, under-subscribed HLitTree. 448 name: "HuffmanTree13", 449 input: db(`<<< 450 < 1 10 # Last, dynamic block 451 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 452 < 000*3 001*2 000*14 # HCLens: {0:1, 8:1} 453 > 1*214 0*2 1*41 0 # HLits: {*:8} 454 <0*4 X:f00f # ??? 455 `), 456 inIdx: 42, 457 errf: "IsCorrupted", 458 }, { 459 // Complete HCLenTree, complete HLitTree, empty HDistTree, 460 // no EOB symbol. 461 name: "HuffmanTree14", 462 input: db(`<<< 463 < 1 10 # Last, dynamic block 464 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 465 < 000*3 001*2 000*14 # HCLens: {0:1, 8:1} 466 > 1*256 0*2 # HLits: {*:8}, HDists: {} 467 > 00000000 11111111 # Compressed data 468 `), 469 output: dh("00ff"), 470 inIdx: 44, 471 outIdx: 2, 472 errf: "IsUnexpectedEOF", 473 }, { 474 // Complete HCLenTree, complete HLitTree, empty HDistTree. 475 name: "HuffmanTree15", 476 input: db(`<<< 477 < 1 10 # Last, dynamic block 478 < D5:0 D5:3 D4:15 # HLit: 257, HDist: 4, HCLen: 19 479 < 000*3 001*2 000*14 # HCLens: {0:1, 8:1} 480 > 0 1*256 0*4 # HLits: {*:8}, HDists: {} 481 > 00000000 11111111 # Compressed data 482 `), 483 output: dh("01"), 484 inIdx: 44, 485 outIdx: 1, 486 }, { 487 // Complete HCLenTree, complete HLitTree, degenerate HDistTree, 488 // use valid HDist symbol. 489 name: "HuffmanTree16", 490 input: db(`<<< 491 < 0 00 0*5 # Non-last, raw block, padding 492 < H16:0001 H16:fffe # RawSize: 1 493 X:7a # Raw data 494 495 < 1 10 # Last, dynamic block 496 < D5:1 D5:0 D4:15 # HLit: 258, HDist: 1, HCLen: 19 497 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 498 > 0*256 1*3 # HLits: {256:1, 257:1}, HDists: {0:1} 499 > 1 0*2 # Compressed data 500 `), 501 output: dh("7a7a7a7a"), 502 inIdx: 48, 503 outIdx: 4, 504 }, { 505 // Complete HCLenTree, degenerate HLitTree, degenerate HDistTree. 506 name: "HuffmanTree17", 507 input: db(`<<< 508 < 1 10 # Last, dynamic block 509 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 510 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 511 > 0*256 1*2 # HLits: {256:1}, HDists: {0:1} 512 > 0 # Compressed data 513 `), 514 inIdx: 42, 515 }, { 516 // Complete HCLenTree, degenerate HLitTree, empty HDistTree. 517 name: "HuffmanTree18", 518 input: db(`<<< 519 < 1 10 # Last, dynamic block 520 < D5:0 D5:0 D4:15 # HLit: 257, HDist: 1, HCLen: 19 521 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 522 > 0*256 1 0 # HLits: {256:1}, # HDists: {} 523 > 0 # Compressed data 524 `), 525 inIdx: 42, 526 }, { 527 // Complete HCLenTree, complete HLitTree, empty HDistTree, 528 // spanning zero repeater symbol. 529 name: "HuffmanTree19", 530 input: db(`<<< 531 < 1 10 # Last, dynamic block 532 < D5:29 D5:29 D4:15 # HLit: 286, HDist: 30, HCLen: 19 533 < 011 000 011 001 000*13 010 000 # HCLens: {0:1, 1:2, 16:3, 18:3} 534 > 10 0*255 10 111 <D7:48 # HLits: {0:1, 256:1}, HDists: {} 535 > 1 # Compressed data 536 `), 537 inIdx: 43, 538 }, { 539 // Complete HCLenTree, use last repeater on non-zero code. 540 name: "HuffmanTree20", 541 input: db(`<<< 542 < 1 10 # Last, dynamic block 543 < D5:0 D5:0 D4:8 # HLit: 257, HDist: 1, HClen: 12 544 # HCLens: {0:2, 4:2, 16:2, 18:2} 545 < 010 000 010*2 000*7 010 546 # HLits: {0-14:4, 256:4}, HDists: {} 547 > 01*12 10 <D2:0 11 <D7:127 11 <D7:92 01 00 548 # Compressed data 549 > 0000 0001 0010 1111 550 `), 551 output: dh("000102"), 552 inIdx: 15, 553 outIdx: 3, 554 }, { 555 // Complete HCLenTree, use last repeater on zero code. 556 name: "HuffmanTree21", 557 input: db(`<<< 558 < 1 10 # Last, dynamic block 559 < D5:0 D5:0 D4:8 # HLit: 257, HDist: 1, HClen: 12 560 # HCLens: {0:2, 4:2, 16:2, 18:2} 561 < 010 000 010*2 000*7 010 562 # HLits: {241-256:4}, HDists: {} 563 > 00 10 <D2:3 11 <D7:127 11 <D7:85 01*16 00 564 # Compressed data 565 > 0000 0001 0010 1111 566 `), 567 output: dh("f1f2f3"), 568 inIdx: 16, 569 outIdx: 3, 570 }, { 571 // Complete HCLenTree, use last repeater without first code. 572 name: "HuffmanTree22", 573 input: db(`<<< 574 < 1 10 # Last, dynamic block 575 < D5:0 D5:0 D4:8 # HLit: 257, HDist: 1, HClen: 12 576 # HCLens: {0:2, 4:2, 16:2, 18:2} 577 < 010 000 010*2 000*7 010 578 # HLits: {???}, HDists: {???} 579 > 10 <D2:3 11 <D7:127 11 <D7:86 01*16 00 580 # ??? 581 > 0000 0001 0010 1111 582 `), 583 inIdx: 7, 584 errf: "IsCorrupted", 585 }, { 586 // Complete HCLenTree with length codes, complete HLitTree, 587 // empty HDistTree. 588 name: "HuffmanTree23", 589 input: db(`<<< 590 < 1 10 # Last, dynamic block 591 < D5:29 D5:0 D4:15 # HLit: 286, HDist: 1, HCLen: 19 592 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 593 > 0*256 1 0*27 1 0*2 # HLits: {256:1, 284:1}, HDists: {} 594 > 0 # Compressed data 595 `), 596 inIdx: 46, 597 }, { 598 // Complete HCLenTree, complete HLitTree, degenerate HDistTree, 599 // use valid HLit symbol 284 with count 31. 600 name: "HuffmanTree24", 601 input: db(`<<< 602 < 0 00 0*5 # Non-last, raw block, padding 603 < H16:0001 H16:fffe # RawSize: 1 604 X:00 # Raw data 605 606 < 1 10 # Last, dynamic block 607 < D5:29 D5:0 D4:15 # HLit: 286, HDist: 1, HCLen: 19 608 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 609 > 0*256 1 0*27 1 0 1 # HLits: {256:1, 284:1}, HDists: {0:1} 610 > 1 <D5:31 0*2 # Compressed data 611 `), 612 output: db("<<< X:00*259"), 613 inIdx: 53, 614 outIdx: 259, 615 }, { 616 // Complete HCLenTree, complete HLitTree, degenerate HDistTree, 617 // use valid HLit symbol 285. 618 name: "HuffmanTree25", 619 input: db(`<<< 620 < 0 00 0*5 # Non-last, raw block, padding 621 < H16:0001 H16:fffe # RawSize: 1 622 X:00 # Raw data 623 624 < 1 10 # Last, dynamic block 625 < D5:29 D5:0 D4:15 # HLit: 286, HDist: 1, HCLen: 19 626 < 000*3 001 000*13 001 000 # HCLens: {0:1, 1:1} 627 > 0*256 1 0*28 1*2 # HLits: {256:1, 285:1}, HDists: {0:1} 628 > 1 0*2 # Compressed data 629 `), 630 output: db("<<< X:00*259"), 631 inIdx: 52, 632 outIdx: 259, 633 }, { 634 // Complete HCLenTree, complete HLitTree, degenerate HDistTree, 635 // use valid HLit and HDist symbols. 636 name: "HuffmanTree26", 637 input: db(`<<< 638 < 0 10 # Non-last, dynamic block 639 < D5:1 D5:2 D4:14 # HLit: 258, HDist: 3, HCLen: 18 640 # HCLens: {0:3, 1:3, 2:2, 3:2, 18:2} 641 < 000*2 010 011 000*9 010 000 010 000 011 642 # HLits: {97:3, 98:3, 99:2, 256:2, 257:2}, HDists: {2:1} 643 > 10 <D7:86 01 01 00 10 <D7:127 10 <D7:7 00 00 110 110 111 644 # Compressed data 645 > 110 111 00 10 0 01 646 647 < 1 00 0*3 # Last, raw block, padding 648 < H16:0000 H16:ffff # RawSize: 0 649 `), 650 output: []byte("abcabc"), 651 inIdx: 21, 652 outIdx: 6, 653 }, { 654 // Valid short distance match. 655 name: "DistanceMatch0", 656 input: db(`<<< 657 < 0 00 0*5 # Non-last, raw block, padding 658 < H16:0001 H16:fffe # RawSize: 1 659 X:0f # Raw data 660 661 < 1 01 # Last, fixed block 662 > 0000001 D5:0 # Length: 3, Distance: 1 663 > 0000000 # EOB marker 664 `), 665 output: db("<<< X:0f0f0f0f"), 666 inIdx: 9, 667 outIdx: 4, 668 }, { 669 // Valid long distance match. 670 name: "DistanceMatch1", 671 input: db(`<<< 672 < 0 00 0*5 # Non-last, raw block, padding 673 < H16:8000 H16:7fff # RawSize: 32768 674 X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*2048 # Raw data 675 676 < 1 01 # Last, fixed block 677 > 0000001 D5:29 <H13:1fff # Length: 3, Distance: 32768 678 > 11000101 D5:29 <H13:1fff # Length: 258, Distance: 32768 679 > 0000000 # EOB marker 680 `), 681 output: db(`<<< 682 X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*2048 683 X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*16 684 X:0f1e2d3c4b 685 `), 686 inIdx: 32781, 687 outIdx: 33029, 688 }, { 689 // Invalid long distance match with not enough data. 690 name: "DistanceMatch2", 691 input: db(`<<< 692 < 0 00 0*5 # Non-last, raw block, padding 693 < H16:7fff H16:8000 # RawSize: 32767 694 X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*2047 # Raw data 695 X:0f1e2d3c4b5a69788796a5b4c3d2e1 696 697 < 1 01 # Last, fixed block 698 > 0000001 D5:29 <H13:1fff # Length: 3, Distance: 32768 699 > 0000000 # EOB marker 700 `), 701 output: db(`<<< 702 X:0f1e2d3c4b5a69788796a5b4c3d2e1f0*2047 703 X:0f1e2d3c4b5a69788796a5b4c3d2e1 704 `), 705 inIdx: 32776, 706 outIdx: 32767, 707 errf: "IsCorrupted", 708 }, { 709 // Invalid short distance match with no data. 710 name: "DistanceMatch3", 711 input: db(`<<< 712 < 1 01 # Last, fixed block 713 > 0000001 D5:0 # Length: 3, Distance: 1 714 > 0000000 # EOB marker 715 `), 716 inIdx: 2, 717 errf: "IsCorrupted", 718 }, { 719 // Invalid long distance match with no data. 720 name: "DistanceMatch4", 721 input: db(`<<< 722 < 1 01 # Last, fixed block 723 > 0000001 D5:29 <H13:1fff # Length: 3, Distance: 32768 724 > 0000000 # EOB marker 725 `), 726 inIdx: 4, 727 errf: "IsCorrupted", 728 }, { 729 // Large HLitTree caused a panic. 730 name: "Issue3815", 731 input: db(`<<< 732 < 0 10 # Non-last, dynamic block 733 < D5:31 D5:30 D4:7 # HLit: 288, HDist: 31, HCLen: 11 734 735 # ??? 736 <0011011 737 X:e75e1cefb3555877b656b543f46ff2d2e63d99a0858c48ebf8da83042a75c4f8 738 X:0f1211b9b44b09a0be8b914c 739 `), 740 inIdx: 3, 741 errf: "IsCorrupted", 742 }, { 743 // Incomplete HCLenTree causes a panic. 744 name: "Issue5915", 745 input: db(`<<< 746 < 1 10 # Last, dynamic block 747 < 10000 11001 1010 # HLit: 273, HDist: 26, HCLen: 14 748 749 # HCLens: {...}, this is valid 750 < 101 101 110 100 011 011 100 010 100 011 101 100 000 110 751 752 # HLits: {...}, this is invalid 753 > 11110 <D3:7 11100 111111 <D7:10 011 100 00 1010 11100 1010 100 754 1010 00 00 1101 100 00 1100 00 011 010 011 011 1100 100 755 11101 <D2:0 1101 100 00 011 1101 00 1101 1100 1010 1100 1100 100 756 1100 11101 <D2:1 100 1010 1010 1100 010 11101 <D2:3 00 1101 757 11101 <D00:0 1100 1101 11110 <D3:2 00 011 100 100 1010 1100 00 100 758 100 1100 00 010 011 011 00 00 011 011 00 00 00 010 00 1100 00 010 759 010 00 011 011 100 011 100 00 011 011 111111 <D7:119 1101 1011 760 1011 010 010 010 00 011 011 00 011 00 761 762 # HDists: {...}, this is invalid 763 > 100 00 100 100 11100 100 11110 <D3:0 100 010 010 010 1011 1011 764 111110 010 1011 010 1011 010 1011 010 1011 010 765 `), 766 inIdx: 61, 767 errf: "IsCorrupted", 768 }, { 769 // Incomplete HCLenTree causes a panic. 770 name: "Issue5962", 771 input: db(`<<< 772 < 0 10 # Non-last, dynamic block 773 < 10101 10011 1000 # HLit: 278, HDist: 20, HCLen: 12 774 # HCLens: {0:7, 5:7, 6:6, 9:3, 10:7, 11:1, 16:5, 18:6} 775 < 101 000 110 111 000*2 011 110 111*2 001 000 776 `), 777 inIdx: 7, 778 errf: "IsCorrupted", 779 }, { 780 // Over-subscribed HCLenTree caused a hang. 781 name: "Issue10426", 782 input: db(`<<< 783 < 0 10 # Non-last, dynamic block 784 < D5:6 D5:12 D4:2 # HLit: 263, HDist: 13, HCLen: 6 785 < 101 100*2 011 010 001 # HCLens: {0:3, 7:1, 8:2, 16:5, 17:4, 18:4}, invalid 786 <01001 X:4d4b070000ff2e2eff2e2e2e2e2eff # ??? 787 `), 788 inIdx: 5, 789 errf: "IsCorrupted", 790 }, { 791 // Empty HDistTree unexpectedly led to an error. 792 name: "Issue11030", 793 input: db(`<<< 794 < 1 10 # Last, dynamic block 795 < D5:0 D5:0 D4:14 # HLit: 257, HDist: 1, HCLen: 18 796 # HCLens: {0:1, 1:4, 2:2, 16:3, 18:4} 797 < 011 000 100 001 000*11 010 000 100 798 # HLits: {253:2, 254:2, 255:2, 256:2} 799 > 0 1111 <D7:112 1111 <D7:111 0 0 0 0 0 0 0 10 10 10 10 800 # HDists: {} 801 > 0 802 # Compressed data 803 > 11 804 `), 805 inIdx: 14, 806 }, { 807 // Empty HDistTree unexpectedly led to an error. 808 name: "Issue11033", 809 input: db(`<<< 810 < 1 10 # Last, dynamic block 811 < D5:0 D5:0 D4:8 # HLit: 257, HDist: 1, HCLen: 12 812 # HCLens: {0:2, 4:3, 5:2, 6:3, 17:3, 18:3} 813 < 000 011*2 010 000*3 011 000 010 000 011 814 # HLits: {...} 815 > 01 110 100 101 00 00 101 111 1010000 01 110 110 01 111 0100000 816 101 00 100 01 00 00 100 01 01 111 0001000 01 111 1000000 01 110 817 010 100 00 01 110 010 01 00 00 100 110 001 100 111 0100000 01 818 111 0110000 01 00 01 111 0001010 100 110 011 01 110 110 101 00 819 101 110 011 101 110 001 101 111 0001000 101 100 820 # HDists: {} 821 > 00 822 # Compressed data 823 > 10001 0000 0000 10011 0001 0001 10000 0011 10111 111010 0100 824 0011 0100 01110 0010 111000 10010 10110 11000 111100 10101 825 111111 111001 10100 11001 11010 0010 01111 111101 111110 0101 826 11011 0101 111011 0110 827 `), 828 output: dh("" + 829 "3130303634342068652e706870005d05355f7ed957ff084a90925d19e3ebc6d0" + 830 "c6d7", 831 ), 832 inIdx: 57, 833 outIdx: 34, 834 }} 835 836 for _, v := range vectors { 837 t.Run(v.name, func(t *testing.T) { 838 rd, err := NewReader(bytes.NewReader(v.input), nil) 839 if err != nil { 840 t.Fatalf("unexpected NewReader error: %v", err) 841 } 842 output, err := ioutil.ReadAll(rd) 843 if cerr := rd.Close(); cerr != nil { 844 err = cerr 845 } 846 847 if got, want, ok := testutil.BytesCompare(output, v.output); !ok { 848 t.Errorf("output mismatch:\ngot %s\nwant %s", got, want) 849 } 850 if rd.InputOffset != v.inIdx { 851 t.Errorf("input offset mismatch: got %d, want %d", rd.InputOffset, v.inIdx) 852 } 853 if rd.OutputOffset != v.outIdx { 854 t.Errorf("output offset mismatch: got %d, want %d", rd.OutputOffset, v.outIdx) 855 } 856 if v.errf != "" && !errFuncs[v.errf](err) { 857 t.Errorf("mismatching error:\ngot %v\nwant %s(err) == true", err, v.errf) 858 } else if v.errf == "" && err != nil { 859 t.Errorf("unexpected error: got %v", err) 860 } 861 862 // If the zcheck flag is set, then we verify that the test vectors 863 // themselves are consistent with what the C zlib library outputs. 864 // To do that, we use the python wrapper around the library. 865 if *zcheck { 866 output, err := pyDecompress(v.input) 867 if got, want := bool(v.errf == ""), bool(err == nil); got != want { 868 t.Errorf("pass mismatch: got %v, want %v", got, want) 869 } 870 if got, want, ok := testutil.BytesCompare(output, v.output); !ok && err == nil { 871 t.Errorf("output mismatch:\ngot %s\nwant %s", got, want) 872 } 873 } 874 }) 875 } 876} 877 878// TestReaderEarlyEOF tests that Reader returns io.EOF eagerly when possible. 879// There are two situations when it is unable to do so: 880// * There is an non-last, empty, raw block at the end of the stream. 881// Flushing semantics dictate that we must return at that point in the stream 882// prior to knowing whether the end of the stream has been hit or not. 883// * We previously returned from Read because the internal dictionary was full 884// and it so happens that there is no more data to read. This is rare. 885func TestReaderEarlyEOF(t *testing.T) { 886 const maxSize = 1 << 18 887 const dampRatio = 32 // Higher value means more trials 888 889 data := make([]byte, maxSize) 890 for i := range data { 891 data[i] = byte(i) 892 } 893 894 // generateStream generates a DEFLATE stream that decompresses to n bytes 895 // of arbitrary data. If flush is set, then a final Flush is called at the 896 // very end of the stream. 897 var wrBuf bytes.Buffer 898 wr, _ := flate.NewWriter(nil, flate.BestSpeed) 899 generateStream := func(n int, flush bool) []byte { 900 wrBuf.Reset() 901 wr.Reset(&wrBuf) 902 wr.Write(data[:n]) 903 if flush { 904 wr.Flush() 905 } 906 wr.Close() 907 return wrBuf.Bytes() 908 } 909 910 // readStream reads all the data and reports whether an early EOF occurred. 911 var rd Reader 912 rdBuf := make([]byte, 2111) 913 readStream := func(data []byte) (bool, error) { 914 rd.Reset(bytes.NewReader(data)) 915 for { 916 n, err := rd.Read(rdBuf) 917 if err != nil { 918 if err == io.EOF { 919 err = nil 920 } 921 return n > 0, err 922 } 923 } 924 } 925 926 // There is no guarantee that early io.EOF occurs for all DEFLATE streams, 927 // but it should occur for most cases. 928 var numEarly, numTotal int 929 for i := 0; i < maxSize; i += 1 + i/dampRatio { 930 earlyEOF, err := readStream(generateStream(i, false)) 931 if err != nil { 932 t.Errorf("unexpected Read error: %v", err) 933 } 934 if earlyEOF { 935 numEarly++ 936 } 937 numTotal++ 938 } 939 got := 100 * float64(numEarly) / float64(numTotal) 940 if want := 95.0; got < want { 941 t.Errorf("too few early EOFs: %0.1f%% < %0.1f%%", got, want) 942 } 943 944 // If there is a flush block at the end of all the data, an early io.EOF 945 // is never possible. Check for this case. 946 for i := 0; i < maxSize; i += 1 + i/dampRatio { 947 earlyEOF, err := readStream(generateStream(i, true)) 948 if err != nil { 949 t.Errorf("unexpected Read error: %v", err) 950 } 951 if earlyEOF { 952 t.Errorf("size: %d, unexpected early EOF with terminating flush", i) 953 } 954 } 955} 956 957func TestReaderReset(t *testing.T) { 958 const data = "\x00\x0c\x00\xf3\xffhello, world\x01\x00\x00\xff\xff" 959 960 var rd Reader 961 if err := rd.Close(); err != nil { 962 t.Errorf("unexpected Close error: %v", err) 963 } 964 965 rd.Reset(strings.NewReader("garbage")) 966 if _, err := ioutil.ReadAll(&rd); !errors.IsCorrupted(err) { 967 t.Errorf("mismatching Read error: got %v, want IsCorrupted(err) == true", err) 968 } 969 if err := rd.Close(); !errors.IsCorrupted(err) { 970 t.Errorf("mismatching Close error: got %v, want IsCorrupted(err) == true", err) 971 } 972 973 rd.Reset(strings.NewReader(data)) 974 if _, err := ioutil.ReadAll(&rd); err != nil { 975 t.Errorf("unexpected Read error: %v", err) 976 } 977 if err := rd.Close(); err != nil { 978 t.Errorf("unexpected Close error: %v", err) 979 } 980} 981 982func BenchmarkDecode(b *testing.B) { 983 runBenchmarks(b, func(b *testing.B, data []byte, lvl int) { 984 b.StopTimer() 985 b.ReportAllocs() 986 987 buf := new(bytes.Buffer) 988 wr, _ := flate.NewWriter(buf, lvl) 989 wr.Write(data) 990 wr.Close() 991 992 br := new(bytes.Reader) 993 rd := new(Reader) 994 995 b.SetBytes(int64(len(data))) 996 b.StartTimer() 997 for i := 0; i < b.N; i++ { 998 br.Reset(buf.Bytes()) 999 rd.Reset(br) 1000 1001 n, err := io.Copy(ioutil.Discard, rd) 1002 if n != int64(len(data)) || err != nil { 1003 b.Fatalf("Copy() = (%d, %v), want (%d, nil)", n, err, len(data)) 1004 } 1005 if err := rd.Close(); err != nil { 1006 b.Fatalf("Close() = %v, want nil", err) 1007 } 1008 } 1009 }) 1010} 1011