// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package snappy

import (
	"bytes"
	"encoding/binary"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"math/rand"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"testing"
)

var (
	download     = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
	testdataDir  = flag.String("testdataDir", "testdata", "Directory containing the test data")
	benchdataDir = flag.String("benchdataDir", "testdata/bench", "Directory containing the benchmark data")
)

// goEncoderShouldMatchCppEncoder is whether to test that the algorithm used by
// Go's encoder matches byte-for-byte what the C++ snappy encoder produces, on
// this GOARCH. There is more than one valid encoding of any given input, and
// there is more than one good algorithm along the frontier of trading off
// throughput for output size. Nonetheless, we presume that the C++ encoder's
// algorithm is a good one and has been tested on a wide range of inputs, so
// matching that exactly should mean that the Go encoder's algorithm is also
// good, without needing to gather our own corpus of test data.
//
// The exact algorithm used by the C++ code is potentially endian dependent, as
// it puns a byte pointer to a uint32 pointer to load, hash and compare 4 bytes
// at a time. The Go implementation is endian agnostic, in that its output is
// the same (as little-endian C++ code), regardless of the CPU's endianness.
//
// Thus, when comparing Go's output to C++ output generated beforehand, such as
// the "testdata/pi.txt.rawsnappy" file generated by C++ code on a little-
// endian system, we can run that test regardless of the runtime.GOARCH value.
//
// When comparing Go's output to dynamically generated C++ output, i.e. the
// result of fork/exec'ing a C++ program, we can run that test only on
// little-endian systems, because the C++ output might be different on
// big-endian systems. The runtime package doesn't export endianness per se,
// but we can restrict this match-C++ test to common little-endian systems.
const goEncoderShouldMatchCppEncoder = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "arm"

// TestMaxEncodedLenOfMaxBlockSize checks that the precomputed constant agrees
// with what MaxEncodedLen actually computes for maxBlockSize.
func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
	got := maxEncodedLenOfMaxBlockSize
	want := MaxEncodedLen(maxBlockSize)
	if got != want {
		t.Fatalf("got %d, want %d", got, want)
	}
}

// cmp returns nil if a and b are equal. Otherwise it returns an error
// describing the first difference: a length mismatch, or the index and values
// of the first differing byte.
func cmp(a, b []byte) error {
	if bytes.Equal(a, b) {
		return nil
	}
	if len(a) != len(b) {
		return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
	}
	for i := range a {
		if a[i] != b[i] {
			return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i])
		}
	}
	return nil
}

// roundtrip Encodes b (using ebuf as the destination scratch buffer), Decodes
// the result (using dbuf as the destination scratch buffer), and returns an
// error unless the decoded bytes equal the original b.
func roundtrip(b, ebuf, dbuf []byte) error {
	d, err := Decode(dbuf, Encode(ebuf, b))
	if err != nil {
		return fmt.Errorf("decoding error: %v", err)
	}
	if err := cmp(d, b); err != nil {
		return fmt.Errorf("roundtrip mismatch: %v", err)
	}
	return nil
}

func TestEmpty(t *testing.T) {
	if err := roundtrip(nil, nil, nil); err != nil {
		t.Fatal(err)
	}
}

// TestSmallCopy round-trips short inputs that contain back-references, across
// a variety of (possibly too-small) scratch buffer sizes.
func TestSmallCopy(t *testing.T) {
	for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
		for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
			for i := 0; i < 32; i++ {
				s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
				if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
					t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
				}
			}
		}
	}
}

// TestSmallRand round-trips pseudo-random (incompressible) inputs of many sizes.
func TestSmallRand(t *testing.T) {
	rng := rand.New(rand.NewSource(1))
	for n := 1; n < 20000; n += 23 {
		b := make([]byte, n)
		for i := range b {
			b[i] = uint8(rng.Intn(256))
		}
		if err := roundtrip(b, nil, nil); err != nil {
			t.Fatal(err)
		}
	}
}

// TestSmallRegular round-trips highly regular (compressible) inputs of many sizes.
func TestSmallRegular(t *testing.T) {
	for n := 1; n < 20000; n += 23 {
		b := make([]byte, n)
		for i := range b {
			b[i] = uint8(i%10 + 'a')
		}
		if err := roundtrip(b, nil, nil); err != nil {
			t.Fatal(err)
		}
	}
}

func TestInvalidVarint(t *testing.T) {
	testCases := []struct {
		desc  string
		input string
	}{{
		"invalid varint, final byte has continuation bit set",
		"\xff",
	}, {
		"invalid varint, value overflows uint64",
		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
	}, {
		// https://github.com/google/snappy/blob/master/format_description.txt
		// says that "the stream starts with the uncompressed length [as a
		// varint] (up to a maximum of 2^32 - 1)".
		"valid varint (as uint64), but value overflows uint32",
		"\x80\x80\x80\x80\x10",
	}}

	for _, tc := range testCases {
		input := []byte(tc.input)
		if _, err := DecodedLen(input); err != ErrCorrupt {
			t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
		}
		if _, err := Decode(nil, input); err != ErrCorrupt {
			t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
		}
	}
}

func TestDecode(t *testing.T) {
	lit40Bytes := make([]byte, 40)
	for i := range lit40Bytes {
		lit40Bytes[i] = byte(i)
	}
	lit40 := string(lit40Bytes)

	testCases := []struct {
		desc    string
		input   string
		want    string
		wantErr error
	}{{
		`decodedLen=0; valid input`,
		"\x00",
		"",
		nil,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
		"\x03" + "\x08\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
		"\x02" + "\x08\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
		"\x03" + "\x08\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
		"\x28" + "\x9c" + lit40,
		lit40,
		nil,
	}, {
		`decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
		"\x01" + "\xf0",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
		"\x03" + "\xf0\x02\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
		"\x01" + "\xf4\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
		"\x03" + "\xf4\x02\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
		"\x01" + "\xf8\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
		"\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
		"\x01" + "\xfc\x00\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
		"\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
		"\x04" + "\xfc\x02\x00\x00\x00\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
		"\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
		"\x04" + "\x01",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
		"\x04" + "\x02\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy4, 4 extra length|offset bytes; not enough extra bytes`,
		"\x04" + "\x03\x00\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
		"\x04" + "\x0cabcd",
		"abcd",
		nil,
	}, {
		`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
		"\x0d" + "\x0cabcd" + "\x15\x04",
		"abcdabcdabcda",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x04",
		"abcdabcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x02",
		"abcdcdcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x01",
		"abcddddd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
		"\x08" + "\x0cabcd" + "\x01\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
		"\x09" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
		"\x08" + "\x0cabcd" + "\x01\x05",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
		"\x07" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
		"\x06" + "\x0cabcd" + "\x06\x03\x00",
		"abcdbc",
		nil,
	}, {
		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy4; length=2 offset=3; valid input`,
		"\x06" + "\x0cabcd" + "\x07\x03\x00\x00\x00",
		"abcdbc",
		nil,
	}, {
		`decodedLen=0; tagCopy4, 4 extra length|offset bytes; with msb set (0x93); discovered by go-fuzz`,
		"\x00\xfc000\x93",
		"",
		ErrCorrupt,
	}}

	const (
		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
		// not present in either the input or the output. It is written to dBuf
		// to check that Decode does not write bytes past the end of
		// dBuf[:dLen].
		//
		// The magic number 37 was chosen because it is prime. A more 'natural'
		// number like 32 might lead to a false negative if, for example, a
		// byte was incorrectly copied 4*8 bytes later.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)

	var dBuf [100]byte
loop:
	for i, tc := range testCases {
		input := []byte(tc.input)
		// Sanity-check the test case itself: the input must not contain any
		// sentinel bytes, or the overrun check below would be meaningless.
		for _, x := range input {
			if notPresentBase <= x && x < notPresentBase+notPresentLen {
				t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
				continue loop
			}
		}

		dLen, n := binary.Uvarint(input)
		if n <= 0 {
			t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
			continue
		}
		if dLen > uint64(len(dBuf)) {
			t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
			continue
		}

		// Fill dBuf with sentinel bytes before decoding, so that any write
		// past dBuf[:dLen] can be detected afterwards.
		for j := range dBuf {
			dBuf[j] = byte(notPresentBase + j%notPresentLen)
		}
		g, gotErr := Decode(dBuf[:], input)
		if got := string(g); got != tc.want || gotErr != tc.wantErr {
			t.Errorf("#%d (%s):\ngot %q, %v\nwant %q, %v",
				i, tc.desc, got, gotErr, tc.want, tc.wantErr)
			continue
		}
		for j, x := range dBuf {
			if uint64(j) < dLen {
				continue
			}
			if w := byte(notPresentBase + j%notPresentLen); x != w {
				t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
					i, tc.desc, j, x, w, dBuf)
				continue loop
			}
		}
	}
}

// TestDecodeCopy4 exercises a tagCopy4 element whose offset requires more than
// two bytes, by copying across a 65536-byte literal.
func TestDecodeCopy4(t *testing.T) {
	dots := strings.Repeat(".", 65536)

	input := strings.Join([]string{
		"\x89\x80\x04",         // decodedLen = 65545.
		"\x0cpqrs",             // 4-byte literal "pqrs".
		"\xf4\xff\xff" + dots,  // 65536-byte literal dots.
		"\x13\x04\x00\x01\x00", // tagCopy4; length=5 offset=65540.
	}, "")

	gotBytes, err := Decode(nil, []byte(input))
	if err != nil {
		t.Fatal(err)
	}
	got := string(gotBytes)
	want := "pqrs" + dots + "pqrs."
	if len(got) != len(want) {
		t.Fatalf("got %d bytes, want %d", len(got), len(want))
	}
	if got != want {
		for i := 0; i < len(got); i++ {
			if g, w := got[i], want[i]; g != w {
				t.Fatalf("byte #%d: got %#02x, want %#02x", i, g, w)
			}
		}
	}
}

// TestDecodeLengthOffset tests decoding an encoding of the form literal +
// copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
func TestDecodeLengthOffset(t *testing.T) {
	const (
		prefix = "abcdefghijklmnopqr"
		suffix = "ABCDEFGHIJKLMNOPQR"

		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
		// not present in either the input or the output. It is written to
		// gotBuf to check that Decode does not write bytes past the end of
		// gotBuf[:totalLen].
		//
		// The magic number 37 was chosen because it is prime. A more 'natural'
		// number like 32 might lead to a false negative if, for example, a
		// byte was incorrectly copied 4*8 bytes later.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)
	var gotBuf, wantBuf, inputBuf [128]byte
	for length := 1; length <= 18; length++ {
		for offset := 1; offset <= 18; offset++ {
		loop:
			for suffixLen := 0; suffixLen <= 18; suffixLen++ {
				totalLen := len(prefix) + length + suffixLen

				// Hand-assemble the snappy-encoded input: uvarint(totalLen),
				// a literal of prefix, a tagCopy2, then an optional literal
				// of suffix[:suffixLen].
				inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
				inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
				inputLen++
				inputLen += copy(inputBuf[inputLen:], prefix)
				inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
				inputBuf[inputLen+1] = byte(offset)
				inputBuf[inputLen+2] = 0x00
				inputLen += 3
				if suffixLen > 0 {
					inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
					inputLen++
					inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
				}
				input := inputBuf[:inputLen]

				for i := range gotBuf {
					gotBuf[i] = byte(notPresentBase + i%notPresentLen)
				}
				got, err := Decode(gotBuf[:], input)
				if err != nil {
					t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
					continue
				}

				// Independently compute the expected output, byte by byte,
				// so that overlapping copies (offset < length) are handled
				// the same way the format specifies.
				wantLen := 0
				wantLen += copy(wantBuf[wantLen:], prefix)
				for i := 0; i < length; i++ {
					wantBuf[wantLen] = wantBuf[wantLen-offset]
					wantLen++
				}
				wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
				want := wantBuf[:wantLen]

				for _, x := range input {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
							length, offset, suffixLen, x, input)
						continue loop
					}
				}
				for i, x := range gotBuf {
					if i < totalLen {
						continue
					}
					if w := byte(notPresentBase + i%notPresentLen); x != w {
						t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
							"Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
							length, offset, suffixLen, totalLen, i, x, w, gotBuf)
						continue loop
					}
				}
				for _, x := range want {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
							length, offset, suffixLen, x, want)
						continue loop
					}
				}

				if !bytes.Equal(got, want) {
					t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot % x\nwant % x",
						length, offset, suffixLen, input, got, want)
					continue
				}
			}
		}
	}
}

const (
	goldenText       = "Mark.Twain-Tom.Sawyer.txt"
	goldenCompressed = goldenText + ".rawsnappy"
)

// TestDecodeGoldenInput decodes a file compressed beforehand by the C++
// snappy encoder and checks it against the original text.
func TestDecodeGoldenInput(t *testing.T) {
	tDir := filepath.FromSlash(*testdataDir)
	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	got, err := Decode(nil, src)
	if err != nil {
		t.Fatalf("Decode: %v", err)
	}
	want, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if err := cmp(got, want); err != nil {
		t.Fatal(err)
	}
}

// TestEncodeGoldenInput checks that Go's Encode output is byte-for-byte
// identical to the golden file produced by the C++ snappy encoder.
func TestEncodeGoldenInput(t *testing.T) {
	tDir := filepath.FromSlash(*testdataDir)
	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	got := Encode(nil, src)
	want, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if err := cmp(got, want); err != nil {
		t.Fatal(err)
	}
}

// TestExtendMatchGoldenInput checks extendMatch against precomputed results
// (extendMatchGoldenTestCases, defined elsewhere in this package) on the
// golden text.
func TestExtendMatchGoldenInput(t *testing.T) {
	tDir := filepath.FromSlash(*testdataDir)
	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	for i, tc := range extendMatchGoldenTestCases {
		got := extendMatch(src, tc.i, tc.j)
		if got != tc.want {
			t.Errorf("test #%d: i, j = %5d, %5d: got %5d (= j + %6d), want %5d (= j + %6d)",
				i, tc.i, tc.j, got, got-tc.j, tc.want, tc.want-tc.j)
		}
	}
}

func TestExtendMatch(t *testing.T) {
	// ref is a simple, reference implementation of extendMatch.
	ref := func(src []byte, i, j int) int {
		for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
		}
		return j
	}

	nums := []int{0, 1, 2, 7, 8, 9, 29, 30, 31, 32, 33, 34, 38, 39, 40}
	for yIndex := 40; yIndex > 30; yIndex-- {
		xxx := bytes.Repeat([]byte("x"), 40)
		if yIndex < len(xxx) {
			xxx[yIndex] = 'y'
		}
		for _, i := range nums {
			for _, j := range nums {
				if i >= j {
					continue
				}
				got := extendMatch(xxx, i, j)
				want := ref(xxx, i, j)
				if got != want {
					t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
				}
			}
		}
	}
}

const snappytoolCmdName = "cmd/snappytool/snappytool"

// skipTestSameEncodingAsCpp returns a non-empty skip message when the
// match-C++ tests cannot run: either this GOARCH is not a known little-endian
// system, or the snappytool binary has not been built.
func skipTestSameEncodingAsCpp() (msg string) {
	if !goEncoderShouldMatchCppEncoder {
		return fmt.Sprintf("skipping testing that the encoding is byte-for-byte identical to C++: GOARCH=%s", runtime.GOARCH)
	}
	if _, err := os.Stat(snappytoolCmdName); err != nil {
		return fmt.Sprintf("could not find snappytool: %v", err)
	}
	return ""
}

// runTestSameEncodingAsCpp encodes src with the Go encoder and with the
// fork/exec'ed snappytool (which wraps the C++ encoder), and returns an error
// if the two outputs differ.
func runTestSameEncodingAsCpp(src []byte) error {
	got := Encode(nil, src)

	cmd := exec.Command(snappytoolCmdName, "-e")
	cmd.Stdin = bytes.NewReader(src)
	want, err := cmd.Output()
	if err != nil {
		return fmt.Errorf("could not run snappytool: %v", err)
	}
	return cmp(got, want)
}

func TestSameEncodingAsCppShortCopies(t *testing.T) {
	if msg := skipTestSameEncodingAsCpp(); msg != "" {
		t.Skip(msg)
	}
	src := bytes.Repeat([]byte{'a'}, 20)
	for i := 0; i <= len(src); i++ {
		if err := runTestSameEncodingAsCpp(src[:i]); err != nil {
			t.Errorf("i=%d: %v", i, err)
		}
	}
}

func TestSameEncodingAsCppLongFiles(t *testing.T) {
	if msg := skipTestSameEncodingAsCpp(); msg != "" {
		t.Skip(msg)
	}
	bDir := filepath.FromSlash(*benchdataDir)
	failed := false
	for i, tf := range testFiles {
		if err := downloadBenchmarkFiles(t, tf.filename); err != nil {
			t.Fatalf("failed to download testdata: %s", err)
		}
		data := readFile(t, filepath.Join(bDir, tf.filename))
		if n := tf.sizeLimit; 0 < n && n < len(data) {
			data = data[:n]
		}
		if err := runTestSameEncodingAsCpp(data); err != nil {
			t.Errorf("i=%d: %v", i, err)
			failed = true
		}
	}
	if failed {
		t.Errorf("was the snappytool program built against the C++ snappy library version " +
			"d53de187 or later, commited on 2016-04-05? See " +
			"https://github.com/google/snappy/commit/d53de18799418e113e44444252a39b12a0e4e0cc")
	}
}

// TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
// described in decode_amd64.s and its claim of a 10 byte overrun worst case.
func TestSlowForwardCopyOverrun(t *testing.T) {
	const base = 100

	for length := 1; length < 18; length++ {
		for offset := 1; offset < 18; offset++ {
			highWaterMark := base
			d := base
			l := length
			o := offset

			// makeOffsetAtLeast8
			for o < 8 {
				if end := d + 8; highWaterMark < end {
					highWaterMark = end
				}
				l -= o
				d += o
				o += o
			}

			// fixUpSlowForwardCopy
			a := d
			d += l

			// finishSlowForwardCopy
			for l > 0 {
				if end := a + 8; highWaterMark < end {
					highWaterMark = end
				}
				a += 8
				l -= 8
			}

			dWant := base + length
			overrun := highWaterMark - dWant
			if d != dWant || overrun < 0 || 10 < overrun {
				t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
					length, offset, d, overrun, dWant)
			}
		}
	}
}

// TestEncodeNoiseThenRepeats encodes input for which the first half is very
// incompressible and the second half is very compressible. The encoded form's
// length should be closer to 50% of the original length than 100%.
func TestEncodeNoiseThenRepeats(t *testing.T) {
	for _, origLen := range []int{256 * 1024, 2048 * 1024} {
		src := make([]byte, origLen)
		rng := rand.New(rand.NewSource(1))
		firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
		for i := range firstHalf {
			firstHalf[i] = uint8(rng.Intn(256))
		}
		for i := range secondHalf {
			secondHalf[i] = uint8(i >> 8)
		}
		dst := Encode(nil, src)
		if got, want := len(dst), origLen*3/4; got >= want {
			t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
		}
	}
}

func TestFramingFormat(t *testing.T) {
	// src is comprised of alternating 1e5-sized sequences of random
	// (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
	// because it is larger than maxBlockSize (64k).
	src := make([]byte, 1e6)
	rng := rand.New(rand.NewSource(1))
	for i := 0; i < 10; i++ {
		if i%2 == 0 {
			for j := 0; j < 1e5; j++ {
				src[1e5*i+j] = uint8(rng.Intn(256))
			}
		} else {
			for j := 0; j < 1e5; j++ {
				src[1e5*i+j] = uint8(i)
			}
		}
	}

	buf := new(bytes.Buffer)
	if _, err := NewWriter(buf).Write(src); err != nil {
		t.Fatalf("Write: encoding: %v", err)
	}
	dst, err := ioutil.ReadAll(NewReader(buf))
	if err != nil {
		t.Fatalf("ReadAll: decoding: %v", err)
	}
	if err := cmp(dst, src); err != nil {
		t.Fatal(err)
	}
}

// TestWriterGoldenOutput pins the exact framed byte stream produced by a
// BufferedWriter for a fixed sequence of Writes and Flushes.
func TestWriterGoldenOutput(t *testing.T) {
	buf := new(bytes.Buffer)
	w := NewBufferedWriter(buf)
	defer w.Close()
	w.Write([]byte("abcd")) // Not compressible.
	w.Flush()
	w.Write(bytes.Repeat([]byte{'A'}, 150)) // Compressible.
	w.Flush()
	// The next chunk is also compressible, but a naive, greedy encoding of the
	// overall length 67 copy as a length 64 copy (the longest expressible as a
	// tagCopy1 or tagCopy2) plus a length 3 remainder would be two 3-byte
	// tagCopy2 tags (6 bytes), since the minimum length for a tagCopy1 is 4
	// bytes. Instead, we could do it shorter, in 5 bytes: a 3-byte tagCopy2
	// (of length 60) and a 2-byte tagCopy1 (of length 7).
	w.Write(bytes.Repeat([]byte{'B'}, 68))
	w.Write([]byte("efC"))                  // Not compressible.
	w.Write(bytes.Repeat([]byte{'C'}, 20))  // Compressible.
	w.Write(bytes.Repeat([]byte{'B'}, 20))  // Compressible.
	w.Write([]byte("g"))                    // Not compressible.
	w.Flush()

	got := buf.String()
	want := strings.Join([]string{
		magicChunk,
		"\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
		"\x68\x10\xe6\xb6", // Checksum.
		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
		"\x00\x11\x00\x00", // Compressed chunk, 17 bytes long (including 4 byte checksum).
		"\x5f\xeb\xf2\x10", // Checksum.
		"\x96\x01",         // Compressed payload: Uncompressed length (varint encoded): 150.
		"\x00\x41",         // Compressed payload: tagLiteral, length=1, "A".
		"\xfe\x01\x00",     // Compressed payload: tagCopy2, length=64, offset=1.
		"\xfe\x01\x00",     // Compressed payload: tagCopy2, length=64, offset=1.
		"\x52\x01\x00",     // Compressed payload: tagCopy2, length=21, offset=1.
		"\x00\x18\x00\x00", // Compressed chunk, 24 bytes long (including 4 byte checksum).
		"\x30\x85\x69\xeb", // Checksum.
		"\x70",             // Compressed payload: Uncompressed length (varint encoded): 112.
		"\x00\x42",         // Compressed payload: tagLiteral, length=1, "B".
		"\xee\x01\x00",     // Compressed payload: tagCopy2, length=60, offset=1.
		"\x0d\x01",         // Compressed payload: tagCopy1, length=7, offset=1.
		"\x08\x65\x66\x43", // Compressed payload: tagLiteral, length=3, "efC".
		"\x4e\x01\x00",     // Compressed payload: tagCopy2, length=20, offset=1.
		"\x4e\x5a\x00",     // Compressed payload: tagCopy2, length=20, offset=90.
		"\x00\x67",         // Compressed payload: tagLiteral, length=1, "g".
	}, "")
	if got != want {
		t.Fatalf("\ngot: % x\nwant: % x", got, want)
	}
}

// TestEmitLiteral checks the tag-byte (and extra length bytes) prefix that
// emitLiteral produces for literals of various lengths.
func TestEmitLiteral(t *testing.T) {
	testCases := []struct {
		length int
		want   string
	}{
		{1, "\x00"},
		{2, "\x04"},
		{59, "\xe8"},
		{60, "\xec"},
		{61, "\xf0\x3c"},
		{62, "\xf0\x3d"},
		{254, "\xf0\xfd"},
		{255, "\xf0\xfe"},
		{256, "\xf0\xff"},
		{257, "\xf4\x00\x01"},
		{65534, "\xf4\xfd\xff"},
		{65535, "\xf4\xfe\xff"},
		{65536, "\xf4\xff\xff"},
	}

	dst := make([]byte, 70000)
	nines := bytes.Repeat([]byte{0x99}, 65536)
	for _, tc := range testCases {
		lit := nines[:tc.length]
		n := emitLiteral(dst, lit)
		if !bytes.HasSuffix(dst[:n], lit) {
			t.Errorf("length=%d: did not end with that many literal bytes", tc.length)
			continue
		}
		got := string(dst[:n-tc.length])
		if got != tc.want {
			t.Errorf("length=%d:\ngot % x\nwant % x", tc.length, got, tc.want)
			continue
		}
	}
}

// TestEmitCopy checks the exact tagCopy1/tagCopy2 byte sequences that emitCopy
// produces, including the split of long copies into multiple tags.
func TestEmitCopy(t *testing.T) {
	testCases := []struct {
		offset int
		length int
		want   string
	}{
		{8, 04, "\x01\x08"},
		{8, 11, "\x1d\x08"},
		{8, 12, "\x2e\x08\x00"},
		{8, 13, "\x32\x08\x00"},
		{8, 59, "\xea\x08\x00"},
		{8, 60, "\xee\x08\x00"},
		{8, 61, "\xf2\x08\x00"},
		{8, 62, "\xf6\x08\x00"},
		{8, 63, "\xfa\x08\x00"},
		{8, 64, "\xfe\x08\x00"},
		{8, 65, "\xee\x08\x00\x05\x08"},
		{8, 66, "\xee\x08\x00\x09\x08"},
		{8, 67, "\xee\x08\x00\x0d\x08"},
		{8, 68, "\xfe\x08\x00\x01\x08"},
		{8, 69, "\xfe\x08\x00\x05\x08"},
		{8, 80, "\xfe\x08\x00\x3e\x08\x00"},

		{256, 04, "\x21\x00"},
		{256, 11, "\x3d\x00"},
		{256, 12, "\x2e\x00\x01"},
		{256, 13, "\x32\x00\x01"},
		{256, 59, "\xea\x00\x01"},
		{256, 60, "\xee\x00\x01"},
		{256, 61, "\xf2\x00\x01"},
		{256, 62, "\xf6\x00\x01"},
		{256, 63, "\xfa\x00\x01"},
		{256, 64, "\xfe\x00\x01"},
		{256, 65, "\xee\x00\x01\x25\x00"},
		{256, 66, "\xee\x00\x01\x29\x00"},
		{256, 67, "\xee\x00\x01\x2d\x00"},
		{256, 68, "\xfe\x00\x01\x21\x00"},
		{256, 69, "\xfe\x00\x01\x25\x00"},
		{256, 80, "\xfe\x00\x01\x3e\x00\x01"},

		{2048, 04, "\x0e\x00\x08"},
		{2048, 11, "\x2a\x00\x08"},
		{2048, 12, "\x2e\x00\x08"},
		{2048, 13, "\x32\x00\x08"},
		{2048, 59, "\xea\x00\x08"},
		{2048, 60, "\xee\x00\x08"},
		{2048, 61, "\xf2\x00\x08"},
		{2048, 62, "\xf6\x00\x08"},
		{2048, 63, "\xfa\x00\x08"},
		{2048, 64, "\xfe\x00\x08"},
		{2048, 65, "\xee\x00\x08\x12\x00\x08"},
		{2048, 66, "\xee\x00\x08\x16\x00\x08"},
		{2048, 67, "\xee\x00\x08\x1a\x00\x08"},
		{2048, 68, "\xfe\x00\x08\x0e\x00\x08"},
		{2048, 69, "\xfe\x00\x08\x12\x00\x08"},
		{2048, 80, "\xfe\x00\x08\x3e\x00\x08"},
	}

	dst := make([]byte, 1024)
	for _, tc := range testCases {
		n := emitCopy(dst, tc.offset, tc.length)
		got := string(dst[:n])
		if got != tc.want {
			t.Errorf("offset=%d, length=%d:\ngot % x\nwant % x", tc.offset, tc.length, got, tc.want)
		}
	}
}

func TestNewBufferedWriter(t *testing.T) {
	// Test all 32 possible sub-sequences of these 5 input slices.
	//
	// Their lengths sum to 400,000, which is over 6 times the Writer ibuf
	// capacity: 6 * maxBlockSize is 393,216.
	inputs := [][]byte{
		bytes.Repeat([]byte{'a'}, 40000),
		bytes.Repeat([]byte{'b'}, 150000),
		bytes.Repeat([]byte{'c'}, 60000),
		bytes.Repeat([]byte{'d'}, 120000),
		bytes.Repeat([]byte{'e'}, 30000),
	}
loop:
	for i := 0; i < 1<<uint(len(inputs)); i++ {
		var want []byte
		buf := new(bytes.Buffer)
		w := NewBufferedWriter(buf)
		for j, input := range inputs {
			if i&(1<<uint(j)) == 0 {
				continue
			}
			if _, err := w.Write(input); err != nil {
				t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
				continue loop
			}
			want = append(want, input...)
		}
		if err := w.Close(); err != nil {
			t.Errorf("i=%#02x: Close: %v", i, err)
			continue
		}
		got, err := ioutil.ReadAll(NewReader(buf))
		if err != nil {
			t.Errorf("i=%#02x: ReadAll: %v", i, err)
			continue
		}
		if err := cmp(got, want); err != nil {
			t.Errorf("i=%#02x: %v", i, err)
			continue
		}
	}
}

// TestFlush checks that a BufferedWriter holds data back until Flush is
// called, and that Flush pushes it to the underlying io.Writer.
func TestFlush(t *testing.T) {
	buf := new(bytes.Buffer)
	w := NewBufferedWriter(buf)
	defer w.Close()
	if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
		t.Fatalf("Write: %v", err)
	}
	if n := buf.Len(); n != 0 {
		t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
	}
	if err := w.Flush(); err != nil {
		t.Fatalf("Flush: %v", err)
	}
	if n := buf.Len(); n == 0 {
		t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
	}
}

func TestReaderUncompressedDataOK(t *testing.T) {
	r := NewReader(strings.NewReader(magicChunk +
		"\x01\x08\x00\x00" + // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
		"\x68\x10\xe6\xb6" + // Checksum.
		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
	))
	g, err := ioutil.ReadAll(r)
	if err != nil {
		t.Fatal(err)
	}
	if got, want := string(g), "abcd"; got != want {
		t.Fatalf("got %q, want %q", got, want)
	}
}

func TestReaderUncompressedDataNoPayload(t *testing.T) {
	r := NewReader(strings.NewReader(magicChunk +
		"\x01\x04\x00\x00" + // Uncompressed chunk, 4 bytes long.
		"", // No payload; corrupt input.
	))
	if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
		t.Fatalf("got %v, want %v", err, ErrCorrupt)
	}
}

func TestReaderUncompressedDataTooLong(t *testing.T) {
	// https://github.com/google/snappy/blob/master/framing_format.txt section
	// 4.3 says that "the maximum legal chunk length... is 65540", or 0x10004.
	const n = 0x10005

	r := NewReader(strings.NewReader(magicChunk +
		"\x01\x05\x00\x01" + // Uncompressed chunk, n bytes long.
		strings.Repeat("\x00", n),
	))
	if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
		t.Fatalf("got %v, want %v", err, ErrCorrupt)
	}
}

// TestReaderReset checks that a Reader can be Reset and reused after seeing
// valid, invalid and partially-read streams, in any order.
func TestReaderReset(t *testing.T) {
	gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
	buf := new(bytes.Buffer)
	if _, err := NewWriter(buf).Write(gold); err != nil {
		t.Fatalf("Write: %v", err)
	}
	encoded, invalid, partial := buf.String(), "invalid", "partial"
	r := NewReader(nil)
	for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
		if s == partial {
			// Read only part of a valid stream, then Reset away from it; the
			// next iteration must not be affected by the abandoned state.
			r.Reset(strings.NewReader(encoded))
			if _, err := r.Read(make([]byte, 101)); err != nil {
				t.Errorf("#%d: %v", i, err)
				continue
			}
			continue
		}
		r.Reset(strings.NewReader(s))
		got, err := ioutil.ReadAll(r)
		switch s {
		case encoded:
			if err != nil {
				t.Errorf("#%d: %v", i, err)
				continue
			}
			if err := cmp(got, gold); err != nil {
				t.Errorf("#%d: %v", i, err)
				continue
			}
		case invalid:
			if err == nil {
				t.Errorf("#%d: got nil error, want non-nil", i)
				continue
			}
		}
	}
}

// TestWriterReset checks that both flavors of Writer can be Reset and reused
// across many payload sizes.
func TestWriterReset(t *testing.T) {
	gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
	const n = 20
	for _, buffered := range []bool{false, true} {
		var w *Writer
		if buffered {
			w = NewBufferedWriter(nil)
			defer w.Close()
		} else {
			w = NewWriter(nil)
		}

		var gots, wants [][]byte
		failed := false
		for i := 0; i <= n; i++ {
			buf := new(bytes.Buffer)
			w.Reset(buf)
			want := gold[:len(gold)*i/n]
			if _, err := w.Write(want); err != nil {
				t.Errorf("#%d: Write: %v", i, err)
				failed = true
				continue
			}
			if buffered {
				if err := w.Flush(); err != nil {
					t.Errorf("#%d: Flush: %v", i, err)
					failed = true
					continue
				}
			}
			got, err := ioutil.ReadAll(NewReader(buf))
			if err != nil {
				t.Errorf("#%d: ReadAll: %v", i, err)
				failed = true
				continue
			}
			gots = append(gots, got)
			wants = append(wants, want)
		}
		if failed {
			continue
		}
		for i := range gots {
			if err := cmp(gots[i], wants[i]); err != nil {
				t.Errorf("#%d: %v", i, err)
			}
		}
	}
}

// TestWriterResetWithoutFlush checks that Reset discards any buffered,
// unflushed data rather than writing it to the new destination.
func TestWriterResetWithoutFlush(t *testing.T) {
	buf0 := new(bytes.Buffer)
	buf1 := new(bytes.Buffer)
	w := NewBufferedWriter(buf0)
	if _, err := w.Write([]byte("xxx")); err != nil {
		t.Fatalf("Write #0: %v", err)
	}
	// Note that we don't Flush the Writer before calling Reset.
	w.Reset(buf1)
	if _, err := w.Write([]byte("yyy")); err != nil {
		t.Fatalf("Write #1: %v", err)
	}
	if err := w.Flush(); err != nil {
		t.Fatalf("Flush: %v", err)
	}
	got, err := ioutil.ReadAll(NewReader(buf1))
	if err != nil {
		t.Fatalf("ReadAll: %v", err)
	}
	if err := cmp(got, []byte("yyy")); err != nil {
		t.Fatal(err)
	}
}

// writeCounter is an io.Writer that counts how many times Write is called,
// accepting (and discarding) all bytes.
type writeCounter int

// Write increments the call count and reports the full write as successful.
func (c *writeCounter) Write(p []byte) (int, error) {
	*c++
	return len(p), nil
}

// TestNumUnderlyingWrites tests that each Writer flush only makes one or two
// Write calls on its underlying io.Writer, depending on whether or not the
// flushed buffer was compressible.
func TestNumUnderlyingWrites(t *testing.T) {
	testCases := []struct {
		input []byte
		want  int
	}{
		{bytes.Repeat([]byte{'x'}, 100), 1},
		{bytes.Repeat([]byte{'y'}, 100), 1},
		{[]byte("ABCDEFGHIJKLMNOPQRST"), 2},
	}

	var c writeCounter
	w := NewBufferedWriter(&c)
	defer w.Close()
	for i, tc := range testCases {
		c = 0
		if _, err := w.Write(tc.input); err != nil {
			t.Errorf("#%d: Write: %v", i, err)
			continue
		}
		if err := w.Flush(); err != nil {
			t.Errorf("#%d: Flush: %v", i, err)
			continue
		}
		if int(c) != tc.want {
			t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
			continue
		}
	}
}

// benchDecode benchmarks Decode on the Encode'd form of src, reporting
// throughput in terms of the uncompressed size.
func benchDecode(b *testing.B, src []byte) {
	encoded := Encode(nil, src)
	// Bandwidth is in amount of uncompressed data.
	b.SetBytes(int64(len(src)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		Decode(src, encoded)
	}
}

func benchEncode(b *testing.B, src []byte) {
	// Bandwidth is in amount of uncompressed data.
1166 b.SetBytes(int64(len(src))) 1167 dst := make([]byte, MaxEncodedLen(len(src))) 1168 b.ResetTimer() 1169 for i := 0; i < b.N; i++ { 1170 Encode(dst, src) 1171 } 1172} 1173 1174func testOrBenchmark(b testing.TB) string { 1175 if _, ok := b.(*testing.B); ok { 1176 return "benchmark" 1177 } 1178 return "test" 1179} 1180 1181func readFile(b testing.TB, filename string) []byte { 1182 src, err := ioutil.ReadFile(filename) 1183 if err != nil { 1184 b.Skipf("skipping %s: %v", testOrBenchmark(b), err) 1185 } 1186 if len(src) == 0 { 1187 b.Fatalf("%s has zero length", filename) 1188 } 1189 return src 1190} 1191 1192// expand returns a slice of length n containing repeated copies of src. 1193func expand(src []byte, n int) []byte { 1194 dst := make([]byte, n) 1195 for x := dst; len(x) > 0; { 1196 i := copy(x, src) 1197 x = x[i:] 1198 } 1199 return dst 1200} 1201 1202func benchWords(b *testing.B, n int, decode bool) { 1203 // Note: the file is OS-language dependent so the resulting values are not 1204 // directly comparable for non-US-English OS installations. 
1205 data := expand(readFile(b, "/usr/share/dict/words"), n) 1206 if decode { 1207 benchDecode(b, data) 1208 } else { 1209 benchEncode(b, data) 1210 } 1211} 1212 1213func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) } 1214func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) } 1215func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) } 1216func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) } 1217func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) } 1218func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) } 1219func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) } 1220func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) } 1221func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) } 1222func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) } 1223func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) } 1224func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) } 1225 1226func BenchmarkRandomEncode(b *testing.B) { 1227 rng := rand.New(rand.NewSource(1)) 1228 data := make([]byte, 1<<20) 1229 for i := range data { 1230 data[i] = uint8(rng.Intn(256)) 1231 } 1232 benchEncode(b, data) 1233} 1234 1235// testFiles' values are copied directly from 1236// https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc 1237// The label field is unused in snappy-go. 
1238var testFiles = []struct { 1239 label string 1240 filename string 1241 sizeLimit int 1242}{ 1243 {"html", "html", 0}, 1244 {"urls", "urls.10K", 0}, 1245 {"jpg", "fireworks.jpeg", 0}, 1246 {"jpg_200", "fireworks.jpeg", 200}, 1247 {"pdf", "paper-100k.pdf", 0}, 1248 {"html4", "html_x_4", 0}, 1249 {"txt1", "alice29.txt", 0}, 1250 {"txt2", "asyoulik.txt", 0}, 1251 {"txt3", "lcet10.txt", 0}, 1252 {"txt4", "plrabn12.txt", 0}, 1253 {"pb", "geo.protodata", 0}, 1254 {"gaviota", "kppkn.gtb", 0}, 1255} 1256 1257const ( 1258 // The benchmark data files are at this canonical URL. 1259 benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/" 1260) 1261 1262func downloadBenchmarkFiles(b testing.TB, basename string) (errRet error) { 1263 bDir := filepath.FromSlash(*benchdataDir) 1264 filename := filepath.Join(bDir, basename) 1265 if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 { 1266 return nil 1267 } 1268 1269 if !*download { 1270 b.Skipf("test data not found; skipping %s without the -download flag", testOrBenchmark(b)) 1271 } 1272 // Download the official snappy C++ implementation reference test data 1273 // files for benchmarking. 
1274 if err := os.MkdirAll(bDir, 0777); err != nil && !os.IsExist(err) { 1275 return fmt.Errorf("failed to create %s: %s", bDir, err) 1276 } 1277 1278 f, err := os.Create(filename) 1279 if err != nil { 1280 return fmt.Errorf("failed to create %s: %s", filename, err) 1281 } 1282 defer f.Close() 1283 defer func() { 1284 if errRet != nil { 1285 os.Remove(filename) 1286 } 1287 }() 1288 url := benchURL + basename 1289 resp, err := http.Get(url) 1290 if err != nil { 1291 return fmt.Errorf("failed to download %s: %s", url, err) 1292 } 1293 defer resp.Body.Close() 1294 if s := resp.StatusCode; s != http.StatusOK { 1295 return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s)) 1296 } 1297 _, err = io.Copy(f, resp.Body) 1298 if err != nil { 1299 return fmt.Errorf("failed to download %s to %s: %s", url, filename, err) 1300 } 1301 return nil 1302} 1303 1304func benchFile(b *testing.B, i int, decode bool) { 1305 if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil { 1306 b.Fatalf("failed to download testdata: %s", err) 1307 } 1308 bDir := filepath.FromSlash(*benchdataDir) 1309 data := readFile(b, filepath.Join(bDir, testFiles[i].filename)) 1310 if n := testFiles[i].sizeLimit; 0 < n && n < len(data) { 1311 data = data[:n] 1312 } 1313 if decode { 1314 benchDecode(b, data) 1315 } else { 1316 benchEncode(b, data) 1317 } 1318} 1319 1320// Naming convention is kept similar to what snappy's C++ implementation uses. 
1321func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) } 1322func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) } 1323func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) } 1324func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) } 1325func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) } 1326func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) } 1327func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) } 1328func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) } 1329func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) } 1330func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) } 1331func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) } 1332func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) } 1333func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) } 1334func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) } 1335func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) } 1336func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) } 1337func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) } 1338func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) } 1339func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) } 1340func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) } 1341func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) } 1342func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) } 1343func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) } 1344func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) } 1345 1346func BenchmarkExtendMatch(b *testing.B) { 1347 tDir := filepath.FromSlash(*testdataDir) 1348 src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText)) 1349 if err != nil { 1350 b.Fatalf("ReadFile: %v", err) 1351 } 1352 b.ResetTimer() 1353 for i := 0; i < b.N; i++ { 1354 for _, tc := range extendMatchGoldenTestCases { 1355 extendMatch(src, tc.i, tc.j) 1356 } 1357 } 1358} 1359