// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package snappy

import (
	"bytes"
	"encoding/binary"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"math/rand"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"testing"
)

var (
	download     = flag.Bool("download", false, "If true, download any missing files before running benchmarks")
	testdataDir  = flag.String("testdataDir", "testdata", "Directory containing the test data")
	benchdataDir = flag.String("benchdataDir", "testdata/bench", "Directory containing the benchmark data")
)

// goEncoderShouldMatchCppEncoder is whether to test that the algorithm used by
// Go's encoder matches byte-for-byte what the C++ snappy encoder produces, on
// this GOARCH. There is more than one valid encoding of any given input, and
// there is more than one good algorithm along the frontier of trading off
// throughput for output size. Nonetheless, we presume that the C++ encoder's
// algorithm is a good one and has been tested on a wide range of inputs, so
// matching that exactly should mean that the Go encoder's algorithm is also
// good, without needing to gather our own corpus of test data.
//
// The exact algorithm used by the C++ code is potentially endian dependent, as
// it puns a byte pointer to a uint32 pointer to load, hash and compare 4 bytes
// at a time. The Go implementation is endian agnostic, in that its output is
// the same (as little-endian C++ code), regardless of the CPU's endianness.
//
// Thus, when comparing Go's output to C++ output generated beforehand, such as
// the "testdata/pi.txt.rawsnappy" file generated by C++ code on a little-
// endian system, we can run that test regardless of the runtime.GOARCH value.
//
// When comparing Go's output to dynamically generated C++ output, i.e. the
// result of fork/exec'ing a C++ program, we can run that test only on
// little-endian systems, because the C++ output might be different on
// big-endian systems. The runtime package doesn't export endianness per se,
// but we can restrict this match-C++ test to common little-endian systems.
const goEncoderShouldMatchCppEncoder = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "arm"

func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
	got := maxEncodedLenOfMaxBlockSize
	want := MaxEncodedLen(maxBlockSize)
	if got != want {
		t.Fatalf("got %d, want %d", got, want)
	}
}

func cmp(a, b []byte) error {
	if bytes.Equal(a, b) {
		return nil
	}
	if len(a) != len(b) {
		return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
	}
	for i := range a {
		if a[i] != b[i] {
			return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i])
		}
	}
	return nil
}

func roundtrip(b, ebuf, dbuf []byte) error {
	d, err := Decode(dbuf, Encode(ebuf, b))
	if err != nil {
		return fmt.Errorf("decoding error: %v", err)
	}
	if err := cmp(d, b); err != nil {
		return fmt.Errorf("roundtrip mismatch: %v", err)
	}
	return nil
}

func TestEmpty(t *testing.T) {
	if err := roundtrip(nil, nil, nil); err != nil {
		t.Fatal(err)
	}
}

func TestSmallCopy(t *testing.T) {
	for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
		for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
			for i := 0; i < 32; i++ {
				s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
				if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
					t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
				}
			}
		}
	}
}

func TestSmallRand(t *testing.T) {
	rng := rand.New(rand.NewSource(1))
	for n := 1; n < 20000; n += 23 {
		b := make([]byte, n)
		for i := range b {
			b[i] = uint8(rng.Intn(256))
		}
		if err := roundtrip(b, nil, nil); err != nil {
			t.Fatal(err)
		}
	}
}

func TestSmallRegular(t *testing.T) {
	for n := 1; n < 20000; n += 23 {
		b := make([]byte, n)
		for i := range b {
			b[i] = uint8(i%10 + 'a')
		}
		if err := roundtrip(b, nil, nil); err != nil {
			t.Fatal(err)
		}
	}
}

func TestInvalidVarint(t *testing.T) {
	testCases := []struct {
		desc  string
		input string
	}{{
		"invalid varint, final byte has continuation bit set",
		"\xff",
	}, {
		"invalid varint, value overflows uint64",
		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
	}, {
		// https://github.com/google/snappy/blob/master/format_description.txt
		// says that "the stream starts with the uncompressed length [as a
		// varint] (up to a maximum of 2^32 - 1)".
		"valid varint (as uint64), but value overflows uint32",
		"\x80\x80\x80\x80\x10",
	}}

	for _, tc := range testCases {
		input := []byte(tc.input)
		if _, err := DecodedLen(input); err != ErrCorrupt {
			t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
		}
		if _, err := Decode(nil, input); err != ErrCorrupt {
			t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
		}
	}
}
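
// parseDecodedLenSketch is an illustrative sketch, not the package's actual
// implementation, of the header parsing that the TestInvalidVarint cases above
// exercise: a raw snappy block starts with its uncompressed length as a
// varint, and a decoder must reject both malformed varints and values that do
// not fit in a uint32. The function name and shape are hypothetical.
func parseDecodedLenSketch(src []byte) (uint64, error) {
	v, n := binary.Uvarint(src)
	if n <= 0 {
		// n == 0 means src ended mid-varint; n < 0 means the value
		// overflows a uint64.
		return 0, ErrCorrupt
	}
	if v > 1<<32-1 {
		// The format caps the uncompressed length at 2^32 - 1.
		return 0, ErrCorrupt
	}
	return v, nil
}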

func TestDecode(t *testing.T) {
	lit40Bytes := make([]byte, 40)
	for i := range lit40Bytes {
		lit40Bytes[i] = byte(i)
	}
	lit40 := string(lit40Bytes)

	testCases := []struct {
		desc    string
		input   string
		want    string
		wantErr error
	}{{
		`decodedLen=0; valid input`,
		"\x00",
		"",
		nil,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
		"\x03" + "\x08\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
		"\x02" + "\x08\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
		"\x03" + "\x08\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
		"\x28" + "\x9c" + lit40,
		lit40,
		nil,
	}, {
		`decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
		"\x01" + "\xf0",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
		"\x03" + "\xf0\x02\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
		"\x01" + "\xf4\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
		"\x03" + "\xf4\x02\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
		"\x01" + "\xf8\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
		"\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
		"\x01" + "\xfc\x00\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
		"\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
		"\x04" + "\xfc\x02\x00\x00\x00\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
		"\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
		"\x04" + "\x01",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
		"\x04" + "\x02\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy4, 4 extra length|offset bytes; not enough extra bytes`,
		"\x04" + "\x03\x00\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
		"\x04" + "\x0cabcd",
		"abcd",
		nil,
	}, {
		`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
		"\x0d" + "\x0cabcd" + "\x15\x04",
		"abcdabcdabcda",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x04",
		"abcdabcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x02",
		"abcdcdcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x01",
		"abcddddd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
		"\x08" + "\x0cabcd" + "\x01\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
		"\x09" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
		"\x08" + "\x0cabcd" + "\x01\x05",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
		"\x07" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
		"\x06" + "\x0cabcd" + "\x06\x03\x00",
		"abcdbc",
		nil,
	}, {
		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy4; length=2 offset=3; valid input`,
		"\x06" + "\x0cabcd" + "\x07\x03\x00\x00\x00",
		"abcdbc",
		nil,
	}, {
		`decodedLen=0; tagCopy4, 4 extra length|offset bytes; with msb set (0x93); discovered by go-fuzz`,
		"\x00\xfc000\x93",
		"",
		ErrCorrupt,
	}}

	const (
		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
		// not present in either the input or the output. It is written to dBuf
		// to check that Decode does not write bytes past the end of
		// dBuf[:dLen].
		//
		// The magic number 37 was chosen because it is prime. A more 'natural'
		// number like 32 might lead to a false negative if, for example, a
		// byte was incorrectly copied 4*8 bytes later.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)

	var dBuf [100]byte
loop:
	for i, tc := range testCases {
		input := []byte(tc.input)
		for _, x := range input {
			if notPresentBase <= x && x < notPresentBase+notPresentLen {
				t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
				continue loop
			}
		}

		dLen, n := binary.Uvarint(input)
		if n <= 0 {
			t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
			continue
		}
		if dLen > uint64(len(dBuf)) {
			t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
			continue
		}

		for j := range dBuf {
			dBuf[j] = byte(notPresentBase + j%notPresentLen)
		}
		g, gotErr := Decode(dBuf[:], input)
		if got := string(g); got != tc.want || gotErr != tc.wantErr {
			t.Errorf("#%d (%s):\ngot  %q, %v\nwant %q, %v",
				i, tc.desc, got, gotErr, tc.want, tc.wantErr)
			continue
		}
		for j, x := range dBuf {
			if uint64(j) < dLen {
				continue
			}
			if w := byte(notPresentBase + j%notPresentLen); x != w {
				t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
					i, tc.desc, j, x, w, dBuf)
				continue loop
			}
		}
	}
}

func TestDecodeCopy4(t *testing.T) {
	dots := strings.Repeat(".", 65536)

	input := strings.Join([]string{
		"\x89\x80\x04",         // decodedLen = 65545.
		"\x0cpqrs",             // 4-byte literal "pqrs".
		"\xf4\xff\xff" + dots,  // 65536-byte literal dots.
		"\x13\x04\x00\x01\x00", // tagCopy4; length=5 offset=65540.
	}, "")

	gotBytes, err := Decode(nil, []byte(input))
	if err != nil {
		t.Fatal(err)
	}
	got := string(gotBytes)
	want := "pqrs" + dots + "pqrs."
	if len(got) != len(want) {
		t.Fatalf("got %d bytes, want %d", len(got), len(want))
	}
	if got != want {
		for i := 0; i < len(got); i++ {
			if g, w := got[i], want[i]; g != w {
				t.Fatalf("byte #%d: got %#02x, want %#02x", i, g, w)
			}
		}
	}
}

// TestDecodeLengthOffset tests decoding an encoding of the form literal +
// copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
func TestDecodeLengthOffset(t *testing.T) {
	const (
		prefix = "abcdefghijklmnopqr"
		suffix = "ABCDEFGHIJKLMNOPQR"

		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
		// not present in either the input or the output. It is written to
		// gotBuf to check that Decode does not write bytes past the end of
		// gotBuf[:totalLen].
		//
		// The magic number 37 was chosen because it is prime. A more 'natural'
		// number like 32 might lead to a false negative if, for example, a
		// byte was incorrectly copied 4*8 bytes later.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)
	var gotBuf, wantBuf, inputBuf [128]byte
	for length := 1; length <= 18; length++ {
		for offset := 1; offset <= 18; offset++ {
		loop:
			for suffixLen := 0; suffixLen <= 18; suffixLen++ {
				totalLen := len(prefix) + length + suffixLen

				inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
				inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
				inputLen++
				inputLen += copy(inputBuf[inputLen:], prefix)
				inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
				inputBuf[inputLen+1] = byte(offset)
				inputBuf[inputLen+2] = 0x00
				inputLen += 3
				if suffixLen > 0 {
					inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
					inputLen++
					inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
				}
				input := inputBuf[:inputLen]

				for i := range gotBuf {
					gotBuf[i] = byte(notPresentBase + i%notPresentLen)
				}
				got, err := Decode(gotBuf[:], input)
				if err != nil {
					t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
					continue
				}

				wantLen := 0
				wantLen += copy(wantBuf[wantLen:], prefix)
				for i := 0; i < length; i++ {
					wantBuf[wantLen] = wantBuf[wantLen-offset]
					wantLen++
				}
				wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
				want := wantBuf[:wantLen]

				for _, x := range input {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
							length, offset, suffixLen, x, input)
						continue loop
					}
				}
				for i, x := range gotBuf {
					if i < totalLen {
						continue
					}
					if w := byte(notPresentBase + i%notPresentLen); x != w {
						t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
							"Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
							length, offset, suffixLen, totalLen, i, x, w, gotBuf)
						continue loop
					}
				}
				for _, x := range want {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
							length, offset, suffixLen, x, want)
						continue loop
					}
				}

				if !bytes.Equal(got, want) {
					t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot   % x\nwant  % x",
						length, offset, suffixLen, input, got, want)
					continue
				}
			}
		}
	}
}

const (
	goldenText       = "Mark.Twain-Tom.Sawyer.txt"
	goldenCompressed = goldenText + ".rawsnappy"
)

func TestDecodeGoldenInput(t *testing.T) {
	tDir := filepath.FromSlash(*testdataDir)
	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	got, err := Decode(nil, src)
	if err != nil {
		t.Fatalf("Decode: %v", err)
	}
	want, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if err := cmp(got, want); err != nil {
		t.Fatal(err)
	}
}

func TestEncodeGoldenInput(t *testing.T) {
	tDir := filepath.FromSlash(*testdataDir)
	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	got := Encode(nil, src)
	want, err := ioutil.ReadFile(filepath.Join(tDir, goldenCompressed))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if err := cmp(got, want); err != nil {
		t.Fatal(err)
	}
}

func TestExtendMatchGoldenInput(t *testing.T) {
	tDir := filepath.FromSlash(*testdataDir)
	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	for i, tc := range extendMatchGoldenTestCases {
		got := extendMatch(src, tc.i, tc.j)
		if got != tc.want {
			t.Errorf("test #%d: i, j = %5d, %5d: got %5d (= j + %6d), want %5d (= j + %6d)",
				i, tc.i, tc.j, got, got-tc.j, tc.want, tc.want-tc.j)
		}
	}
}

func TestExtendMatch(t *testing.T) {
	// ref is a simple, reference implementation of extendMatch.
	ref := func(src []byte, i, j int) int {
		for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
		}
		return j
	}

	nums := []int{0, 1, 2, 7, 8, 9, 29, 30, 31, 32, 33, 34, 38, 39, 40}
	for yIndex := 40; yIndex > 30; yIndex-- {
		xxx := bytes.Repeat([]byte("x"), 40)
		if yIndex < len(xxx) {
			xxx[yIndex] = 'y'
		}
		for _, i := range nums {
			for _, j := range nums {
				if i >= j {
					continue
				}
				got := extendMatch(xxx, i, j)
				want := ref(xxx, i, j)
				if got != want {
					t.Errorf("yIndex=%d, i=%d, j=%d: got %d, want %d", yIndex, i, j, got, want)
				}
			}
		}
	}
}

const snappytoolCmdName = "cmd/snappytool/snappytool"

func skipTestSameEncodingAsCpp() (msg string) {
	if !goEncoderShouldMatchCppEncoder {
		return fmt.Sprintf("skipping testing that the encoding is byte-for-byte identical to C++: GOARCH=%s", runtime.GOARCH)
	}
	if _, err := os.Stat(snappytoolCmdName); err != nil {
		return fmt.Sprintf("could not find snappytool: %v", err)
	}
	return ""
}

func runTestSameEncodingAsCpp(src []byte) error {
	got := Encode(nil, src)

	cmd := exec.Command(snappytoolCmdName, "-e")
	cmd.Stdin = bytes.NewReader(src)
	want, err := cmd.Output()
	if err != nil {
		return fmt.Errorf("could not run snappytool: %v", err)
	}
	return cmp(got, want)
}

func TestSameEncodingAsCppShortCopies(t *testing.T) {
	if msg := skipTestSameEncodingAsCpp(); msg != "" {
		t.Skip(msg)
	}
	src := bytes.Repeat([]byte{'a'}, 20)
	for i := 0; i <= len(src); i++ {
		if err := runTestSameEncodingAsCpp(src[:i]); err != nil {
			t.Errorf("i=%d: %v", i, err)
		}
	}
}

func TestSameEncodingAsCppLongFiles(t *testing.T) {
	if msg := skipTestSameEncodingAsCpp(); msg != "" {
		t.Skip(msg)
	}
	bDir := filepath.FromSlash(*benchdataDir)
	failed := false
	for i, tf := range testFiles {
		if err := downloadBenchmarkFiles(t, tf.filename); err != nil {
			t.Fatalf("failed to download testdata: %s", err)
		}
		data := readFile(t, filepath.Join(bDir, tf.filename))
		if n := tf.sizeLimit; 0 < n && n < len(data) {
			data = data[:n]
		}
		if err := runTestSameEncodingAsCpp(data); err != nil {
			t.Errorf("i=%d: %v", i, err)
			failed = true
		}
	}
	if failed {
		t.Errorf("was the snappytool program built against the C++ snappy library version " +
			"d53de187 or later, committed on 2016-04-05? See " +
			"https://github.com/google/snappy/commit/d53de18799418e113e44444252a39b12a0e4e0cc")
	}
}

// TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
// described in decode_amd64.s and its claim of a 10 byte overrun worst case.
func TestSlowForwardCopyOverrun(t *testing.T) {
	const base = 100

	for length := 1; length < 18; length++ {
		for offset := 1; offset < 18; offset++ {
			highWaterMark := base
			d := base
			l := length
			o := offset

			// makeOffsetAtLeast8
			for o < 8 {
				if end := d + 8; highWaterMark < end {
					highWaterMark = end
				}
				l -= o
				d += o
				o += o
			}

			// fixUpSlowForwardCopy
			a := d
			d += l

			// finishSlowForwardCopy
			for l > 0 {
				if end := a + 8; highWaterMark < end {
					highWaterMark = end
				}
				a += 8
				l -= 8
			}

			dWant := base + length
			overrun := highWaterMark - dWant
			if d != dWant || overrun < 0 || 10 < overrun {
				t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
					length, offset, d, overrun, dWant)
			}
		}
	}
}

// TestEncodeNoiseThenRepeats encodes input for which the first half is very
// incompressible and the second half is very compressible. The encoded form's
// length should be closer to 50% of the original length than 100%.
func TestEncodeNoiseThenRepeats(t *testing.T) {
	for _, origLen := range []int{256 * 1024, 2048 * 1024} {
		src := make([]byte, origLen)
		rng := rand.New(rand.NewSource(1))
		firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
		for i := range firstHalf {
			firstHalf[i] = uint8(rng.Intn(256))
		}
		for i := range secondHalf {
			secondHalf[i] = uint8(i >> 8)
		}
		dst := Encode(nil, src)
		if got, want := len(dst), origLen*3/4; got >= want {
			t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
		}
	}
}

func TestFramingFormat(t *testing.T) {
	// src is comprised of alternating 1e5-sized sequences of random
	// (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
	// because it is larger than maxBlockSize (64k).
	src := make([]byte, 1e6)
	rng := rand.New(rand.NewSource(1))
	for i := 0; i < 10; i++ {
		if i%2 == 0 {
			for j := 0; j < 1e5; j++ {
				src[1e5*i+j] = uint8(rng.Intn(256))
			}
		} else {
			for j := 0; j < 1e5; j++ {
				src[1e5*i+j] = uint8(i)
			}
		}
	}

	buf := new(bytes.Buffer)
	if _, err := NewWriter(buf).Write(src); err != nil {
		t.Fatalf("Write: encoding: %v", err)
	}
	dst, err := ioutil.ReadAll(NewReader(buf))
	if err != nil {
		t.Fatalf("ReadAll: decoding: %v", err)
	}
	if err := cmp(dst, src); err != nil {
		t.Fatal(err)
	}
}

func TestWriterGoldenOutput(t *testing.T) {
	buf := new(bytes.Buffer)
	w := NewBufferedWriter(buf)
	defer w.Close()
	w.Write([]byte("abcd")) // Not compressible.
	w.Flush()
	w.Write(bytes.Repeat([]byte{'A'}, 150)) // Compressible.
	w.Flush()
	// The next chunk is also compressible, but a naive, greedy encoding of the
	// overall length 67 copy as a length 64 copy (the longest expressible as a
	// tagCopy1 or tagCopy2) plus a length 3 remainder would be two 3-byte
	// tagCopy2 tags (6 bytes), since the minimum length for a tagCopy1 is 4
	// bytes. Instead, we could do it shorter, in 5 bytes: a 3-byte tagCopy2
	// (of length 60) and a 2-byte tagCopy1 (of length 7).
	w.Write(bytes.Repeat([]byte{'B'}, 68))
	w.Write([]byte("efC"))                 // Not compressible.
	w.Write(bytes.Repeat([]byte{'C'}, 20)) // Compressible.
	w.Write(bytes.Repeat([]byte{'B'}, 20)) // Compressible.
	w.Write([]byte("g"))                   // Not compressible.
	w.Flush()

	got := buf.String()
	want := strings.Join([]string{
		magicChunk,
		"\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
		"\x68\x10\xe6\xb6", // Checksum.
		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
		"\x00\x11\x00\x00", // Compressed chunk, 17 bytes long (including 4 byte checksum).
		"\x5f\xeb\xf2\x10", // Checksum.
		"\x96\x01",         // Compressed payload: Uncompressed length (varint encoded): 150.
		"\x00\x41",         // Compressed payload: tagLiteral, length=1,  "A".
		"\xfe\x01\x00",     // Compressed payload: tagCopy2,   length=64, offset=1.
		"\xfe\x01\x00",     // Compressed payload: tagCopy2,   length=64, offset=1.
		"\x52\x01\x00",     // Compressed payload: tagCopy2,   length=21, offset=1.
		"\x00\x18\x00\x00", // Compressed chunk, 24 bytes long (including 4 byte checksum).
		"\x30\x85\x69\xeb", // Checksum.
		"\x70",             // Compressed payload: Uncompressed length (varint encoded): 112.
		"\x00\x42",         // Compressed payload: tagLiteral, length=1,  "B".
		"\xee\x01\x00",     // Compressed payload: tagCopy2,   length=60, offset=1.
		"\x0d\x01",         // Compressed payload: tagCopy1,   length=7,  offset=1.
		"\x08\x65\x66\x43", // Compressed payload: tagLiteral, length=3,  "efC".
		"\x4e\x01\x00",     // Compressed payload: tagCopy2,   length=20, offset=1.
		"\x4e\x5a\x00",     // Compressed payload: tagCopy2,   length=20, offset=90.
		"\x00\x67",         // Compressed payload: tagLiteral, length=1,  "g".
	}, "")
	if got != want {
		t.Fatalf("\ngot:  % x\nwant: % x", got, want)
	}
}
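
// copyTagBytesSketch is a hypothetical helper, not part of the encoder,
// illustrating the byte-count arithmetic in the TestWriterGoldenOutput comment
// above: for a copy whose offset fits in tagCopy1's 11-bit offset field, a
// tagCopy1 element costs 2 bytes but only covers lengths 4 to 11, while a
// tagCopy2 element costs 3 bytes and covers lengths 1 to 64. Splitting a
// length-67 copy as 64+3 therefore costs 3+3 = 6 tag bytes, whereas the 60+7
// split seen in the golden output costs only 3+2 = 5.
func copyTagBytesSketch(length int) int {
	switch {
	case length >= 4 && length <= 11:
		return 2 // tagCopy1
	case length >= 1 && length <= 64:
		return 3 // tagCopy2
	default:
		return -1 // lengths over 64 must be split into multiple elements
	}
}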

func TestEmitLiteral(t *testing.T) {
	testCases := []struct {
		length int
		want   string
	}{
		{1, "\x00"},
		{2, "\x04"},
		{59, "\xe8"},
		{60, "\xec"},
		{61, "\xf0\x3c"},
		{62, "\xf0\x3d"},
		{254, "\xf0\xfd"},
		{255, "\xf0\xfe"},
		{256, "\xf0\xff"},
		{257, "\xf4\x00\x01"},
		{65534, "\xf4\xfd\xff"},
		{65535, "\xf4\xfe\xff"},
		{65536, "\xf4\xff\xff"},
	}

	dst := make([]byte, 70000)
	nines := bytes.Repeat([]byte{0x99}, 65536)
	for _, tc := range testCases {
		lit := nines[:tc.length]
		n := emitLiteral(dst, lit)
		if !bytes.HasSuffix(dst[:n], lit) {
			t.Errorf("length=%d: did not end with that many literal bytes", tc.length)
			continue
		}
		got := string(dst[:n-tc.length])
		if got != tc.want {
			t.Errorf("length=%d:\ngot  % x\nwant % x", tc.length, got, tc.want)
			continue
		}
	}
}

func TestEmitCopy(t *testing.T) {
	testCases := []struct {
		offset int
		length int
		want   string
	}{
		{8, 04, "\x01\x08"},
		{8, 11, "\x1d\x08"},
		{8, 12, "\x2e\x08\x00"},
		{8, 13, "\x32\x08\x00"},
		{8, 59, "\xea\x08\x00"},
		{8, 60, "\xee\x08\x00"},
		{8, 61, "\xf2\x08\x00"},
		{8, 62, "\xf6\x08\x00"},
		{8, 63, "\xfa\x08\x00"},
		{8, 64, "\xfe\x08\x00"},
		{8, 65, "\xee\x08\x00\x05\x08"},
		{8, 66, "\xee\x08\x00\x09\x08"},
		{8, 67, "\xee\x08\x00\x0d\x08"},
		{8, 68, "\xfe\x08\x00\x01\x08"},
		{8, 69, "\xfe\x08\x00\x05\x08"},
		{8, 80, "\xfe\x08\x00\x3e\x08\x00"},

		{256, 04, "\x21\x00"},
		{256, 11, "\x3d\x00"},
		{256, 12, "\x2e\x00\x01"},
		{256, 13, "\x32\x00\x01"},
		{256, 59, "\xea\x00\x01"},
		{256, 60, "\xee\x00\x01"},
		{256, 61, "\xf2\x00\x01"},
		{256, 62, "\xf6\x00\x01"},
		{256, 63, "\xfa\x00\x01"},
		{256, 64, "\xfe\x00\x01"},
		{256, 65, "\xee\x00\x01\x25\x00"},
		{256, 66, "\xee\x00\x01\x29\x00"},
		{256, 67, "\xee\x00\x01\x2d\x00"},
		{256, 68, "\xfe\x00\x01\x21\x00"},
		{256, 69, "\xfe\x00\x01\x25\x00"},
		{256, 80, "\xfe\x00\x01\x3e\x00\x01"},

		{2048, 04, "\x0e\x00\x08"},
		{2048, 11, "\x2a\x00\x08"},
		{2048, 12, "\x2e\x00\x08"},
		{2048, 13, "\x32\x00\x08"},
		{2048, 59, "\xea\x00\x08"},
		{2048, 60, "\xee\x00\x08"},
		{2048, 61, "\xf2\x00\x08"},
		{2048, 62, "\xf6\x00\x08"},
		{2048, 63, "\xfa\x00\x08"},
		{2048, 64, "\xfe\x00\x08"},
		{2048, 65, "\xee\x00\x08\x12\x00\x08"},
		{2048, 66, "\xee\x00\x08\x16\x00\x08"},
		{2048, 67, "\xee\x00\x08\x1a\x00\x08"},
		{2048, 68, "\xfe\x00\x08\x0e\x00\x08"},
		{2048, 69, "\xfe\x00\x08\x12\x00\x08"},
		{2048, 80, "\xfe\x00\x08\x3e\x00\x08"},
	}

	dst := make([]byte, 1024)
	for _, tc := range testCases {
		n := emitCopy(dst, tc.offset, tc.length)
		got := string(dst[:n])
		if got != tc.want {
			t.Errorf("offset=%d, length=%d:\ngot  % x\nwant % x", tc.offset, tc.length, got, tc.want)
		}
	}
}

func TestNewBufferedWriter(t *testing.T) {
	// Test all 32 possible sub-sequences of these 5 input slices.
	//
	// Their lengths sum to 400,000, which is over 6 times the Writer ibuf
	// capacity: 6 * maxBlockSize is 393,216.
	inputs := [][]byte{
		bytes.Repeat([]byte{'a'}, 40000),
		bytes.Repeat([]byte{'b'}, 150000),
		bytes.Repeat([]byte{'c'}, 60000),
		bytes.Repeat([]byte{'d'}, 120000),
		bytes.Repeat([]byte{'e'}, 30000),
	}
loop:
	for i := 0; i < 1<<uint(len(inputs)); i++ {
		var want []byte
		buf := new(bytes.Buffer)
		w := NewBufferedWriter(buf)
		for j, input := range inputs {
			if i&(1<<uint(j)) == 0 {
				continue
			}
			if _, err := w.Write(input); err != nil {
				t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
				continue loop
			}
			want = append(want, input...)
		}
		if err := w.Close(); err != nil {
			t.Errorf("i=%#02x: Close: %v", i, err)
			continue
		}
		got, err := ioutil.ReadAll(NewReader(buf))
		if err != nil {
			t.Errorf("i=%#02x: ReadAll: %v", i, err)
			continue
		}
		if err := cmp(got, want); err != nil {
			t.Errorf("i=%#02x: %v", i, err)
			continue
		}
	}
}

func TestFlush(t *testing.T) {
	buf := new(bytes.Buffer)
	w := NewBufferedWriter(buf)
	defer w.Close()
	if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
		t.Fatalf("Write: %v", err)
	}
	if n := buf.Len(); n != 0 {
		t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
	}
	if err := w.Flush(); err != nil {
		t.Fatalf("Flush: %v", err)
	}
	if n := buf.Len(); n == 0 {
		t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
	}
}

func TestReaderUncompressedDataOK(t *testing.T) {
	r := NewReader(strings.NewReader(magicChunk +
		"\x01\x08\x00\x00" + // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
		"\x68\x10\xe6\xb6" + // Checksum.
		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
	))
	g, err := ioutil.ReadAll(r)
	if err != nil {
		t.Fatal(err)
	}
	if got, want := string(g), "abcd"; got != want {
		t.Fatalf("got %q, want %q", got, want)
	}
}
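
// buildUncompressedChunkSketch shows, as a hypothetical illustration of the
// framing layout used by the reader tests around here, how an uncompressed
// chunk could be assembled: a chunk-type byte (0x01), a 3-byte little-endian
// length counting the 4-byte checksum plus the payload, then the checksum and
// the payload themselves. The checksum is passed in rather than computed,
// since the masked CRC-32C details are beside the point.
func buildUncompressedChunkSketch(checksum [4]byte, payload []byte) []byte {
	n := len(payload) + 4 // the chunk length includes the checksum
	chunk := []byte{0x01, byte(n), byte(n >> 8), byte(n >> 16)}
	chunk = append(chunk, checksum[:]...)
	return append(chunk, payload...)
}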

func TestReaderUncompressedDataNoPayload(t *testing.T) {
	r := NewReader(strings.NewReader(magicChunk +
		"\x01\x04\x00\x00" + // Uncompressed chunk, 4 bytes long.
		"", // No payload; corrupt input.
	))
	if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
		t.Fatalf("got %v, want %v", err, ErrCorrupt)
	}
}

func TestReaderUncompressedDataTooLong(t *testing.T) {
	// https://github.com/google/snappy/blob/master/framing_format.txt section
	// 4.3 says that "the maximum legal chunk length... is 65540", or 0x10004.
	const n = 0x10005

	r := NewReader(strings.NewReader(magicChunk +
		"\x01\x05\x00\x01" + // Uncompressed chunk, n bytes long.
		strings.Repeat("\x00", n),
	))
	if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
		t.Fatalf("got %v, want %v", err, ErrCorrupt)
	}
}

func TestReaderReset(t *testing.T) {
	gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
	buf := new(bytes.Buffer)
	if _, err := NewWriter(buf).Write(gold); err != nil {
		t.Fatalf("Write: %v", err)
	}
	encoded, invalid, partial := buf.String(), "invalid", "partial"
	r := NewReader(nil)
	for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
		if s == partial {
			r.Reset(strings.NewReader(encoded))
			if _, err := r.Read(make([]byte, 101)); err != nil {
				t.Errorf("#%d: %v", i, err)
				continue
			}
			continue
		}
		r.Reset(strings.NewReader(s))
		got, err := ioutil.ReadAll(r)
		switch s {
		case encoded:
			if err != nil {
				t.Errorf("#%d: %v", i, err)
				continue
			}
			if err := cmp(got, gold); err != nil {
				t.Errorf("#%d: %v", i, err)
				continue
			}
		case invalid:
			if err == nil {
				t.Errorf("#%d: got nil error, want non-nil", i)
				continue
			}
		}
	}
}

func TestWriterReset(t *testing.T) {
	gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
	const n = 20
	for _, buffered := range []bool{false, true} {
		var w *Writer
		if buffered {
			w = NewBufferedWriter(nil)
			defer w.Close()
		} else {
			w = NewWriter(nil)
		}

		var gots, wants [][]byte
		failed := false
		for i := 0; i <= n; i++ {
			buf := new(bytes.Buffer)
			w.Reset(buf)
			want := gold[:len(gold)*i/n]
			if _, err := w.Write(want); err != nil {
				t.Errorf("#%d: Write: %v", i, err)
				failed = true
				continue
			}
			if buffered {
				if err := w.Flush(); err != nil {
					t.Errorf("#%d: Flush: %v", i, err)
					failed = true
					continue
				}
			}
			got, err := ioutil.ReadAll(NewReader(buf))
			if err != nil {
				t.Errorf("#%d: ReadAll: %v", i, err)
				failed = true
				continue
			}
			gots = append(gots, got)
			wants = append(wants, want)
		}
		if failed {
			continue
		}
		for i := range gots {
			if err := cmp(gots[i], wants[i]); err != nil {
				t.Errorf("#%d: %v", i, err)
			}
		}
	}
}

func TestWriterResetWithoutFlush(t *testing.T) {
	buf0 := new(bytes.Buffer)
	buf1 := new(bytes.Buffer)
	w := NewBufferedWriter(buf0)
	if _, err := w.Write([]byte("xxx")); err != nil {
		t.Fatalf("Write #0: %v", err)
	}
	// Note that we don't Flush the Writer before calling Reset.
	w.Reset(buf1)
	if _, err := w.Write([]byte("yyy")); err != nil {
		t.Fatalf("Write #1: %v", err)
	}
	if err := w.Flush(); err != nil {
		t.Fatalf("Flush: %v", err)
	}
	got, err := ioutil.ReadAll(NewReader(buf1))
	if err != nil {
		t.Fatalf("ReadAll: %v", err)
	}
	if err := cmp(got, []byte("yyy")); err != nil {
		t.Fatal(err)
	}
}

type writeCounter int

func (c *writeCounter) Write(p []byte) (int, error) {
	*c++
	return len(p), nil
}

// TestNumUnderlyingWrites tests that each Writer flush only makes one or two
// Write calls on its underlying io.Writer, depending on whether or not the
// flushed buffer was compressible.
func TestNumUnderlyingWrites(t *testing.T) {
	testCases := []struct {
		input []byte
		want  int
	}{
		{bytes.Repeat([]byte{'x'}, 100), 1},
		{bytes.Repeat([]byte{'y'}, 100), 1},
		{[]byte("ABCDEFGHIJKLMNOPQRST"), 2},
	}

	var c writeCounter
	w := NewBufferedWriter(&c)
	defer w.Close()
	for i, tc := range testCases {
		c = 0
		if _, err := w.Write(tc.input); err != nil {
			t.Errorf("#%d: Write: %v", i, err)
			continue
		}
		if err := w.Flush(); err != nil {
			t.Errorf("#%d: Flush: %v", i, err)
			continue
		}
		if int(c) != tc.want {
			t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want)
			continue
		}
	}
}

func benchDecode(b *testing.B, src []byte) {
	encoded := Encode(nil, src)
	// Bandwidth is in amount of uncompressed data.
	b.SetBytes(int64(len(src)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		Decode(src, encoded)
	}
}

func benchEncode(b *testing.B, src []byte) {
	// Bandwidth is in amount of uncompressed data.
	b.SetBytes(int64(len(src)))
	dst := make([]byte, MaxEncodedLen(len(src)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		Encode(dst, src)
	}
}

func testOrBenchmark(b testing.TB) string {
	if _, ok := b.(*testing.B); ok {
		return "benchmark"
	}
	return "test"
}

func readFile(b testing.TB, filename string) []byte {
	src, err := ioutil.ReadFile(filename)
	if err != nil {
		b.Skipf("skipping %s: %v", testOrBenchmark(b), err)
	}
	if len(src) == 0 {
		b.Fatalf("%s has zero length", filename)
	}
	return src
}

// expand returns a slice of length n containing repeated copies of src.
func expand(src []byte, n int) []byte {
	dst := make([]byte, n)
	for x := dst; len(x) > 0; {
		i := copy(x, src)
		x = x[i:]
	}
	return dst
}

func benchWords(b *testing.B, n int, decode bool) {
	// Note: the file is OS-language dependent so the resulting values are not
	// directly comparable for non-US-English OS installations.
	data := expand(readFile(b, "/usr/share/dict/words"), n)
	if decode {
		benchDecode(b, data)
	} else {
		benchEncode(b, data)
	}
}

func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }

func BenchmarkRandomEncode(b *testing.B) {
	rng := rand.New(rand.NewSource(1))
	data := make([]byte, 1<<20)
	for i := range data {
		data[i] = uint8(rng.Intn(256))
	}
	benchEncode(b, data)
}

// testFiles' values are copied directly from
// https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc
// The label field is unused in snappy-go.
var testFiles = []struct {
	label     string
	filename  string
	sizeLimit int
}{
	{"html", "html", 0},
	{"urls", "urls.10K", 0},
	{"jpg", "fireworks.jpeg", 0},
	{"jpg_200", "fireworks.jpeg", 200},
	{"pdf", "paper-100k.pdf", 0},
	{"html4", "html_x_4", 0},
	{"txt1", "alice29.txt", 0},
	{"txt2", "asyoulik.txt", 0},
	{"txt3", "lcet10.txt", 0},
	{"txt4", "plrabn12.txt", 0},
	{"pb", "geo.protodata", 0},
	{"gaviota", "kppkn.gtb", 0},
}

const (
	// The benchmark data files are at this canonical URL.
	benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/"
)

func downloadBenchmarkFiles(b testing.TB, basename string) (errRet error) {
	bDir := filepath.FromSlash(*benchdataDir)
	filename := filepath.Join(bDir, basename)
	if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 {
		return nil
	}

	if !*download {
		b.Skipf("test data not found; skipping %s without the -download flag", testOrBenchmark(b))
	}
	// Download the official snappy C++ implementation reference test data
	// files for benchmarking.
	if err := os.MkdirAll(bDir, 0777); err != nil && !os.IsExist(err) {
		return fmt.Errorf("failed to create %s: %s", bDir, err)
	}

	f, err := os.Create(filename)
	if err != nil {
		return fmt.Errorf("failed to create %s: %s", filename, err)
	}
	defer f.Close()
	defer func() {
		if errRet != nil {
			os.Remove(filename)
		}
	}()
	url := benchURL + basename
	resp, err := http.Get(url)
	if err != nil {
		return fmt.Errorf("failed to download %s: %s", url, err)
	}
	defer resp.Body.Close()
	if s := resp.StatusCode; s != http.StatusOK {
		return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s))
	}
	_, err = io.Copy(f, resp.Body)
	if err != nil {
		return fmt.Errorf("failed to download %s to %s: %s", url, filename, err)
	}
	return nil
}

func benchFile(b *testing.B, i int, decode bool) {
	if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil {
		b.Fatalf("failed to download testdata: %s", err)
	}
	bDir := filepath.FromSlash(*benchdataDir)
	data := readFile(b, filepath.Join(bDir, testFiles[i].filename))
	if n := testFiles[i].sizeLimit; 0 < n && n < len(data) {
		data = data[:n]
	}
	if decode {
		benchDecode(b, data)
	} else {
		benchEncode(b, data)
	}
}

// Naming convention is kept similar to what snappy's C++ implementation uses.
func Benchmark_UFlat0(b *testing.B)  { benchFile(b, 0, true) }
func Benchmark_UFlat1(b *testing.B)  { benchFile(b, 1, true) }
func Benchmark_UFlat2(b *testing.B)  { benchFile(b, 2, true) }
func Benchmark_UFlat3(b *testing.B)  { benchFile(b, 3, true) }
func Benchmark_UFlat4(b *testing.B)  { benchFile(b, 4, true) }
func Benchmark_UFlat5(b *testing.B)  { benchFile(b, 5, true) }
func Benchmark_UFlat6(b *testing.B)  { benchFile(b, 6, true) }
func Benchmark_UFlat7(b *testing.B)  { benchFile(b, 7, true) }
func Benchmark_UFlat8(b *testing.B)  { benchFile(b, 8, true) }
func Benchmark_UFlat9(b *testing.B)  { benchFile(b, 9, true) }
func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) }
func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) }
func Benchmark_ZFlat0(b *testing.B)  { benchFile(b, 0, false) }
func Benchmark_ZFlat1(b *testing.B)  { benchFile(b, 1, false) }
func Benchmark_ZFlat2(b *testing.B)  { benchFile(b, 2, false) }
func Benchmark_ZFlat3(b *testing.B)  { benchFile(b, 3, false) }
func Benchmark_ZFlat4(b *testing.B)  { benchFile(b, 4, false) }
func Benchmark_ZFlat5(b *testing.B)  { benchFile(b, 5, false) }
func Benchmark_ZFlat6(b *testing.B)  { benchFile(b, 6, false) }
func Benchmark_ZFlat7(b *testing.B)  { benchFile(b, 7, false) }
func Benchmark_ZFlat8(b *testing.B)  { benchFile(b, 8, false) }
func Benchmark_ZFlat9(b *testing.B)  { benchFile(b, 9, false) }
func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) }
func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }

func BenchmarkExtendMatch(b *testing.B) {
	tDir := filepath.FromSlash(*testdataDir)
	src, err := ioutil.ReadFile(filepath.Join(tDir, goldenText))
	if err != nil {
		b.Fatalf("ReadFile: %v", err)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for _, tc := range extendMatchGoldenTestCases {
			extendMatch(src, tc.i, tc.j)
		}
	}
}