1package tarsum
2
3import (
4	"archive/tar"
5	"bytes"
6	"compress/gzip"
7	"crypto/md5"
8	"crypto/rand"
9	"crypto/sha1"
10	"crypto/sha256"
11	"crypto/sha512"
12	"encoding/hex"
13	"fmt"
14	"io"
15	"io/ioutil"
16	"os"
17	"strings"
18	"testing"
19
20	"github.com/stretchr/testify/assert"
21	"github.com/stretchr/testify/require"
22)
23
// testLayer describes one TarSum test case: where the tar input comes from,
// how the TarSum is constructed, and the sum string it is expected to yield.
type testLayer struct {
	// filename is the path of a tar archive to open; when empty, options is
	// used to generate the archive instead.
	filename string
	// options, when non-nil and filename is empty, is passed to sizedTar to
	// generate the archive.
	options  *sizedOptions
	// jsonfile is an optional path whose contents are passed to Sum.
	jsonfile string
	// gzip is negated and passed as the second argument of NewTarSum /
	// NewTarSumHash.
	gzip     bool
	// tarsum is the expected sum string.
	tarsum   string
	// version is the Version passed to NewTarSum / NewTarSumHash.
	version  Version
	// hash, when non-nil, selects NewTarSumHash with this hash; otherwise
	// NewTarSum is used.
	hash     THash
}
33
// testLayers is the table of cases checked by TestTarSums. Its first entry is
// also used as the input for TestNewTarSumForLabel and TestTarSumsReadSize.
var testLayers = []testLayer{
	{
		filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
		jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
		version:  Version0,
		tarsum:   "tarsum+sha256:4095cc12fa5fdb1ab2760377e1cd0c4ecdd3e61b4f9b82319d96fcea6c9a41c6"},
	{
		filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
		jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
		version:  VersionDev,
		tarsum:   "tarsum.dev+sha256:db56e35eec6ce65ba1588c20ba6b1ea23743b59e81fb6b7f358ccbde5580345c"},
	{
		filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar",
		jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json",
		gzip:     true,
		tarsum:   "tarsum+sha256:4095cc12fa5fdb1ab2760377e1cd0c4ecdd3e61b4f9b82319d96fcea6c9a41c6"},
	{
		// Tests existing version of TarSum when xattrs are present
		filename: "testdata/xattr/layer.tar",
		jsonfile: "testdata/xattr/json",
		version:  Version0,
		tarsum:   "tarsum+sha256:07e304a8dbcb215b37649fde1a699f8aeea47e60815707f1cdf4d55d25ff6ab4"},
	{
		// Tests next version of TarSum when xattrs are present
		filename: "testdata/xattr/layer.tar",
		jsonfile: "testdata/xattr/json",
		version:  VersionDev,
		tarsum:   "tarsum.dev+sha256:6c58917892d77b3b357b0f9ad1e28e1f4ae4de3a8006bd3beb8beda214d8fd16"},
	{
		filename: "testdata/511136ea3c5a64f264b78b5433614aec563103b4d4702f3ba7d4d2698e22c158/layer.tar",
		jsonfile: "testdata/511136ea3c5a64f264b78b5433614aec563103b4d4702f3ba7d4d2698e22c158/json",
		tarsum:   "tarsum+sha256:c66bd5ec9f87b8f4c6135ca37684618f486a3dd1d113b138d0a177bfa39c2571"},
	{
		options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
		tarsum:  "tarsum+sha256:8bf12d7e67c51ee2e8306cba569398b1b9f419969521a12ffb9d8875e8836738"},
	{
		// this tar has two files with the same path
		filename: "testdata/collision/collision-0.tar",
		tarsum:   "tarsum+sha256:08653904a68d3ab5c59e65ef58c49c1581caa3c34744f8d354b3f575ea04424a"},
	{
		// this tar has the same two files (with the same path), but in reversed order, ensuring it has a different hash than above
		filename: "testdata/collision/collision-1.tar",
		tarsum:   "tarsum+sha256:b51c13fbefe158b5ce420d2b930eef54c5cd55c50a2ee4abdddea8fa9f081e0d"},
	{
		// this tar is a newer variant of collision-0.tar, ensuring it has a different hash
		// NOTE(review): what exactly is "newer" (presumably file timestamps) is not visible here — confirm against testdata
		filename: "testdata/collision/collision-2.tar",
		tarsum:   "tarsum+sha256:381547080919bb82691e995508ae20ed33ce0f6948d41cafbeb70ce20c73ee8e"},
	{
		// this tar is a newer variant of collision-1.tar, ensuring it has a different hash
		filename: "testdata/collision/collision-3.tar",
		tarsum:   "tarsum+sha256:f886e431c08143164a676805205979cd8fa535dfcef714db5515650eea5a7c0f"},
	{
		options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
		tarsum:  "tarsum+md5:0d7529ec7a8360155b48134b8e599f53",
		hash:    md5THash,
	},
	{
		options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
		tarsum:  "tarsum+sha1:f1fee39c5925807ff75ef1925e7a23be444ba4df",
		hash:    sha1Hash,
	},
	{
		options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
		tarsum:  "tarsum+sha224:6319390c0b061d639085d8748b14cd55f697cf9313805218b21cf61c",
		hash:    sha224Hash,
	},
	{
		options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
		tarsum:  "tarsum+sha384:a578ce3ce29a2ae03b8ed7c26f47d0f75b4fc849557c62454be4b5ffd66ba021e713b48ce71e947b43aab57afd5a7636",
		hash:    sha384Hash,
	},
	{
		options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory)
		tarsum:  "tarsum+sha512:e9bfb90ca5a4dfc93c46ee061a5cf9837de6d2fdf82544d6460d3147290aecfabf7b5e415b9b6e72db9b8941f149d5d69fb17a394cbfaf2eac523bd9eae21855",
		hash:    sha512Hash,
	},
}
111
// sizedOptions describes the synthetic tar archive built by sizedTar.
type sizedOptions struct {
	num      int64 // number of files the tar should have
	size     int64 // bytes per file
	isRand   bool  // fill each file with a repeated random 8-byte chunk (otherwise all zeros)
	realFile bool  // write to a TempFile instead of an in-memory buffer
}

// sizedTar makes a tar archive according to opts:
//   - num is the number of files the tar should have
//   - size is the bytes per file
//   - isRand is whether the contents of the files should be a random chunk
//     (otherwise it's all zeros); the chunk is 8 random bytes, drawn once per
//     file and repeated to fill it
//   - realFile will write to a TempFile, instead of an in memory buffer
//
// The returned reader yields the complete archive from its first byte: the
// tar writer is closed before returning and a temp file is rewound to its
// start. When realFile is set the reader is an *os.File that the caller must
// close and remove. On any error nil is returned and a partially written temp
// file is removed.
func sizedTar(opts sizedOptions) io.Reader {
	var (
		fh      io.ReadWriter
		tmpFile *os.File
	)
	if opts.realFile {
		f, err := ioutil.TempFile("", "tarsum")
		if err != nil {
			return nil
		}
		tmpFile, fh = f, f
	} else {
		fh = bytes.NewBuffer([]byte{})
	}

	// fail discards a partially written temp file so error paths do not leak
	// it; cleanup is best-effort because the caller only sees a nil reader.
	fail := func() io.Reader {
		if tmpFile != nil {
			tmpFile.Close()
			os.Remove(tmpFile.Name())
		}
		return nil
	}

	// Number of 8-byte patterns per write; purely a batching factor, the
	// bytes produced are the same as writing the pattern one at a time.
	const patternsPerChunk = 512

	tarW := tar.NewWriter(fh)
	for i := int64(0); i < opts.num; i++ {
		err := tarW.WriteHeader(&tar.Header{
			Name: fmt.Sprintf("/testdata%d", i),
			Mode: 0755,
			Uid:  0,
			Gid:  0,
			Size: opts.size,
		})
		if err != nil {
			return fail()
		}

		pattern := make([]byte, 8) // all zeros unless isRand
		if opts.isRand {
			if _, err := rand.Read(pattern); err != nil {
				return fail()
			}
		}
		chunk := bytes.Repeat(pattern, patternsPerChunk)

		// Write exactly opts.size bytes, including a trailing partial pattern
		// when size is not a multiple of 8; writing fewer bytes than the
		// header announced would make Close fail and truncate the archive.
		for remaining := opts.size; remaining > 0; {
			n := int64(len(chunk))
			if remaining < n {
				n = remaining
			}
			if _, err := tarW.Write(chunk[:n]); err != nil {
				return fail()
			}
			remaining -= n
		}
	}

	// Close writes the end-of-archive marker; it must happen before the
	// reader is handed out (and before rewinding a temp file).
	if err := tarW.Close(); err != nil {
		return fail()
	}
	if tmpFile != nil {
		if _, err := tmpFile.Seek(0, io.SeekStart); err != nil {
			return fail()
		}
	}
	return fh
}
167
168func emptyTarSum(gzip bool) (TarSum, error) {
169	reader, writer := io.Pipe()
170	tarWriter := tar.NewWriter(writer)
171
172	// Immediately close tarWriter and write-end of the
173	// Pipe in a separate goroutine so we don't block.
174	go func() {
175		tarWriter.Close()
176		writer.Close()
177	}()
178
179	return NewTarSum(reader, !gzip, Version0)
180}
181
182// Test errors on NewTarsumForLabel
183func TestNewTarSumForLabelInvalid(t *testing.T) {
184	reader := strings.NewReader("")
185
186	if _, err := NewTarSumForLabel(reader, true, "invalidlabel"); err == nil {
187		t.Fatalf("Expected an error, got nothing.")
188	}
189
190	if _, err := NewTarSumForLabel(reader, true, "invalid+sha256"); err == nil {
191		t.Fatalf("Expected an error, got nothing.")
192	}
193	if _, err := NewTarSumForLabel(reader, true, "tarsum.v1+invalid"); err == nil {
194		t.Fatalf("Expected an error, got nothing.")
195	}
196}
197
198func TestNewTarSumForLabel(t *testing.T) {
199
200	layer := testLayers[0]
201
202	reader, err := os.Open(layer.filename)
203	if err != nil {
204		t.Fatal(err)
205	}
206	defer reader.Close()
207
208	label := strings.Split(layer.tarsum, ":")[0]
209	ts, err := NewTarSumForLabel(reader, false, label)
210	if err != nil {
211		t.Fatal(err)
212	}
213
214	// Make sure it actually worked by reading a little bit of it
215	nbByteToRead := 8 * 1024
216	dBuf := make([]byte, nbByteToRead)
217	_, err = ts.Read(dBuf)
218	if err != nil {
219		t.Errorf("failed to read %vKB from %s: %s", nbByteToRead, layer.filename, err)
220	}
221}
222
223// TestEmptyTar tests that tarsum does not fail to read an empty tar
224// and correctly returns the hex digest of an empty hash.
225func TestEmptyTar(t *testing.T) {
226	// Test without gzip.
227	ts, err := emptyTarSum(false)
228	require.NoError(t, err)
229
230	zeroBlock := make([]byte, 1024)
231	buf := new(bytes.Buffer)
232
233	n, err := io.Copy(buf, ts)
234	require.NoError(t, err)
235
236	if n != int64(len(zeroBlock)) || !bytes.Equal(buf.Bytes(), zeroBlock) {
237		t.Fatalf("tarSum did not write the correct number of zeroed bytes: %d", n)
238	}
239
240	expectedSum := ts.Version().String() + "+sha256:" + hex.EncodeToString(sha256.New().Sum(nil))
241	resultSum := ts.Sum(nil)
242
243	if resultSum != expectedSum {
244		t.Fatalf("expected [%s] but got [%s]", expectedSum, resultSum)
245	}
246
247	// Test with gzip.
248	ts, err = emptyTarSum(true)
249	require.NoError(t, err)
250	buf.Reset()
251
252	_, err = io.Copy(buf, ts)
253	require.NoError(t, err)
254
255	bufgz := new(bytes.Buffer)
256	gz := gzip.NewWriter(bufgz)
257	n, err = io.Copy(gz, bytes.NewBuffer(zeroBlock))
258	require.NoError(t, err)
259	gz.Close()
260	gzBytes := bufgz.Bytes()
261
262	if n != int64(len(zeroBlock)) || !bytes.Equal(buf.Bytes(), gzBytes) {
263		t.Fatalf("tarSum did not write the correct number of gzipped-zeroed bytes: %d", n)
264	}
265
266	resultSum = ts.Sum(nil)
267
268	if resultSum != expectedSum {
269		t.Fatalf("expected [%s] but got [%s]", expectedSum, resultSum)
270	}
271
272	// Test without ever actually writing anything.
273	if ts, err = NewTarSum(bytes.NewReader([]byte{}), true, Version0); err != nil {
274		t.Fatal(err)
275	}
276
277	resultSum = ts.Sum(nil)
278	assert.Equal(t, expectedSum, resultSum)
279}
280
// THash values built with NewTHash from standard-library hash constructors.
// They are referenced by the testLayers entries that set an explicit hash.
var (
	md5THash   = NewTHash("md5", md5.New)
	sha1Hash   = NewTHash("sha1", sha1.New)
	sha224Hash = NewTHash("sha224", sha256.New224)
	sha384Hash = NewTHash("sha384", sha512.New384)
	sha512Hash = NewTHash("sha512", sha512.New)
)
288
289// Test all the build-in read size : buf8K, buf16K, buf32K and more
290func TestTarSumsReadSize(t *testing.T) {
291	// Test always on the same layer (that is big enough)
292	layer := testLayers[0]
293
294	for i := 0; i < 5; i++ {
295
296		reader, err := os.Open(layer.filename)
297		if err != nil {
298			t.Fatal(err)
299		}
300		defer reader.Close()
301
302		ts, err := NewTarSum(reader, false, layer.version)
303		if err != nil {
304			t.Fatal(err)
305		}
306
307		// Read and discard bytes so that it populates sums
308		nbByteToRead := (i + 1) * 8 * 1024
309		dBuf := make([]byte, nbByteToRead)
310		_, err = ts.Read(dBuf)
311		if err != nil {
312			t.Errorf("failed to read %vKB from %s: %s", nbByteToRead, layer.filename, err)
313			continue
314		}
315	}
316}
317
318func TestTarSums(t *testing.T) {
319	for _, layer := range testLayers {
320		var (
321			fh  io.Reader
322			err error
323		)
324		if len(layer.filename) > 0 {
325			fh, err = os.Open(layer.filename)
326			if err != nil {
327				t.Errorf("failed to open %s: %s", layer.filename, err)
328				continue
329			}
330		} else if layer.options != nil {
331			fh = sizedTar(*layer.options)
332		} else {
333			// What else is there to test?
334			t.Errorf("what to do with %#v", layer)
335			continue
336		}
337		if file, ok := fh.(*os.File); ok {
338			defer file.Close()
339		}
340
341		var ts TarSum
342		if layer.hash == nil {
343			//                           double negatives!
344			ts, err = NewTarSum(fh, !layer.gzip, layer.version)
345		} else {
346			ts, err = NewTarSumHash(fh, !layer.gzip, layer.version, layer.hash)
347		}
348		if err != nil {
349			t.Errorf("%q :: %q", err, layer.filename)
350			continue
351		}
352
353		// Read variable number of bytes to test dynamic buffer
354		dBuf := make([]byte, 1)
355		_, err = ts.Read(dBuf)
356		if err != nil {
357			t.Errorf("failed to read 1B from %s: %s", layer.filename, err)
358			continue
359		}
360		dBuf = make([]byte, 16*1024)
361		_, err = ts.Read(dBuf)
362		if err != nil {
363			t.Errorf("failed to read 16KB from %s: %s", layer.filename, err)
364			continue
365		}
366
367		// Read and discard remaining bytes
368		_, err = io.Copy(ioutil.Discard, ts)
369		if err != nil {
370			t.Errorf("failed to copy from %s: %s", layer.filename, err)
371			continue
372		}
373		var gotSum string
374		if len(layer.jsonfile) > 0 {
375			jfh, err := os.Open(layer.jsonfile)
376			if err != nil {
377				t.Errorf("failed to open %s: %s", layer.jsonfile, err)
378				continue
379			}
380			defer jfh.Close()
381
382			buf, err := ioutil.ReadAll(jfh)
383			if err != nil {
384				t.Errorf("failed to readAll %s: %s", layer.jsonfile, err)
385				continue
386			}
387			gotSum = ts.Sum(buf)
388		} else {
389			gotSum = ts.Sum(nil)
390		}
391
392		if layer.tarsum != gotSum {
393			t.Errorf("expecting [%s], but got [%s]", layer.tarsum, gotSum)
394		}
395		var expectedHashName string
396		if layer.hash != nil {
397			expectedHashName = layer.hash.Name()
398		} else {
399			expectedHashName = DefaultTHash.Name()
400		}
401		if expectedHashName != ts.Hash().Name() {
402			t.Errorf("expecting hash [%v], but got [%s]", expectedHashName, ts.Hash().Name())
403		}
404	}
405}
406
407func TestIteration(t *testing.T) {
408	headerTests := []struct {
409		expectedSum string // TODO(vbatts) it would be nice to get individual sums of each
410		version     Version
411		hdr         *tar.Header
412		data        []byte
413	}{
414		{
415			"tarsum+sha256:626c4a2e9a467d65c33ae81f7f3dedd4de8ccaee72af73223c4bc4718cbc7bbd",
416			Version0,
417			&tar.Header{
418				Name:     "file.txt",
419				Size:     0,
420				Typeflag: tar.TypeReg,
421				Devminor: 0,
422				Devmajor: 0,
423			},
424			[]byte(""),
425		},
426		{
427			"tarsum.dev+sha256:6ffd43a1573a9913325b4918e124ee982a99c0f3cba90fc032a65f5e20bdd465",
428			VersionDev,
429			&tar.Header{
430				Name:     "file.txt",
431				Size:     0,
432				Typeflag: tar.TypeReg,
433				Devminor: 0,
434				Devmajor: 0,
435			},
436			[]byte(""),
437		},
438		{
439			"tarsum.dev+sha256:b38166c059e11fb77bef30bf16fba7584446e80fcc156ff46d47e36c5305d8ef",
440			VersionDev,
441			&tar.Header{
442				Name:     "another.txt",
443				Uid:      1000,
444				Gid:      1000,
445				Uname:    "slartibartfast",
446				Gname:    "users",
447				Size:     4,
448				Typeflag: tar.TypeReg,
449				Devminor: 0,
450				Devmajor: 0,
451			},
452			[]byte("test"),
453		},
454		{
455			"tarsum.dev+sha256:4cc2e71ac5d31833ab2be9b4f7842a14ce595ec96a37af4ed08f87bc374228cd",
456			VersionDev,
457			&tar.Header{
458				Name:     "xattrs.txt",
459				Uid:      1000,
460				Gid:      1000,
461				Uname:    "slartibartfast",
462				Gname:    "users",
463				Size:     4,
464				Typeflag: tar.TypeReg,
465				Xattrs: map[string]string{
466					"user.key1": "value1",
467					"user.key2": "value2",
468				},
469			},
470			[]byte("test"),
471		},
472		{
473			"tarsum.dev+sha256:65f4284fa32c0d4112dd93c3637697805866415b570587e4fd266af241503760",
474			VersionDev,
475			&tar.Header{
476				Name:     "xattrs.txt",
477				Uid:      1000,
478				Gid:      1000,
479				Uname:    "slartibartfast",
480				Gname:    "users",
481				Size:     4,
482				Typeflag: tar.TypeReg,
483				Xattrs: map[string]string{
484					"user.KEY1": "value1", // adding different case to ensure different sum
485					"user.key2": "value2",
486				},
487			},
488			[]byte("test"),
489		},
490		{
491			"tarsum+sha256:c12bb6f1303a9ddbf4576c52da74973c00d14c109bcfa76b708d5da1154a07fa",
492			Version0,
493			&tar.Header{
494				Name:     "xattrs.txt",
495				Uid:      1000,
496				Gid:      1000,
497				Uname:    "slartibartfast",
498				Gname:    "users",
499				Size:     4,
500				Typeflag: tar.TypeReg,
501				Xattrs: map[string]string{
502					"user.NOT": "CALCULATED",
503				},
504			},
505			[]byte("test"),
506		},
507	}
508	for _, htest := range headerTests {
509		s, err := renderSumForHeader(htest.version, htest.hdr, htest.data)
510		if err != nil {
511			t.Fatal(err)
512		}
513
514		if s != htest.expectedSum {
515			t.Errorf("expected sum: %q, got: %q", htest.expectedSum, s)
516		}
517	}
518
519}
520
521func renderSumForHeader(v Version, h *tar.Header, data []byte) (string, error) {
522	buf := bytes.NewBuffer(nil)
523	// first build our test tar
524	tw := tar.NewWriter(buf)
525	if err := tw.WriteHeader(h); err != nil {
526		return "", err
527	}
528	if _, err := tw.Write(data); err != nil {
529		return "", err
530	}
531	tw.Close()
532
533	ts, err := NewTarSum(buf, true, v)
534	if err != nil {
535		return "", err
536	}
537	tr := tar.NewReader(ts)
538	for {
539		hdr, err := tr.Next()
540		if hdr == nil || err == io.EOF {
541			// Signals the end of the archive.
542			break
543		}
544		if err != nil {
545			return "", err
546		}
547		if _, err = io.Copy(ioutil.Discard, tr); err != nil {
548			return "", err
549		}
550	}
551	return ts.Sum(nil), nil
552}
553
554func Benchmark9kTar(b *testing.B) {
555	buf := bytes.NewBuffer([]byte{})
556	fh, err := os.Open("testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar")
557	if err != nil {
558		b.Error(err)
559		return
560	}
561	defer fh.Close()
562
563	n, err := io.Copy(buf, fh)
564	if err != nil {
565		b.Error(err)
566		return
567	}
568
569	reader := bytes.NewReader(buf.Bytes())
570
571	b.SetBytes(n)
572	b.ResetTimer()
573	for i := 0; i < b.N; i++ {
574		reader.Seek(0, 0)
575		ts, err := NewTarSum(reader, true, Version0)
576		if err != nil {
577			b.Error(err)
578			return
579		}
580		io.Copy(ioutil.Discard, ts)
581		ts.Sum(nil)
582	}
583}
584
585func Benchmark9kTarGzip(b *testing.B) {
586	buf := bytes.NewBuffer([]byte{})
587	fh, err := os.Open("testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar")
588	if err != nil {
589		b.Error(err)
590		return
591	}
592	defer fh.Close()
593
594	n, err := io.Copy(buf, fh)
595	if err != nil {
596		b.Error(err)
597		return
598	}
599
600	reader := bytes.NewReader(buf.Bytes())
601
602	b.SetBytes(n)
603	b.ResetTimer()
604	for i := 0; i < b.N; i++ {
605		reader.Seek(0, 0)
606		ts, err := NewTarSum(reader, false, Version0)
607		if err != nil {
608			b.Error(err)
609			return
610		}
611		io.Copy(ioutil.Discard, ts)
612		ts.Sum(nil)
613	}
614}
615
616// this is a single big file in the tar archive
617func Benchmark1mbSingleFileTar(b *testing.B) {
618	benchmarkTar(b, sizedOptions{1, 1024 * 1024, true, true}, false)
619}
620
621// this is a single big file in the tar archive
622func Benchmark1mbSingleFileTarGzip(b *testing.B) {
623	benchmarkTar(b, sizedOptions{1, 1024 * 1024, true, true}, true)
624}
625
626// this is 1024 1k files in the tar archive
627func Benchmark1kFilesTar(b *testing.B) {
628	benchmarkTar(b, sizedOptions{1024, 1024, true, true}, false)
629}
630
631// this is 1024 1k files in the tar archive
632func Benchmark1kFilesTarGzip(b *testing.B) {
633	benchmarkTar(b, sizedOptions{1024, 1024, true, true}, true)
634}
635
636func benchmarkTar(b *testing.B, opts sizedOptions, isGzip bool) {
637	var fh *os.File
638	tarReader := sizedTar(opts)
639	if br, ok := tarReader.(*os.File); ok {
640		fh = br
641	}
642	defer os.Remove(fh.Name())
643	defer fh.Close()
644
645	b.SetBytes(opts.size * opts.num)
646	b.ResetTimer()
647	for i := 0; i < b.N; i++ {
648		ts, err := NewTarSum(fh, !isGzip, Version0)
649		if err != nil {
650			b.Error(err)
651			return
652		}
653		io.Copy(ioutil.Discard, ts)
654		ts.Sum(nil)
655		fh.Seek(0, 0)
656	}
657}
658