1// Copyright 2016, Joe Tsai. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5package xflate_test
6
7import (
8	"archive/zip"
9	"bytes"
10	"compress/gzip"
11	"encoding/binary"
12	"fmt"
13	"hash/crc32"
14	"io"
15	"io/ioutil"
16	"log"
17
18	"github.com/dsnet/compress/internal/testutil"
19	"github.com/dsnet/compress/xflate"
20)
21
// init configures the standard logger to prefix every message with the
// short file name and line number of the call site, so that a log.Fatal
// raised inside an example points straight at the failing step.
func init() {
	log.SetFlags(log.Lshortfile)
}
23
24// Zip archives allow for efficient random access between files, however,
25// they do not easily allow for efficient random access within a given file,
26// especially if compressed. In this example, we use XFLATE to compress each
27// file. This is particularly useful for seeking within a relatively large
28// file in a Zip archive.
29func Example_zipFile() {
30	// Test files of non-trivial sizes.
31	files := map[string][]byte{
32		"twain.txt":   testutil.MustLoadFile("../testdata/twain.txt"),
33		"digits.txt":  testutil.MustLoadFile("../testdata/digits.txt"),
34		"huffman.txt": testutil.MustLoadFile("../testdata/huffman.txt"),
35	}
36
37	// Write the Zip archive.
38	buffer := new(bytes.Buffer)
39	zw := zip.NewWriter(buffer)
40	zw.RegisterCompressor(zip.Deflate, func(wr io.Writer) (io.WriteCloser, error) {
41		// Instead of the default DEFLATE compressor, register one that uses
42		// XFLATE instead. We choose a relative small chunk size of 64KiB for
43		// better random access properties, at the expense of compression ratio.
44		return xflate.NewWriter(wr, &xflate.WriterConfig{
45			Level:     xflate.BestSpeed,
46			ChunkSize: 1 << 16,
47		})
48	})
49	for _, name := range []string{"twain.txt", "digits.txt", "huffman.txt"} {
50		body := files[name]
51		f, err := zw.Create(name)
52		if err != nil {
53			log.Fatal(err)
54		}
55		if _, err = f.Write(body); err != nil {
56			log.Fatal(err)
57		}
58	}
59	if err := zw.Close(); err != nil {
60		log.Fatal(err)
61	}
62
63	// Read the Zip archive.
64	rd := bytes.NewReader(buffer.Bytes())
65	zr, err := zip.NewReader(rd, rd.Size())
66	if err != nil {
67		log.Fatal(err)
68	}
69	for _, f := range zr.File {
70		// Verify that the new compression format is backwards compatible with
71		// a standard DEFLATE decompressor.
72		rc, err := f.Open()
73		if err != nil {
74			log.Fatal(err)
75		}
76		buf, err := ioutil.ReadAll(rc)
77		if err != nil {
78			log.Fatal(err)
79		}
80		if err := rc.Close(); err != nil {
81			log.Fatal(err)
82		}
83		if !bytes.Equal(buf, files[f.Name]) {
84			log.Fatal("file content does not match")
85		}
86	}
87	for _, f := range zr.File {
88		// In order for XFLATE to provide random access, it needs to be provided
89		// an io.ReadSeeker in order to operate. Thus, get low-level access to
90		// the compressed file data in archive.
91		off, err := f.DataOffset()
92		if err != nil {
93			log.Fatal(err)
94		}
95		rds := io.NewSectionReader(rd, off, int64(f.CompressedSize64))
96
97		// Since we know that the writer used the XFLATE format, we can open
98		// the compressed file as an xflate.Reader. If the file was compressed
99		// with regular DEFLATE, then this will return an error.
100		xr, err := xflate.NewReader(rds, nil)
101		if err != nil {
102			log.Fatal(err)
103		}
104
105		// Read from the middle of the file.
106		buf := make([]byte, 80)
107		pos := int64(f.UncompressedSize64 / 2)
108		if _, err := xr.Seek(pos, io.SeekStart); err != nil {
109			log.Fatal(err)
110		}
111		if _, err := io.ReadFull(xr, buf); err != nil {
112			log.Fatal(err)
113		}
114
115		// Close the Reader.
116		if err := xr.Close(); err != nil {
117			log.Fatal(err)
118		}
119
120		got := string(buf)
121		want := string(files[f.Name][pos : pos+80])
122		fmt.Printf("File: %s\n\tgot:  %q\n\twant: %q\n\n", f.Name, got, want)
123	}
124
125	// Output:
126	// File: twain.txt
127	// 	got:  "ver, white with foam, the driving spray of spume-flakes, the dim\noutlines of the"
128	// 	want: "ver, white with foam, the driving spray of spume-flakes, the dim\noutlines of the"
129	//
130	// File: digits.txt
131	// 	got:  "63955008002334767618706808652687872278317742021406898070341050620023527363226729"
132	// 	want: "63955008002334767618706808652687872278317742021406898070341050620023527363226729"
133	//
134	// File: huffman.txt
135	// 	got:  "E+uXeMsjFSXvhrGmRZCF7ErSVMWoWEzqMdW8uRyjCRxkQxOrWrQgkSdHshJyTbsBajQUoNfPY1zuLRvy"
136	// 	want: "E+uXeMsjFSXvhrGmRZCF7ErSVMWoWEzqMdW8uRyjCRxkQxOrWrQgkSdHshJyTbsBajQUoNfPY1zuLRvy"
137}
138
139// The Gzip format (RFC 1952) is a framing format for DEFLATE (RFC 1951).
140// For this reason, we can provide random access decompression to Gzip files
141// that are compressed with XFLATE. The example below adds a lightweight
142// header and footer to the XFLATE stream to make it compliant with the Gzip
143// format. This has the advantage that these files remain readable by
144// standard implementations of Gzip. Note that regular Gzip files are not
145// seekable because they are not compressed in the XFLATE format.
146func Example_gzipFile() {
147	// Test file of non-trivial size.
148	twain := testutil.MustLoadFile("../testdata/twain.txt")
149
150	// The Gzip header without using any extra features is 10 bytes long.
151	const header = "\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff"
152
153	// Write the Gzip file.
154	buffer := new(bytes.Buffer)
155	{
156		// Write Gzip header.
157		buffer.WriteString(header)
158
159		// Instead of using flate.Writer, we use xflate.Writer instead.
160		// We choose a relative small chunk size of 64KiB for better
161		// random access properties, at the expense of compression ratio.
162		xw, err := xflate.NewWriter(buffer, &xflate.WriterConfig{
163			Level:     xflate.BestSpeed,
164			ChunkSize: 1 << 16,
165		})
166		if err != nil {
167			log.Fatal(err)
168		}
169
170		// Write the test data.
171		crc := crc32.NewIEEE()
172		mw := io.MultiWriter(xw, crc) // Write to both compressor and hasher
173		if _, err := io.Copy(mw, bytes.NewReader(twain)); err != nil {
174			log.Fatal(err)
175		}
176		if err := xw.Close(); err != nil {
177			log.Fatal(err)
178		}
179
180		// Write Gzip footer.
181		binary.Write(buffer, binary.LittleEndian, uint32(crc.Sum32()))
182		binary.Write(buffer, binary.LittleEndian, uint32(len(twain)))
183	}
184
185	// Verify that Gzip file is RFC 1952 compliant.
186	{
187		gz, err := gzip.NewReader(bytes.NewReader(buffer.Bytes()))
188		if err != nil {
189			log.Fatal(err)
190		}
191		buf, err := ioutil.ReadAll(gz)
192		if err != nil {
193			log.Fatal(err)
194		}
195		if !bytes.Equal(buf, twain) {
196			log.Fatal("gzip content does not match")
197		}
198	}
199
200	// Read the Gzip file.
201	{
202		// Parse and discard the Gzip wrapper.
203		// This does not work for back-to-back Gzip files.
204		var hdr [10]byte
205		rd := bytes.NewReader(buffer.Bytes())
206		if _, err := rd.ReadAt(hdr[:], 0); err != nil {
207			log.Fatal(err)
208		}
209		if string(hdr[:3]) != header[:3] || rd.Size() < 18 {
210			log.Fatal("not a gzip file")
211		}
212		if hdr[3]&0xfe > 0 {
213			log.Fatal("no support for extra gzip features")
214		}
215		rds := io.NewSectionReader(rd, 10, rd.Size()-18) // Strip Gzip header/footer
216
217		// Since we know that the writer used the XFLATE format, we can open
218		// the compressed file as an xflate.Reader. If the file was compressed
219		// with regular DEFLATE, then this will return an error.
220		xr, err := xflate.NewReader(rds, nil)
221		if err != nil {
222			log.Fatal(err)
223		}
224
225		// Read from the middle of the stream.
226		buf := make([]byte, 80)
227		pos := int64(len(twain) / 2)
228		if _, err := xr.Seek(pos, io.SeekStart); err != nil {
229			log.Fatal(err)
230		}
231		if _, err := io.ReadFull(xr, buf); err != nil {
232			log.Fatal(err)
233		}
234
235		// Close the Reader.
236		if err := xr.Close(); err != nil {
237			log.Fatal(err)
238		}
239
240		got := string(buf)
241		want := string(twain[pos : pos+80])
242		fmt.Printf("got:  %q\nwant: %q\n", got, want)
243	}
244
245	// Output:
246	// got:  "ver, white with foam, the driving spray of spume-flakes, the dim\noutlines of the"
247	// want: "ver, white with foam, the driving spray of spume-flakes, the dim\noutlines of the"
248}
249