1// Copyright 2016, Joe Tsai. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5package xflate_test 6 7import ( 8 "archive/zip" 9 "bytes" 10 "compress/gzip" 11 "encoding/binary" 12 "fmt" 13 "hash/crc32" 14 "io" 15 "io/ioutil" 16 "log" 17 18 "github.com/dsnet/compress/internal/testutil" 19 "github.com/dsnet/compress/xflate" 20) 21 22func init() { log.SetFlags(log.Lshortfile) } 23 24// Zip archives allow for efficient random access between files, however, 25// they do not easily allow for efficient random access within a given file, 26// especially if compressed. In this example, we use XFLATE to compress each 27// file. This is particularly useful for seeking within a relatively large 28// file in a Zip archive. 29func Example_zipFile() { 30 // Test files of non-trivial sizes. 31 files := map[string][]byte{ 32 "twain.txt": testutil.MustLoadFile("../testdata/twain.txt"), 33 "digits.txt": testutil.MustLoadFile("../testdata/digits.txt"), 34 "huffman.txt": testutil.MustLoadFile("../testdata/huffman.txt"), 35 } 36 37 // Write the Zip archive. 38 buffer := new(bytes.Buffer) 39 zw := zip.NewWriter(buffer) 40 zw.RegisterCompressor(zip.Deflate, func(wr io.Writer) (io.WriteCloser, error) { 41 // Instead of the default DEFLATE compressor, register one that uses 42 // XFLATE instead. We choose a relative small chunk size of 64KiB for 43 // better random access properties, at the expense of compression ratio. 44 return xflate.NewWriter(wr, &xflate.WriterConfig{ 45 Level: xflate.BestSpeed, 46 ChunkSize: 1 << 16, 47 }) 48 }) 49 for _, name := range []string{"twain.txt", "digits.txt", "huffman.txt"} { 50 body := files[name] 51 f, err := zw.Create(name) 52 if err != nil { 53 log.Fatal(err) 54 } 55 if _, err = f.Write(body); err != nil { 56 log.Fatal(err) 57 } 58 } 59 if err := zw.Close(); err != nil { 60 log.Fatal(err) 61 } 62 63 // Read the Zip archive. 64 rd := bytes.NewReader(buffer.Bytes()) 65 zr, err := zip.NewReader(rd, rd.Size()) 66 if err != nil { 67 log.Fatal(err) 68 } 69 for _, f := range zr.File { 70 // Verify that the new compression format is backwards compatible with 71 // a standard DEFLATE decompressor. 72 rc, err := f.Open() 73 if err != nil { 74 log.Fatal(err) 75 } 76 buf, err := ioutil.ReadAll(rc) 77 if err != nil { 78 log.Fatal(err) 79 } 80 if err := rc.Close(); err != nil { 81 log.Fatal(err) 82 } 83 if !bytes.Equal(buf, files[f.Name]) { 84 log.Fatal("file content does not match") 85 } 86 } 87 for _, f := range zr.File { 88 // In order for XFLATE to provide random access, it needs to be provided 89 // an io.ReadSeeker in order to operate. Thus, get low-level access to 90 // the compressed file data in archive. 91 off, err := f.DataOffset() 92 if err != nil { 93 log.Fatal(err) 94 } 95 rds := io.NewSectionReader(rd, off, int64(f.CompressedSize64)) 96 97 // Since we know that the writer used the XFLATE format, we can open 98 // the compressed file as an xflate.Reader. If the file was compressed 99 // with regular DEFLATE, then this will return an error. 100 xr, err := xflate.NewReader(rds, nil) 101 if err != nil { 102 log.Fatal(err) 103 } 104 105 // Read from the middle of the file. 106 buf := make([]byte, 80) 107 pos := int64(f.UncompressedSize64 / 2) 108 if _, err := xr.Seek(pos, io.SeekStart); err != nil { 109 log.Fatal(err) 110 } 111 if _, err := io.ReadFull(xr, buf); err != nil { 112 log.Fatal(err) 113 } 114 115 // Close the Reader. 116 if err := xr.Close(); err != nil { 117 log.Fatal(err) 118 } 119 120 got := string(buf) 121 want := string(files[f.Name][pos : pos+80]) 122 fmt.Printf("File: %s\n\tgot: %q\n\twant: %q\n\n", f.Name, got, want) 123 } 124 125 // Output: 126 // File: twain.txt 127 // got: "ver, white with foam, the driving spray of spume-flakes, the dim\noutlines of the" 128 // want: "ver, white with foam, the driving spray of spume-flakes, the dim\noutlines of the" 129 // 130 // File: digits.txt 131 // got: "63955008002334767618706808652687872278317742021406898070341050620023527363226729" 132 // want: "63955008002334767618706808652687872278317742021406898070341050620023527363226729" 133 // 134 // File: huffman.txt 135 // got: "E+uXeMsjFSXvhrGmRZCF7ErSVMWoWEzqMdW8uRyjCRxkQxOrWrQgkSdHshJyTbsBajQUoNfPY1zuLRvy" 136 // want: "E+uXeMsjFSXvhrGmRZCF7ErSVMWoWEzqMdW8uRyjCRxkQxOrWrQgkSdHshJyTbsBajQUoNfPY1zuLRvy" 137} 138 139// The Gzip format (RFC 1952) is a framing format for DEFLATE (RFC 1951). 140// For this reason, we can provide random access decompression to Gzip files 141// that are compressed with XFLATE. The example below adds a lightweight 142// header and footer to the XFLATE stream to make it compliant with the Gzip 143// format. This has the advantage that these files remain readable by 144// standard implementations of Gzip. Note that regular Gzip files are not 145// seekable because they are not compressed in the XFLATE format. 146func Example_gzipFile() { 147 // Test file of non-trivial size. 148 twain := testutil.MustLoadFile("../testdata/twain.txt") 149 150 // The Gzip header without using any extra features is 10 bytes long. 151 const header = "\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff" 152 153 // Write the Gzip file. 154 buffer := new(bytes.Buffer) 155 { 156 // Write Gzip header. 157 buffer.WriteString(header) 158 159 // Instead of using flate.Writer, we use xflate.Writer instead. 160 // We choose a relative small chunk size of 64KiB for better 161 // random access properties, at the expense of compression ratio. 162 xw, err := xflate.NewWriter(buffer, &xflate.WriterConfig{ 163 Level: xflate.BestSpeed, 164 ChunkSize: 1 << 16, 165 }) 166 if err != nil { 167 log.Fatal(err) 168 } 169 170 // Write the test data. 171 crc := crc32.NewIEEE() 172 mw := io.MultiWriter(xw, crc) // Write to both compressor and hasher 173 if _, err := io.Copy(mw, bytes.NewReader(twain)); err != nil { 174 log.Fatal(err) 175 } 176 if err := xw.Close(); err != nil { 177 log.Fatal(err) 178 } 179 180 // Write Gzip footer. 181 binary.Write(buffer, binary.LittleEndian, uint32(crc.Sum32())) 182 binary.Write(buffer, binary.LittleEndian, uint32(len(twain))) 183 } 184 185 // Verify that Gzip file is RFC 1952 compliant. 186 { 187 gz, err := gzip.NewReader(bytes.NewReader(buffer.Bytes())) 188 if err != nil { 189 log.Fatal(err) 190 } 191 buf, err := ioutil.ReadAll(gz) 192 if err != nil { 193 log.Fatal(err) 194 } 195 if !bytes.Equal(buf, twain) { 196 log.Fatal("gzip content does not match") 197 } 198 } 199 200 // Read the Gzip file. 201 { 202 // Parse and discard the Gzip wrapper. 203 // This does not work for back-to-back Gzip files. 204 var hdr [10]byte 205 rd := bytes.NewReader(buffer.Bytes()) 206 if _, err := rd.ReadAt(hdr[:], 0); err != nil { 207 log.Fatal(err) 208 } 209 if string(hdr[:3]) != header[:3] || rd.Size() < 18 { 210 log.Fatal("not a gzip file") 211 } 212 if hdr[3]&0xfe > 0 { 213 log.Fatal("no support for extra gzip features") 214 } 215 rds := io.NewSectionReader(rd, 10, rd.Size()-18) // Strip Gzip header/footer 216 217 // Since we know that the writer used the XFLATE format, we can open 218 // the compressed file as an xflate.Reader. If the file was compressed 219 // with regular DEFLATE, then this will return an error. 220 xr, err := xflate.NewReader(rds, nil) 221 if err != nil { 222 log.Fatal(err) 223 } 224 225 // Read from the middle of the stream. 226 buf := make([]byte, 80) 227 pos := int64(len(twain) / 2) 228 if _, err := xr.Seek(pos, io.SeekStart); err != nil { 229 log.Fatal(err) 230 } 231 if _, err := io.ReadFull(xr, buf); err != nil { 232 log.Fatal(err) 233 } 234 235 // Close the Reader. 236 if err := xr.Close(); err != nil { 237 log.Fatal(err) 238 } 239 240 got := string(buf) 241 want := string(twain[pos : pos+80]) 242 fmt.Printf("got: %q\nwant: %q\n", got, want) 243 } 244 245 // Output: 246 // got: "ver, white with foam, the driving spray of spume-flakes, the dim\noutlines of the" 247 // want: "ver, white with foam, the driving spray of spume-flakes, the dim\noutlines of the" 248} 249