1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package gzip implements reading and writing of gzip format compressed files,
6// as specified in RFC 1952.
7package gzip
8
9import (
10	"bufio"
11	"compress/flate"
12	"errors"
13	"hash"
14	"hash/crc32"
15	"io"
16	"time"
17)
18
// Constants describing the fixed part of a gzip member header (RFC 1952).
// The flag* values are bit masks tested against the header's flags byte.
const (
	gzipID1     = 0x1f   // expected value of header byte 0 (first magic byte)
	gzipID2     = 0x8b   // expected value of header byte 1 (second magic byte)
	gzipDeflate = 8      // expected value of header byte 2 (compression method: deflate)
	flagText    = 1 << 0 // FTEXT flag bit
	flagHdrCrc  = 1 << 1 // FHCRC: a 16-bit header checksum follows the optional fields
	flagExtra   = 1 << 2 // FEXTRA: a length-prefixed "extra" field is present
	flagName    = 1 << 3 // FNAME: a NUL-terminated file name is present
	flagComment = 1 << 4 // FCOMMENT: a NUL-terminated comment is present
)
29
// makeReader returns r itself when it already satisfies flate.Reader
// (that is, it can also read single bytes); otherwise it wraps r in a
// bufio.Reader so that byte-at-a-time reads are available.
func makeReader(r io.Reader) flate.Reader {
	fr, ok := r.(flate.Reader)
	if !ok {
		fr = bufio.NewReader(r)
	}
	return fr
}
36
var (
	// ErrChecksum is returned when reading GZIP data that has an invalid checksum,
	// that is, when the CRC-32 or the length stored after the compressed data
	// does not match the data that was actually decompressed.
	ErrChecksum = errors.New("gzip: invalid checksum")
	// ErrHeader is returned when reading GZIP data that has an invalid header:
	// wrong magic bytes or compression method, a name or comment too long
	// for the internal buffer, or a header checksum mismatch.
	ErrHeader = errors.New("gzip: invalid header")
)
43
// Header holds the metadata stored in the header of a gzip file.
// It is exposed as the fields of the Writer and Reader structs.
type Header struct {
	Comment string    // comment (converted from ISO 8859-1 when read)
	Extra   []byte    // "extra data"
	ModTime time.Time // modification time (whole seconds when read)
	Name    string    // file name (converted from ISO 8859-1 when read)
	OS      byte      // operating system type
}
53
// A Reader is an io.Reader that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header.  Reads from the Reader
// return the concatenation of the uncompressed data of each.
// Only the first header is recorded in the Reader fields.
//
// Gzip files store a length and checksum of the uncompressed data.
// The Reader will return an ErrChecksum when Read
// reaches the end of the uncompressed data if it does not
// have the expected length or checksum.  Clients should treat data
// returned by Read as tentative until they receive the io.EOF
// marking the end of the data.
type Reader struct {
	Header
	r            flate.Reader  // compressed input, as returned by makeReader
	decompressor io.ReadCloser // flate reader for the member currently being read
	digest       hash.Hash32   // CRC-32 state, used for header bytes and then for uncompressed data
	size         uint32        // uncompressed bytes read from the current member (wraps at 2^32)
	flg          byte          // flags byte of the most recently parsed header
	buf          [512]byte     // scratch space for header fields, strings and the trailer
	err          error         // sticky error returned by every subsequent Read
}
78
79// NewReader creates a new Reader reading the given reader.
80// The implementation buffers input and may read more data than necessary from r.
81// It is the caller's responsibility to call Close on the Reader when done.
82func NewReader(r io.Reader) (*Reader, error) {
83	z := new(Reader)
84	z.r = makeReader(r)
85	z.digest = crc32.NewIEEE()
86	if err := z.readHeader(true); err != nil {
87		return nil, err
88	}
89	return z, nil
90}
91
// get4 decodes the first four bytes of p as a little-endian uint32.
// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
// It panics if p holds fewer than four bytes.
func get4(p []byte) uint32 {
	b0, b1, b2, b3 := uint32(p[0]), uint32(p[1]), uint32(p[2]), uint32(p[3])
	return b3<<24 | b2<<16 | b1<<8 | b0
}
96
97func (z *Reader) readString() (string, error) {
98	var err error
99	needconv := false
100	for i := 0; ; i++ {
101		if i >= len(z.buf) {
102			return "", ErrHeader
103		}
104		z.buf[i], err = z.r.ReadByte()
105		if err != nil {
106			return "", err
107		}
108		if z.buf[i] > 0x7f {
109			needconv = true
110		}
111		if z.buf[i] == 0 {
112			// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
113			if needconv {
114				s := make([]rune, 0, i)
115				for _, v := range z.buf[0:i] {
116					s = append(s, rune(v))
117				}
118				return string(s), nil
119			}
120			return string(z.buf[0:i]), nil
121		}
122	}
123	panic("not reached")
124}
125
126func (z *Reader) read2() (uint32, error) {
127	_, err := io.ReadFull(z.r, z.buf[0:2])
128	if err != nil {
129		return 0, err
130	}
131	return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil
132}
133
134func (z *Reader) readHeader(save bool) error {
135	_, err := io.ReadFull(z.r, z.buf[0:10])
136	if err != nil {
137		return err
138	}
139	if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
140		return ErrHeader
141	}
142	z.flg = z.buf[3]
143	if save {
144		z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0)
145		// z.buf[8] is xfl, ignored
146		z.OS = z.buf[9]
147	}
148	z.digest.Reset()
149	z.digest.Write(z.buf[0:10])
150
151	if z.flg&flagExtra != 0 {
152		n, err := z.read2()
153		if err != nil {
154			return err
155		}
156		data := make([]byte, n)
157		if _, err = io.ReadFull(z.r, data); err != nil {
158			return err
159		}
160		if save {
161			z.Extra = data
162		}
163	}
164
165	var s string
166	if z.flg&flagName != 0 {
167		if s, err = z.readString(); err != nil {
168			return err
169		}
170		if save {
171			z.Name = s
172		}
173	}
174
175	if z.flg&flagComment != 0 {
176		if s, err = z.readString(); err != nil {
177			return err
178		}
179		if save {
180			z.Comment = s
181		}
182	}
183
184	if z.flg&flagHdrCrc != 0 {
185		n, err := z.read2()
186		if err != nil {
187			return err
188		}
189		sum := z.digest.Sum32() & 0xFFFF
190		if n != sum {
191			return ErrHeader
192		}
193	}
194
195	z.digest.Reset()
196	z.decompressor = flate.NewReader(z.r)
197	return nil
198}
199
200func (z *Reader) Read(p []byte) (n int, err error) {
201	if z.err != nil {
202		return 0, z.err
203	}
204	if len(p) == 0 {
205		return 0, nil
206	}
207
208	n, err = z.decompressor.Read(p)
209	z.digest.Write(p[0:n])
210	z.size += uint32(n)
211	if n != 0 || err != io.EOF {
212		z.err = err
213		return
214	}
215
216	// Finished file; check checksum + size.
217	if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
218		z.err = err
219		return 0, err
220	}
221	crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8])
222	sum := z.digest.Sum32()
223	if sum != crc32 || isize != z.size {
224		z.err = ErrChecksum
225		return 0, z.err
226	}
227
228	// File is ok; is there another?
229	if err = z.readHeader(false); err != nil {
230		z.err = err
231		return
232	}
233
234	// Yes.  Reset and read from it.
235	z.digest.Reset()
236	z.size = 0
237	return z.Read(p)
238}
239
240// Close closes the Reader. It does not close the underlying io.Reader.
241func (z *Reader) Close() error { return z.decompressor.Close() }
242