1// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package xz supports the compression and decompression of xz files. It
6// supports version 1.0.4 of the specification without the non-LZMA2
7// filters. See http://tukaani.org/xz/xz-file-format-1.0.4.txt
8package xz
9
10import (
11	"bytes"
12	"errors"
13	"fmt"
14	"hash"
15	"io"
16
17	"github.com/ulikunitz/xz/internal/xlog"
18	"github.com/ulikunitz/xz/lzma"
19)
20
// ReaderConfig holds the parameters for the xz reader.
//
// DictCap is the dictionary capacity used for decoding. Setting
// SingleStream tells the reader to assume that the underlying data
// contains exactly one xz stream.
type ReaderConfig struct {
	DictCap      int
	SingleStream bool
}

// fill assigns the default to every field that still holds its zero
// value. At present only DictCap has a default, which is 8 MiB.
func (c *ReaderConfig) fill() {
	const defaultDictCap = 8 << 20

	if c.DictCap != 0 {
		// An explicit capacity was configured; leave it untouched.
		return
	}
	c.DictCap = defaultDictCap
}
35
36// Verify checks the reader parameters for Validity. Zero values will be
37// replaced by default values.
38func (c *ReaderConfig) Verify() error {
39	if c == nil {
40		return errors.New("xz: reader parameters are nil")
41	}
42	lc := lzma.Reader2Config{DictCap: c.DictCap}
43	if err := lc.Verify(); err != nil {
44		return err
45	}
46	return nil
47}
48
49// Reader supports the reading of one or multiple xz streams.
50type Reader struct {
51	ReaderConfig
52
53	xz io.Reader
54	sr *streamReader
55}
56
57// streamReader decodes a single xz stream
58type streamReader struct {
59	ReaderConfig
60
61	xz      io.Reader
62	br      *blockReader
63	newHash func() hash.Hash
64	h       header
65	index   []record
66}
67
68// NewReader creates a new xz reader using the default parameters.
69// The function reads and checks the header of the first XZ stream. The
70// reader will process multiple streams including padding.
71func NewReader(xz io.Reader) (r *Reader, err error) {
72	return ReaderConfig{}.NewReader(xz)
73}
74
75// NewReader creates an xz stream reader. The created reader will be
76// able to process multiple streams and padding unless a SingleStream
77// has been set in the reader configuration c.
78func (c ReaderConfig) NewReader(xz io.Reader) (r *Reader, err error) {
79	if err = c.Verify(); err != nil {
80		return nil, err
81	}
82	r = &Reader{
83		ReaderConfig: c,
84		xz:           xz,
85	}
86	if r.sr, err = c.newStreamReader(xz); err != nil {
87		if err == io.EOF {
88			err = io.ErrUnexpectedEOF
89		}
90		return nil, err
91	}
92	return r, nil
93}
94
95var errUnexpectedData = errors.New("xz: unexpected data after stream")
96
97// Read reads uncompressed data from the stream.
98func (r *Reader) Read(p []byte) (n int, err error) {
99	for n < len(p) {
100		if r.sr == nil {
101			if r.SingleStream {
102				data := make([]byte, 1)
103				_, err = io.ReadFull(r.xz, data)
104				if err != io.EOF {
105					return n, errUnexpectedData
106				}
107				return n, io.EOF
108			}
109			for {
110				r.sr, err = r.ReaderConfig.newStreamReader(r.xz)
111				if err != errPadding {
112					break
113				}
114			}
115			if err != nil {
116				return n, err
117			}
118		}
119		k, err := r.sr.Read(p[n:])
120		n += k
121		if err != nil {
122			if err == io.EOF {
123				r.sr = nil
124				continue
125			}
126			return n, err
127		}
128	}
129	return n, nil
130}
131
132var errPadding = errors.New("xz: padding (4 zero bytes) encountered")
133
134// newStreamReader creates a new xz stream reader using the given configuration
135// parameters. NewReader reads and checks the header of the xz stream.
136func (c ReaderConfig) newStreamReader(xz io.Reader) (r *streamReader, err error) {
137	if err = c.Verify(); err != nil {
138		return nil, err
139	}
140	data := make([]byte, HeaderLen)
141	if _, err := io.ReadFull(xz, data[:4]); err != nil {
142		return nil, err
143	}
144	if bytes.Equal(data[:4], []byte{0, 0, 0, 0}) {
145		return nil, errPadding
146	}
147	if _, err = io.ReadFull(xz, data[4:]); err != nil {
148		if err == io.EOF {
149			err = io.ErrUnexpectedEOF
150		}
151		return nil, err
152	}
153	r = &streamReader{
154		ReaderConfig: c,
155		xz:           xz,
156		index:        make([]record, 0, 4),
157	}
158	if err = r.h.UnmarshalBinary(data); err != nil {
159		return nil, err
160	}
161	xlog.Debugf("xz header %s", r.h)
162	if r.newHash, err = newHashFunc(r.h.flags); err != nil {
163		return nil, err
164	}
165	return r, nil
166}
167
168// errIndex indicates an error with the xz file index.
169var errIndex = errors.New("xz: error in xz file index")
170
171// readTail reads the index body and the xz footer.
172func (r *streamReader) readTail() error {
173	index, n, err := readIndexBody(r.xz)
174	if err != nil {
175		if err == io.EOF {
176			err = io.ErrUnexpectedEOF
177		}
178		return err
179	}
180	if len(index) != len(r.index) {
181		return fmt.Errorf("xz: index length is %d; want %d",
182			len(index), len(r.index))
183	}
184	for i, rec := range r.index {
185		if rec != index[i] {
186			return fmt.Errorf("xz: record %d is %v; want %v",
187				i, rec, index[i])
188		}
189	}
190
191	p := make([]byte, footerLen)
192	if _, err = io.ReadFull(r.xz, p); err != nil {
193		if err == io.EOF {
194			err = io.ErrUnexpectedEOF
195		}
196		return err
197	}
198	var f footer
199	if err = f.UnmarshalBinary(p); err != nil {
200		return err
201	}
202	xlog.Debugf("xz footer %s", f)
203	if f.flags != r.h.flags {
204		return errors.New("xz: footer flags incorrect")
205	}
206	if f.indexSize != int64(n)+1 {
207		return errors.New("xz: index size in footer wrong")
208	}
209	return nil
210}
211
212// Read reads actual data from the xz stream.
213func (r *streamReader) Read(p []byte) (n int, err error) {
214	for n < len(p) {
215		if r.br == nil {
216			bh, hlen, err := readBlockHeader(r.xz)
217			if err != nil {
218				if err == errIndexIndicator {
219					if err = r.readTail(); err != nil {
220						return n, err
221					}
222					return n, io.EOF
223				}
224				return n, err
225			}
226			xlog.Debugf("block %v", *bh)
227			r.br, err = r.ReaderConfig.newBlockReader(r.xz, bh,
228				hlen, r.newHash())
229			if err != nil {
230				return n, err
231			}
232		}
233		k, err := r.br.Read(p[n:])
234		n += k
235		if err != nil {
236			if err == io.EOF {
237				r.index = append(r.index, r.br.record())
238				r.br = nil
239			} else {
240				return n, err
241			}
242		}
243	}
244	return n, nil
245}
246
// countingReader wraps a reader and keeps track of the total number of
// bytes that have been read through it.
type countingReader struct {
	r io.Reader
	n int64
}

// Read forwards the call to the wrapped reader and adds the number of
// bytes it delivered to the n field. The result of the wrapped reader is
// returned unchanged.
func (lr *countingReader) Read(p []byte) (n int, err error) {
	got, readErr := lr.r.Read(p)
	lr.n += int64(got)
	return got, readErr
}
259
260// blockReader supports the reading of a block.
261type blockReader struct {
262	lxz       countingReader
263	header    *blockHeader
264	headerLen int
265	n         int64
266	hash      hash.Hash
267	r         io.Reader
268	err       error
269}
270
271// newBlockReader creates a new block reader.
272func (c *ReaderConfig) newBlockReader(xz io.Reader, h *blockHeader,
273	hlen int, hash hash.Hash) (br *blockReader, err error) {
274
275	br = &blockReader{
276		lxz:       countingReader{r: xz},
277		header:    h,
278		headerLen: hlen,
279		hash:      hash,
280	}
281
282	fr, err := c.newFilterReader(&br.lxz, h.filters)
283	if err != nil {
284		return nil, err
285	}
286	br.r = io.TeeReader(fr, br.hash)
287
288	return br, nil
289}
290
291// uncompressedSize returns the uncompressed size of the block.
292func (br *blockReader) uncompressedSize() int64 {
293	return br.n
294}
295
296// compressedSize returns the compressed size of the block.
297func (br *blockReader) compressedSize() int64 {
298	return br.lxz.n
299}
300
301// unpaddedSize computes the unpadded size for the block.
302func (br *blockReader) unpaddedSize() int64 {
303	n := int64(br.headerLen)
304	n += br.compressedSize()
305	n += int64(br.hash.Size())
306	return n
307}
308
309// record returns the index record for the current block.
310func (br *blockReader) record() record {
311	return record{br.unpaddedSize(), br.uncompressedSize()}
312}
313
314// errBlockSize indicates that the size of the block in the block header
315// is wrong.
316var errBlockSize = errors.New("xz: wrong uncompressed size for block")
317
318// Read reads data from the block.
319func (br *blockReader) Read(p []byte) (n int, err error) {
320	n, err = br.r.Read(p)
321	br.n += int64(n)
322
323	u := br.header.uncompressedSize
324	if u >= 0 && br.uncompressedSize() > u {
325		return n, errors.New("xz: wrong uncompressed size for block")
326	}
327	c := br.header.compressedSize
328	if c >= 0 && br.compressedSize() > c {
329		return n, errors.New("xz: wrong compressed size for block")
330	}
331	if err != io.EOF {
332		return n, err
333	}
334	if br.uncompressedSize() < u || br.compressedSize() < c {
335		return n, io.ErrUnexpectedEOF
336	}
337
338	s := br.hash.Size()
339	k := padLen(br.lxz.n)
340	q := make([]byte, k+s, k+2*s)
341	if _, err = io.ReadFull(br.lxz.r, q); err != nil {
342		if err == io.EOF {
343			err = io.ErrUnexpectedEOF
344		}
345		return n, err
346	}
347	if !allZeros(q[:k]) {
348		return n, errors.New("xz: non-zero block padding")
349	}
350	checkSum := q[k:]
351	computedSum := br.hash.Sum(checkSum[s:])
352	if !bytes.Equal(checkSum, computedSum) {
353		return n, errors.New("xz: checksum error for block")
354	}
355	return n, io.EOF
356}
357
358func (c *ReaderConfig) newFilterReader(r io.Reader, f []filter) (fr io.Reader,
359	err error) {
360
361	if err = verifyFilters(f); err != nil {
362		return nil, err
363	}
364
365	fr = r
366	for i := len(f) - 1; i >= 0; i-- {
367		fr, err = f[i].reader(fr, c)
368		if err != nil {
369			return nil, err
370		}
371	}
372	return fr, nil
373}
374