1// Copyright 2014-2017 Ulrich Kunitz. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package xz supports the compression and decompression of xz files. It 6// supports version 1.0.4 of the specification without the non-LZMA2 7// filters. See http://tukaani.org/xz/xz-file-format-1.0.4.txt 8package xz 9 10import ( 11 "bytes" 12 "errors" 13 "fmt" 14 "hash" 15 "io" 16 17 "github.com/ulikunitz/xz/internal/xlog" 18 "github.com/ulikunitz/xz/lzma" 19) 20 21// ReaderConfig defines the parameters for the xz reader. The 22// SingleStream parameter requests the reader to assume that the 23// underlying stream contains only a single stream. 24type ReaderConfig struct { 25 DictCap int 26 SingleStream bool 27} 28 29// fill replaces all zero values with their default values. 30func (c *ReaderConfig) fill() { 31 if c.DictCap == 0 { 32 c.DictCap = 8 * 1024 * 1024 33 } 34} 35 36// Verify checks the reader parameters for Validity. Zero values will be 37// replaced by default values. 38func (c *ReaderConfig) Verify() error { 39 if c == nil { 40 return errors.New("xz: reader parameters are nil") 41 } 42 lc := lzma.Reader2Config{DictCap: c.DictCap} 43 if err := lc.Verify(); err != nil { 44 return err 45 } 46 return nil 47} 48 49// Reader supports the reading of one or multiple xz streams. 50type Reader struct { 51 ReaderConfig 52 53 xz io.Reader 54 sr *streamReader 55} 56 57// streamReader decodes a single xz stream 58type streamReader struct { 59 ReaderConfig 60 61 xz io.Reader 62 br *blockReader 63 newHash func() hash.Hash 64 h header 65 index []record 66} 67 68// NewReader creates a new xz reader using the default parameters. 69// The function reads and checks the header of the first XZ stream. The 70// reader will process multiple streams including padding. 71func NewReader(xz io.Reader) (r *Reader, err error) { 72 return ReaderConfig{}.NewReader(xz) 73} 74 75// NewReader creates an xz stream reader. The created reader will be 76// able to process multiple streams and padding unless a SingleStream 77// has been set in the reader configuration c. 78func (c ReaderConfig) NewReader(xz io.Reader) (r *Reader, err error) { 79 if err = c.Verify(); err != nil { 80 return nil, err 81 } 82 r = &Reader{ 83 ReaderConfig: c, 84 xz: xz, 85 } 86 if r.sr, err = c.newStreamReader(xz); err != nil { 87 if err == io.EOF { 88 err = io.ErrUnexpectedEOF 89 } 90 return nil, err 91 } 92 return r, nil 93} 94 95var errUnexpectedData = errors.New("xz: unexpected data after stream") 96 97// Read reads uncompressed data from the stream. 98func (r *Reader) Read(p []byte) (n int, err error) { 99 for n < len(p) { 100 if r.sr == nil { 101 if r.SingleStream { 102 data := make([]byte, 1) 103 _, err = io.ReadFull(r.xz, data) 104 if err != io.EOF { 105 return n, errUnexpectedData 106 } 107 return n, io.EOF 108 } 109 for { 110 r.sr, err = r.ReaderConfig.newStreamReader(r.xz) 111 if err != errPadding { 112 break 113 } 114 } 115 if err != nil { 116 return n, err 117 } 118 } 119 k, err := r.sr.Read(p[n:]) 120 n += k 121 if err != nil { 122 if err == io.EOF { 123 r.sr = nil 124 continue 125 } 126 return n, err 127 } 128 } 129 return n, nil 130} 131 132var errPadding = errors.New("xz: padding (4 zero bytes) encountered") 133 134// newStreamReader creates a new xz stream reader using the given configuration 135// parameters. NewReader reads and checks the header of the xz stream. 136func (c ReaderConfig) newStreamReader(xz io.Reader) (r *streamReader, err error) { 137 if err = c.Verify(); err != nil { 138 return nil, err 139 } 140 data := make([]byte, HeaderLen) 141 if _, err := io.ReadFull(xz, data[:4]); err != nil { 142 return nil, err 143 } 144 if bytes.Equal(data[:4], []byte{0, 0, 0, 0}) { 145 return nil, errPadding 146 } 147 if _, err = io.ReadFull(xz, data[4:]); err != nil { 148 if err == io.EOF { 149 err = io.ErrUnexpectedEOF 150 } 151 return nil, err 152 } 153 r = &streamReader{ 154 ReaderConfig: c, 155 xz: xz, 156 index: make([]record, 0, 4), 157 } 158 if err = r.h.UnmarshalBinary(data); err != nil { 159 return nil, err 160 } 161 xlog.Debugf("xz header %s", r.h) 162 if r.newHash, err = newHashFunc(r.h.flags); err != nil { 163 return nil, err 164 } 165 return r, nil 166} 167 168// errIndex indicates an error with the xz file index. 169var errIndex = errors.New("xz: error in xz file index") 170 171// readTail reads the index body and the xz footer. 172func (r *streamReader) readTail() error { 173 index, n, err := readIndexBody(r.xz) 174 if err != nil { 175 if err == io.EOF { 176 err = io.ErrUnexpectedEOF 177 } 178 return err 179 } 180 if len(index) != len(r.index) { 181 return fmt.Errorf("xz: index length is %d; want %d", 182 len(index), len(r.index)) 183 } 184 for i, rec := range r.index { 185 if rec != index[i] { 186 return fmt.Errorf("xz: record %d is %v; want %v", 187 i, rec, index[i]) 188 } 189 } 190 191 p := make([]byte, footerLen) 192 if _, err = io.ReadFull(r.xz, p); err != nil { 193 if err == io.EOF { 194 err = io.ErrUnexpectedEOF 195 } 196 return err 197 } 198 var f footer 199 if err = f.UnmarshalBinary(p); err != nil { 200 return err 201 } 202 xlog.Debugf("xz footer %s", f) 203 if f.flags != r.h.flags { 204 return errors.New("xz: footer flags incorrect") 205 } 206 if f.indexSize != int64(n)+1 { 207 return errors.New("xz: index size in footer wrong") 208 } 209 return nil 210} 211 212// Read reads actual data from the xz stream. 213func (r *streamReader) Read(p []byte) (n int, err error) { 214 for n < len(p) { 215 if r.br == nil { 216 bh, hlen, err := readBlockHeader(r.xz) 217 if err != nil { 218 if err == errIndexIndicator { 219 if err = r.readTail(); err != nil { 220 return n, err 221 } 222 return n, io.EOF 223 } 224 return n, err 225 } 226 xlog.Debugf("block %v", *bh) 227 r.br, err = r.ReaderConfig.newBlockReader(r.xz, bh, 228 hlen, r.newHash()) 229 if err != nil { 230 return n, err 231 } 232 } 233 k, err := r.br.Read(p[n:]) 234 n += k 235 if err != nil { 236 if err == io.EOF { 237 r.index = append(r.index, r.br.record()) 238 r.br = nil 239 } else { 240 return n, err 241 } 242 } 243 } 244 return n, nil 245} 246 247// countingReader is a reader that counts the bytes read. 248type countingReader struct { 249 r io.Reader 250 n int64 251} 252 253// Read reads data from the wrapped reader and adds it to the n field. 254func (lr *countingReader) Read(p []byte) (n int, err error) { 255 n, err = lr.r.Read(p) 256 lr.n += int64(n) 257 return n, err 258} 259 260// blockReader supports the reading of a block. 261type blockReader struct { 262 lxz countingReader 263 header *blockHeader 264 headerLen int 265 n int64 266 hash hash.Hash 267 r io.Reader 268 err error 269} 270 271// newBlockReader creates a new block reader. 272func (c *ReaderConfig) newBlockReader(xz io.Reader, h *blockHeader, 273 hlen int, hash hash.Hash) (br *blockReader, err error) { 274 275 br = &blockReader{ 276 lxz: countingReader{r: xz}, 277 header: h, 278 headerLen: hlen, 279 hash: hash, 280 } 281 282 fr, err := c.newFilterReader(&br.lxz, h.filters) 283 if err != nil { 284 return nil, err 285 } 286 br.r = io.TeeReader(fr, br.hash) 287 288 return br, nil 289} 290 291// uncompressedSize returns the uncompressed size of the block. 292func (br *blockReader) uncompressedSize() int64 { 293 return br.n 294} 295 296// compressedSize returns the compressed size of the block. 297func (br *blockReader) compressedSize() int64 { 298 return br.lxz.n 299} 300 301// unpaddedSize computes the unpadded size for the block. 302func (br *blockReader) unpaddedSize() int64 { 303 n := int64(br.headerLen) 304 n += br.compressedSize() 305 n += int64(br.hash.Size()) 306 return n 307} 308 309// record returns the index record for the current block. 310func (br *blockReader) record() record { 311 return record{br.unpaddedSize(), br.uncompressedSize()} 312} 313 314// errBlockSize indicates that the size of the block in the block header 315// is wrong. 316var errBlockSize = errors.New("xz: wrong uncompressed size for block") 317 318// Read reads data from the block. 319func (br *blockReader) Read(p []byte) (n int, err error) { 320 n, err = br.r.Read(p) 321 br.n += int64(n) 322 323 u := br.header.uncompressedSize 324 if u >= 0 && br.uncompressedSize() > u { 325 return n, errors.New("xz: wrong uncompressed size for block") 326 } 327 c := br.header.compressedSize 328 if c >= 0 && br.compressedSize() > c { 329 return n, errors.New("xz: wrong compressed size for block") 330 } 331 if err != io.EOF { 332 return n, err 333 } 334 if br.uncompressedSize() < u || br.compressedSize() < c { 335 return n, io.ErrUnexpectedEOF 336 } 337 338 s := br.hash.Size() 339 k := padLen(br.lxz.n) 340 q := make([]byte, k+s, k+2*s) 341 if _, err = io.ReadFull(br.lxz.r, q); err != nil { 342 if err == io.EOF { 343 err = io.ErrUnexpectedEOF 344 } 345 return n, err 346 } 347 if !allZeros(q[:k]) { 348 return n, errors.New("xz: non-zero block padding") 349 } 350 checkSum := q[k:] 351 computedSum := br.hash.Sum(checkSum[s:]) 352 if !bytes.Equal(checkSum, computedSum) { 353 return n, errors.New("xz: checksum error for block") 354 } 355 return n, io.EOF 356} 357 358func (c *ReaderConfig) newFilterReader(r io.Reader, f []filter) (fr io.Reader, 359 err error) { 360 361 if err = verifyFilters(f); err != nil { 362 return nil, err 363 } 364 365 fr = r 366 for i := len(f) - 1; i >= 0; i-- { 367 fr, err = f[i].reader(fr, c) 368 if err != nil { 369 return nil, err 370 } 371 } 372 return fr, nil 373} 374