xz/lzma/header2.go

// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package lzma

import (
	"errors"
	"fmt"
	"io"
)

const (
	// maximum size of compressed data in a chunk
	maxCompressed = 1 << 16
	// maximum size of uncompressed data in a chunk
	maxUncompressed = 1 << 21
)

// chunkType represents the type of an LZMA2 chunk. Note that this
// value is an internal representation and no actual encoding of a LZMA2
// chunk header.
type chunkType byte

// Possible values for the chunk type.
const (
	// end of stream
	cEOS chunkType = iota
	// uncompressed; reset dictionary
	cUD
	// uncompressed; no reset of dictionary
	cU
	// LZMA compressed; no reset
	cL
	// LZMA compressed; reset state
	cLR
	// LZMA compressed; reset state; new property value
	cLRN
	// LZMA compressed; reset state; new property value; reset dictionary
	cLRND
)

// chunkTypeStrings provide a string representation for the chunk types.
var chunkTypeStrings = [...]string{
	cEOS:  "EOS",
	cU:    "U",
	cUD:   "UD",
	cL:    "L",
	cLR:   "LR",
	cLRN:  "LRN",
	cLRND: "LRND",
}

// String returns a string representation of the chunk type.
func (c chunkType) String() string {
	if !(cEOS <= c && c <= cLRND) {
		return "unknown"
	}
	return chunkTypeStrings[c]
}

// Actual encodings for the chunk types in the value. Note that the high
// uncompressed size bits are stored in the header byte additionally.
const (
	hEOS  = 0
	hUD   = 1
	hU    = 2
	hL    = 1 << 7
	hLR   = 1<<7 | 1<<5
	hLRN  = 1<<7 | 1<<6
	hLRND = 1<<7 | 1<<6 | 1<<5
)

// errHeaderByte indicates an unsupported value for the chunk header
// byte. These bytes starts the variable-length chunk header.
var errHeaderByte = errors.New("lzma: unsupported chunk header byte")

// headerChunkType converts the header byte into a chunk type. It
// ignores the uncompressed size bits in the chunk header byte.
func headerChunkType(h byte) (c chunkType, err error) {
	if h&hL == 0 {
		// no compression
		switch h {
		case hEOS:
			c = cEOS
		case hUD:
			c = cUD
		case hU:
			c = cU
		default:
			return 0, errHeaderByte
		}
		return
	}
	switch h & hLRND {
	case hL:
		c = cL
	case hLR:
		c = cLR
	case hLRN:
		c = cLRN
	case hLRND:
		c = cLRND
	default:
		return 0, errHeaderByte
	}
	return
}

// uncompressedHeaderLen provides the length of an uncompressed header
const uncompressedHeaderLen = 3

// headerLen returns the length of the LZMA2 header for a given chunk
// type.
func headerLen(c chunkType) int {
	switch c {
	case cEOS:
		return 1
	case cU, cUD:
		return uncompressedHeaderLen
	case cL, cLR:
		return 5
	case cLRN, cLRND:
		return 6
	}
	panic(fmt.Errorf("unsupported chunk type %d", c))
}

// chunkHeader represents the contents of a chunk header.
type chunkHeader struct {
	ctype        chunkType
	uncompressed uint32
	compressed   uint16
	props        Properties
}

// String returns a string representation of the chunk header.
func (h *chunkHeader) String() string {
	return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed,
		h.compressed, &h.props)
}

// UnmarshalBinary reads the content of the chunk header from the data
// slice. The slice must have the correct length.
func (h *chunkHeader) UnmarshalBinary(data []byte) error {
	if len(data) == 0 {
		return errors.New("no data")
	}
	c, err := headerChunkType(data[0])
	if err != nil {
		return err
	}

	n := headerLen(c)
	if len(data) < n {
		return errors.New("incomplete data")
	}
	if len(data) > n {
		return errors.New("invalid data length")
	}

	*h = chunkHeader{ctype: c}
	if c == cEOS {
		return nil
	}

	h.uncompressed = uint32(uint16BE(data[1:3]))
	if c <= cU {
		return nil
	}
	h.uncompressed |= uint32(data[0]&^hLRND) << 16

	h.compressed = uint16BE(data[3:5])
	if c <= cLR {
		return nil
	}

	h.props, err = PropertiesForCode(data[5])
	return err
}

// MarshalBinary encodes the chunk header value. The function checks
// whether the content of the chunk header is correct.
func (h *chunkHeader) MarshalBinary() (data []byte, err error) {
	if h.ctype > cLRND {
		return nil, errors.New("invalid chunk type")
	}
	if err = h.props.verify(); err != nil {
		return nil, err
	}

	data = make([]byte, headerLen(h.ctype))

	switch h.ctype {
	case cEOS:
		return data, nil
	case cUD:
		data[0] = hUD
	case cU:
		data[0] = hU
	case cL:
		data[0] = hL
	case cLR:
		data[0] = hLR
	case cLRN:
		data[0] = hLRN
	case cLRND:
		data[0] = hLRND
	}

	putUint16BE(data[1:3], uint16(h.uncompressed))
	if h.ctype <= cU {
		return data, nil
	}
	data[0] |= byte(h.uncompressed>>16) &^ hLRND

	putUint16BE(data[3:5], h.compressed)
	if h.ctype <= cLR {
		return data, nil
	}

	data[5] = h.props.Code()
	return data, nil
}

// readChunkHeader reads the chunk header from the IO reader.
func readChunkHeader(r io.Reader) (h *chunkHeader, err error) {
	p := make([]byte, 1, 6)
	if _, err = io.ReadFull(r, p); err != nil {
		return
	}
	c, err := headerChunkType(p[0])
	if err != nil {
		return
	}
	p = p[:headerLen(c)]
	if _, err = io.ReadFull(r, p[1:]); err != nil {
		return
	}
	h = new(chunkHeader)
	if err = h.UnmarshalBinary(p); err != nil {
		return nil, err
	}
	return h, nil
}

// uint16BE converts a big-endian uint16 representation to an uint16
// value.
func uint16BE(p []byte) uint16 {
	return uint16(p[0])<<8 | uint16(p[1])
}

// putUint16BE puts the big-endian uint16 presentation into the given
// slice.
func putUint16BE(p []byte, x uint16) {
	p[0] = byte(x >> 8)
	p[1] = byte(x)
}

// chunkState is used to manage the state of the chunks
type chunkState byte

// start and stop define the initial and terminating state of the chunk
// state
const (
	start chunkState = 'S'
	stop             = 'T'
)

// errors for the chunk state handling
var (
	errChunkType = errors.New("lzma: unexpected chunk type")
	errState     = errors.New("lzma: wrong chunk state")
)

// next transitions state based on chunk type input
func (c *chunkState) next(ctype chunkType) error {
	switch *c {
	// start state
	case 'S':
		switch ctype {
		case cEOS:
			*c = 'T'
		case cUD:
			*c = 'R'
		case cLRND:
			*c = 'L'
		default:
			return errChunkType
		}
	// normal LZMA mode
	case 'L':
		switch ctype {
		case cEOS:
			*c = 'T'
		case cUD:
			*c = 'R'
		case cU:
			*c = 'U'
		case cL, cLR, cLRN, cLRND:
			break
		default:
			return errChunkType
		}
	// reset required
	case 'R':
		switch ctype {
		case cEOS:
			*c = 'T'
		case cUD, cU:
			break
		case cLRN, cLRND:
			*c = 'L'
		default:
			return errChunkType
		}
	// uncompressed
	case 'U':
		switch ctype {
		case cEOS:
			*c = 'T'
		case cUD:
			*c = 'R'
		case cU:
			break
		case cL, cLR, cLRN, cLRND:
			*c = 'L'
		default:
			return errChunkType
		}
	// terminal state
	case 'T':
		return errChunkType
	default:
		return errState
	}
	return nil
}

// defaultChunkType returns the default chunk type for each chunk state.
func (c chunkState) defaultChunkType() chunkType {
	switch c {
	case 'S':
		return cLRND
	case 'L', 'U':
		return cL
	case 'R':
		return cLRN
	default:
		// no error
		return cEOS
	}
}

// maxDictCap defines the maximum dictionary capacity supported by the
// LZMA2 dictionary capacity encoding.
const maxDictCap = 1<<32 - 1

// maxDictCapCode defines the maximum dictionary capacity code.
const maxDictCapCode = 40

// The function decodes the dictionary capacity byte, but doesn't change
// for the correct range of the given byte.
func decodeDictCap(c byte) int64 {
	return (2 | int64(c)&1) << (11 + (c>>1)&0x1f)
}

// DecodeDictCap decodes the encoded dictionary capacity. The function
// returns an error if the code is out of range.
func DecodeDictCap(c byte) (n int64, err error) {
	if c >= maxDictCapCode {
		if c == maxDictCapCode {
			return maxDictCap, nil
		}
		return 0, errors.New("lzma: invalid dictionary size code")
	}
	return decodeDictCap(c), nil
}

// EncodeDictCap encodes a dictionary capacity. The function returns the
// code for the capacity that is greater or equal n. If n exceeds the
// maximum support dictionary capacity, the maximum value is returned.
func EncodeDictCap(n int64) byte {
	a, b := byte(0), byte(40)
	for a < b {
		c := a + (b-a)>>1
		m := decodeDictCap(c)
		if n <= m {
			if n == m {
				return c
			}
			b = c
		} else {
			a = c + 1
		}
	}
	return a
}