1// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package lzma
6
7import (
8	"errors"
9	"fmt"
10	"io"
11)
12
// maxCompressed is the maximum size of the compressed data in a single
// chunk.
const maxCompressed = 1 << 16

// maxUncompressed is the maximum size of the uncompressed data in a
// single chunk.
const maxUncompressed = 1 << 21
19
// chunkType identifies the kind of an LZMA2 chunk. The values form an
// internal enumeration; they are not the bytes that are stored in a
// chunk header.
type chunkType byte

// The chunk types in ascending order. The numeric order is significant
// because code in this package compares chunk types with <=.
const (
	// end of stream
	cEOS chunkType = iota
	// uncompressed data; the dictionary is reset
	cUD
	// uncompressed data; the dictionary is kept
	cU
	// LZMA compressed data; nothing is reset
	cL
	// LZMA compressed data; the state is reset
	cLR
	// LZMA compressed data; the state is reset and new properties
	// are provided
	cLRN
	// LZMA compressed data; the state and the dictionary are reset and
	// new properties are provided
	cLRND
)

// chunkTypeStrings maps every chunk type to its short name.
var chunkTypeStrings = [...]string{
	cEOS:  "EOS",
	cUD:   "UD",
	cU:    "U",
	cL:    "L",
	cLR:   "LR",
	cLRN:  "LRN",
	cLRND: "LRND",
}

// String returns the short name of the chunk type. Values outside of
// the enumeration are reported as "unknown".
func (c chunkType) String() string {
	if int(c) < len(chunkTypeStrings) {
		return chunkTypeStrings[c]
	}
	return "unknown"
}
61
// Header byte values of the chunk types as they are encoded in a chunk
// header. Bit 7 marks an LZMA chunk; for those chunks bits 5 and 6
// select the variant and the low five bits of the header byte
// additionally carry the high bits of the uncompressed size.
const (
	hEOS  = 0x00
	hUD   = 0x01
	hU    = 0x02
	hL    = 0x80
	hLR   = hL | 0x20
	hLRN  = hL | 0x40
	hLRND = hL | 0x40 | 0x20
)
73
74// errHeaderByte indicates an unsupported value for the chunk header
75// byte. These bytes starts the variable-length chunk header.
76var errHeaderByte = errors.New("lzma: unsupported chunk header byte")
77
78// headerChunkType converts the header byte into a chunk type. It
79// ignores the uncompressed size bits in the chunk header byte.
80func headerChunkType(h byte) (c chunkType, err error) {
81	if h&hL == 0 {
82		// no compression
83		switch h {
84		case hEOS:
85			c = cEOS
86		case hUD:
87			c = cUD
88		case hU:
89			c = cU
90		default:
91			return 0, errHeaderByte
92		}
93		return
94	}
95	switch h & hLRND {
96	case hL:
97		c = cL
98	case hLR:
99		c = cLR
100	case hLRN:
101		c = cLRN
102	case hLRND:
103		c = cLRND
104	default:
105		return 0, errHeaderByte
106	}
107	return
108}
109
110// uncompressedHeaderLen provides the length of an uncompressed header
111const uncompressedHeaderLen = 3
112
113// headerLen returns the length of the LZMA2 header for a given chunk
114// type.
115func headerLen(c chunkType) int {
116	switch c {
117	case cEOS:
118		return 1
119	case cU, cUD:
120		return uncompressedHeaderLen
121	case cL, cLR:
122		return 5
123	case cLRN, cLRND:
124		return 6
125	}
126	panic(fmt.Errorf("unsupported chunk type %d", c))
127}
128
129// chunkHeader represents the contents of a chunk header.
130type chunkHeader struct {
131	ctype        chunkType
132	uncompressed uint32
133	compressed   uint16
134	props        Properties
135}
136
137// String returns a string representation of the chunk header.
138func (h *chunkHeader) String() string {
139	return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed,
140		h.compressed, &h.props)
141}
142
143// UnmarshalBinary reads the content of the chunk header from the data
144// slice. The slice must have the correct length.
145func (h *chunkHeader) UnmarshalBinary(data []byte) error {
146	if len(data) == 0 {
147		return errors.New("no data")
148	}
149	c, err := headerChunkType(data[0])
150	if err != nil {
151		return err
152	}
153
154	n := headerLen(c)
155	if len(data) < n {
156		return errors.New("incomplete data")
157	}
158	if len(data) > n {
159		return errors.New("invalid data length")
160	}
161
162	*h = chunkHeader{ctype: c}
163	if c == cEOS {
164		return nil
165	}
166
167	h.uncompressed = uint32(uint16BE(data[1:3]))
168	if c <= cU {
169		return nil
170	}
171	h.uncompressed |= uint32(data[0]&^hLRND) << 16
172
173	h.compressed = uint16BE(data[3:5])
174	if c <= cLR {
175		return nil
176	}
177
178	h.props, err = PropertiesForCode(data[5])
179	return err
180}
181
182// MarshalBinary encodes the chunk header value. The function checks
183// whether the content of the chunk header is correct.
184func (h *chunkHeader) MarshalBinary() (data []byte, err error) {
185	if h.ctype > cLRND {
186		return nil, errors.New("invalid chunk type")
187	}
188	if err = h.props.verify(); err != nil {
189		return nil, err
190	}
191
192	data = make([]byte, headerLen(h.ctype))
193
194	switch h.ctype {
195	case cEOS:
196		return data, nil
197	case cUD:
198		data[0] = hUD
199	case cU:
200		data[0] = hU
201	case cL:
202		data[0] = hL
203	case cLR:
204		data[0] = hLR
205	case cLRN:
206		data[0] = hLRN
207	case cLRND:
208		data[0] = hLRND
209	}
210
211	putUint16BE(data[1:3], uint16(h.uncompressed))
212	if h.ctype <= cU {
213		return data, nil
214	}
215	data[0] |= byte(h.uncompressed>>16) &^ hLRND
216
217	putUint16BE(data[3:5], h.compressed)
218	if h.ctype <= cLR {
219		return data, nil
220	}
221
222	data[5] = h.props.Code()
223	return data, nil
224}
225
226// readChunkHeader reads the chunk header from the IO reader.
227func readChunkHeader(r io.Reader) (h *chunkHeader, err error) {
228	p := make([]byte, 1, 6)
229	if _, err = io.ReadFull(r, p); err != nil {
230		return
231	}
232	c, err := headerChunkType(p[0])
233	if err != nil {
234		return
235	}
236	p = p[:headerLen(c)]
237	if _, err = io.ReadFull(r, p[1:]); err != nil {
238		return
239	}
240	h = new(chunkHeader)
241	if err = h.UnmarshalBinary(p); err != nil {
242		return nil, err
243	}
244	return h, nil
245}
246
// uint16BE decodes the first two bytes of p as a big-endian uint16
// value. It panics if p has fewer than two bytes.
func uint16BE(p []byte) uint16 {
	hi := uint16(p[0])
	lo := uint16(p[1])
	return hi<<8 | lo
}
252
// putUint16BE stores the big-endian representation of x in the first
// two bytes of p. It panics if p has fewer than two bytes.
func putUint16BE(p []byte, x uint16) {
	hi, lo := byte(x>>8), byte(x)
	p[0] = hi
	p[1] = lo
}
259
// chunkState is used to manage the state of the chunks. Each state is
// identified by a single ASCII letter.
type chunkState byte

// start and stop define the initial and terminating state of the chunk
// state. Both constants are declared with the type chunkState; without
// an explicit type stop would be an untyped rune constant.
const (
	start chunkState = 'S'
	stop  chunkState = 'T'
)

// errors for the chunk state handling
var (
	// errChunkType reports a chunk type that is not allowed in the
	// current state.
	errChunkType = errors.New("lzma: unexpected chunk type")
	// errState reports a chunk state value that is not a known state.
	errState = errors.New("lzma: wrong chunk state")
)
275
276// next transitions state based on chunk type input
277func (c *chunkState) next(ctype chunkType) error {
278	switch *c {
279	// start state
280	case 'S':
281		switch ctype {
282		case cEOS:
283			*c = 'T'
284		case cUD:
285			*c = 'R'
286		case cLRND:
287			*c = 'L'
288		default:
289			return errChunkType
290		}
291	// normal LZMA mode
292	case 'L':
293		switch ctype {
294		case cEOS:
295			*c = 'T'
296		case cUD:
297			*c = 'R'
298		case cU:
299			*c = 'U'
300		case cL, cLR, cLRN, cLRND:
301			break
302		default:
303			return errChunkType
304		}
305	// reset required
306	case 'R':
307		switch ctype {
308		case cEOS:
309			*c = 'T'
310		case cUD, cU:
311			break
312		case cLRN, cLRND:
313			*c = 'L'
314		default:
315			return errChunkType
316		}
317	// uncompressed
318	case 'U':
319		switch ctype {
320		case cEOS:
321			*c = 'T'
322		case cUD:
323			*c = 'R'
324		case cU:
325			break
326		case cL, cLR, cLRN, cLRND:
327			*c = 'L'
328		default:
329			return errChunkType
330		}
331	// terminal state
332	case 'T':
333		return errChunkType
334	default:
335		return errState
336	}
337	return nil
338}
339
340// defaultChunkType returns the default chunk type for each chunk state.
341func (c chunkState) defaultChunkType() chunkType {
342	switch c {
343	case 'S':
344		return cLRND
345	case 'L', 'U':
346		return cL
347	case 'R':
348		return cLRN
349	default:
350		// no error
351		return cEOS
352	}
353}
354
// Limits of the LZMA2 dictionary capacity encoding.
const (
	// maxDictCap defines the maximum dictionary capacity supported by
	// the LZMA2 dictionary capacity encoding.
	maxDictCap = 1<<32 - 1

	// maxDictCapCode defines the maximum dictionary capacity code. It
	// is the code that DecodeDictCap maps to maxDictCap.
	maxDictCapCode = 40
)
361
// decodeDictCap decodes the dictionary capacity byte, but doesn't check
// whether the given byte is in the valid range. The lowest bit of c
// selects the mantissa 2 or 3; the next five bits plus 11 give the
// shift that is applied to the mantissa.
func decodeDictCap(c byte) int64 {
	mantissa := int64(2 | c&1)
	exponent := uint(c>>1)&0x1f + 11
	return mantissa << exponent
}
367
368// DecodeDictCap decodes the encoded dictionary capacity. The function
369// returns an error if the code is out of range.
370func DecodeDictCap(c byte) (n int64, err error) {
371	if c >= maxDictCapCode {
372		if c == maxDictCapCode {
373			return maxDictCap, nil
374		}
375		return 0, errors.New("lzma: invalid dictionary size code")
376	}
377	return decodeDictCap(c), nil
378}
379
380// EncodeDictCap encodes a dictionary capacity. The function returns the
381// code for the capacity that is greater or equal n. If n exceeds the
382// maximum support dictionary capacity, the maximum value is returned.
383func EncodeDictCap(n int64) byte {
384	a, b := byte(0), byte(40)
385	for a < b {
386		c := a + (b-a)>>1
387		m := decodeDictCap(c)
388		if n <= m {
389			if n == m {
390				return c
391			}
392			b = c
393		} else {
394			a = c + 1
395		}
396	}
397	return a
398}
399