1// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package lzma
6
7import (
8	"fmt"
9	"io"
10)
11
12// opLenMargin provides the upper limit of the number of bytes required
13// to encode a single operation.
14const opLenMargin = 10
15
16// compressFlags control the compression process.
17type compressFlags uint32
18
19// Values for compressFlags.
20const (
21	// all data should be compressed, even if compression is not
22	// optimal.
23	all compressFlags = 1 << iota
24)
25
26// encoderFlags provide the flags for an encoder.
27type encoderFlags uint32
28
29// Flags for the encoder.
30const (
31	// eosMarker requests an EOS marker to be written.
32	eosMarker encoderFlags = 1 << iota
33)
34
35// Encoder compresses data buffered in the encoder dictionary and writes
36// it into a byte writer.
37type encoder struct {
38	dict  *encoderDict
39	state *state
40	re    *rangeEncoder
41	start int64
42	// generate eos marker
43	marker bool
44	limit  bool
45	margin int
46}
47
48// newEncoder creates a new encoder. If the byte writer must be
49// limited use LimitedByteWriter provided by this package. The flags
50// argument supports the eosMarker flag, controlling whether a
51// terminating end-of-stream marker must be written.
52func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict,
53	flags encoderFlags) (e *encoder, err error) {
54
55	re, err := newRangeEncoder(bw)
56	if err != nil {
57		return nil, err
58	}
59	e = &encoder{
60		dict:   dict,
61		state:  state,
62		re:     re,
63		marker: flags&eosMarker != 0,
64		start:  dict.Pos(),
65		margin: opLenMargin,
66	}
67	if e.marker {
68		e.margin += 5
69	}
70	return e, nil
71}
72
73// Write writes the bytes from p into the dictionary. If not enough
74// space is available the data in the dictionary buffer will be
75// compressed to make additional space available. If the limit of the
76// underlying writer has been reached ErrLimit will be returned.
77func (e *encoder) Write(p []byte) (n int, err error) {
78	for {
79		k, err := e.dict.Write(p[n:])
80		n += k
81		if err == ErrNoSpace {
82			if err = e.compress(0); err != nil {
83				return n, err
84			}
85			continue
86		}
87		return n, err
88	}
89}
90
91// Reopen reopens the encoder with a new byte writer.
92func (e *encoder) Reopen(bw io.ByteWriter) error {
93	var err error
94	if e.re, err = newRangeEncoder(bw); err != nil {
95		return err
96	}
97	e.start = e.dict.Pos()
98	e.limit = false
99	return nil
100}
101
102// writeLiteral writes a literal into the LZMA stream
103func (e *encoder) writeLiteral(l lit) error {
104	var err error
105	state, state2, _ := e.state.states(e.dict.Pos())
106	if err = e.state.isMatch[state2].Encode(e.re, 0); err != nil {
107		return err
108	}
109	litState := e.state.litState(e.dict.ByteAt(1), e.dict.Pos())
110	match := e.dict.ByteAt(int(e.state.rep[0]) + 1)
111	err = e.state.litCodec.Encode(e.re, l.b, state, match, litState)
112	if err != nil {
113		return err
114	}
115	e.state.updateStateLiteral()
116	return nil
117}
118
119// iverson implements the Iverson operator as proposed by Donald Knuth in his
120// book Concrete Mathematics.
121func iverson(ok bool) uint32 {
122	if ok {
123		return 1
124	}
125	return 0
126}
127
128// writeMatch writes a repetition operation into the operation stream
129func (e *encoder) writeMatch(m match) error {
130	var err error
131	if !(minDistance <= m.distance && m.distance <= maxDistance) {
132		panic(fmt.Errorf("match distance %d out of range", m.distance))
133	}
134	dist := uint32(m.distance - minDistance)
135	if !(minMatchLen <= m.n && m.n <= maxMatchLen) &&
136		!(dist == e.state.rep[0] && m.n == 1) {
137		panic(fmt.Errorf(
138			"match length %d out of range; dist %d rep[0] %d",
139			m.n, dist, e.state.rep[0]))
140	}
141	state, state2, posState := e.state.states(e.dict.Pos())
142	if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil {
143		return err
144	}
145	g := 0
146	for ; g < 4; g++ {
147		if e.state.rep[g] == dist {
148			break
149		}
150	}
151	b := iverson(g < 4)
152	if err = e.state.isRep[state].Encode(e.re, b); err != nil {
153		return err
154	}
155	n := uint32(m.n - minMatchLen)
156	if b == 0 {
157		// simple match
158		e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] =
159			e.state.rep[2], e.state.rep[1], e.state.rep[0], dist
160		e.state.updateStateMatch()
161		if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil {
162			return err
163		}
164		return e.state.distCodec.Encode(e.re, dist, n)
165	}
166	b = iverson(g != 0)
167	if err = e.state.isRepG0[state].Encode(e.re, b); err != nil {
168		return err
169	}
170	if b == 0 {
171		// g == 0
172		b = iverson(m.n != 1)
173		if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil {
174			return err
175		}
176		if b == 0 {
177			e.state.updateStateShortRep()
178			return nil
179		}
180	} else {
181		// g in {1,2,3}
182		b = iverson(g != 1)
183		if err = e.state.isRepG1[state].Encode(e.re, b); err != nil {
184			return err
185		}
186		if b == 1 {
187			// g in {2,3}
188			b = iverson(g != 2)
189			err = e.state.isRepG2[state].Encode(e.re, b)
190			if err != nil {
191				return err
192			}
193			if b == 1 {
194				e.state.rep[3] = e.state.rep[2]
195			}
196			e.state.rep[2] = e.state.rep[1]
197		}
198		e.state.rep[1] = e.state.rep[0]
199		e.state.rep[0] = dist
200	}
201	e.state.updateStateRep()
202	return e.state.repLenCodec.Encode(e.re, n, posState)
203}
204
205// writeOp writes a single operation to the range encoder. The function
206// checks whether there is enough space available to close the LZMA
207// stream.
208func (e *encoder) writeOp(op operation) error {
209	if e.re.Available() < int64(e.margin) {
210		return ErrLimit
211	}
212	switch x := op.(type) {
213	case lit:
214		return e.writeLiteral(x)
215	case match:
216		return e.writeMatch(x)
217	default:
218		panic("unexpected operation")
219	}
220}
221
222// compress compressed data from the dictionary buffer. If the flag all
223// is set, all data in the dictionary buffer will be compressed. The
224// function returns ErrLimit if the underlying writer has reached its
225// limit.
226func (e *encoder) compress(flags compressFlags) error {
227	n := 0
228	if flags&all == 0 {
229		n = maxMatchLen - 1
230	}
231	d := e.dict
232	m := d.m
233	for d.Buffered() > n {
234		op := m.NextOp(e.state.rep)
235		if err := e.writeOp(op); err != nil {
236			return err
237		}
238		d.Discard(op.Len())
239	}
240	return nil
241}
242
243// eosMatch is a pseudo operation that indicates the end of the stream.
244var eosMatch = match{distance: maxDistance, n: minMatchLen}
245
246// Close terminates the LZMA stream. If requested the end-of-stream
247// marker will be written. If the byte writer limit has been or will be
248// reached during compression of the remaining data in the buffer the
249// LZMA stream will be closed and data will remain in the buffer.
250func (e *encoder) Close() error {
251	err := e.compress(all)
252	if err != nil && err != ErrLimit {
253		return err
254	}
255	if e.marker {
256		if err := e.writeMatch(eosMatch); err != nil {
257			return err
258		}
259	}
260	err = e.re.Close()
261	return err
262}
263
264// Compressed returns the number bytes of the input data that been
265// compressed.
266func (e *encoder) Compressed() int64 {
267	return e.dict.Pos() - e.start
268}
269