// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package xflate

import (
	"encoding/binary"
	"hash/crc32"
	"io"

	"github.com/dsnet/compress/internal/errors"
	"github.com/dsnet/compress/xflate/internal/meta"
)

// A Writer is an io.Writer that can write the XFLATE format.
// The XFLATE stream emitted by this Writer can be read by both Reader and
// flate.Reader.
type Writer struct {
	// These statistics fields are automatically updated by Writer.
	// It is safe to set these fields to arbitrary values.
	InputOffset  int64 // Total number of bytes issued to Write
	OutputOffset int64 // Total number of bytes written to underlying io.Writer

	wr io.Writer
	mw meta.Writer  // Meta encoder used to write the index and footer
	zw *flateWriter // DEFLATE compressor

	idx  index // Index table of seekable offsets
	nidx int64 // Number of records per index
	nchk int64 // Raw size of each independent chunk
	err  error // Persistent error

	// The following fields are embedded here to reduce memory allocations.
	scratch [64]byte
}
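
// Since the stream remains a valid DEFLATE stream (as noted above), it can
// also be decompressed with the standard library. A minimal sketch, assuming
// xflateData holds a complete stream produced by Writer:
//
//	rd := flate.NewReader(bytes.NewReader(xflateData))
//	data, err := io.ReadAll(rd)
//	// data now holds the original uncompressed input.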

// WriterConfig configures the Writer.
// The zero value for any field uses the default value for that field type.
type WriterConfig struct {
	// Underlying DEFLATE compression level.
	//
	// This compression level will be passed directly to the underlying DEFLATE
	// compressor. Higher values provide a better compression ratio at the
	// expense of CPU time.
	Level int

	// Uncompressed size of each independent chunk.
	//
	// Each chunk is compressed independently. This has the advantage that
	// a chunk can be decompressed without knowledge of the preceding chunks,
	// but has the disadvantage that it reduces the compression ratio.
	// Smaller ChunkSizes provide better random access properties, while
	// larger sizes provide a better compression ratio.
	ChunkSize int64

	// The number of records in each index.
	//
	// When this number is reached, the index is automatically flushed. This is
	// done to ensure that there is some limit on the amount of memory needed
	// to represent the index. A negative value indicates that the Writer will
	// not automatically flush the index.
	//
	// The product of IndexSize and the size of each record (24 B) approximates
	// how much memory the index will occupy.
	// The product of IndexSize and ChunkSize approximates how much
	// uncompressed data each index represents.
	IndexSize int64

	_ struct{} // Blank field to prevent unkeyed struct literals
}
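
// A worked example of the approximations above, using hypothetical values
// rather than the package defaults: with IndexSize set to 4096 and ChunkSize
// set to 1<<16 (64 KiB), each index occupies roughly 4096 * 24 B = 96 KiB of
// memory and covers approximately 4096 * 64 KiB = 256 MiB of uncompressed
// input.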

// NewWriter creates a new Writer writing to the given writer.
// It is the caller's responsibility to call Close to complete the stream.
//
// If conf is nil, then default configuration values are used. Writer copies
// all configuration values as necessary and does not store conf.
func NewWriter(wr io.Writer, conf *WriterConfig) (*Writer, error) {
	var lvl int
	var nchk, nidx int64
	if conf != nil {
		lvl = conf.Level
		switch {
		case conf.ChunkSize < 0:
			return nil, errorf(errors.Invalid, "invalid chunk size: %d", conf.ChunkSize)
		case conf.ChunkSize > 0:
			nchk = conf.ChunkSize
		}
		switch {
		case conf.IndexSize < 0:
			nidx = -1
		case conf.IndexSize > 0:
			nidx = conf.IndexSize
		}
	}

	zw, err := newFlateWriter(wr, lvl)
	if err != nil {
		return nil, err
	}
	xw := &Writer{wr: wr, zw: zw, nchk: nchk, nidx: nidx}
	xw.Reset(wr)
	return xw, nil
}
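
// A minimal usage sketch, assuming dst is an io.Writer and buf holds the
// data to compress (both hypothetical):
//
//	xw, err := NewWriter(dst, &WriterConfig{ChunkSize: 1 << 16})
//	if err != nil {
//		return err
//	}
//	if _, err := xw.Write(buf); err != nil {
//		return err
//	}
//	return xw.Close() // writes the final index and footer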

// Reset discards the Writer's state and makes it equivalent to the result
// of a call to NewWriter, but writes to wr instead. Any configurations from
// a prior call to NewWriter will be preserved.
//
// This is used to reduce memory allocations.
func (xw *Writer) Reset(wr io.Writer) error {
	*xw = Writer{
		wr:   wr,
		mw:   xw.mw,
		zw:   xw.zw,
		nchk: xw.nchk,
		nidx: xw.nidx,
		idx:  xw.idx,
	}
	if xw.zw == nil {
		xw.zw, _ = newFlateWriter(wr, DefaultCompression)
	} else {
		xw.zw.Reset(wr)
	}
	if xw.nchk == 0 {
		xw.nchk = DefaultChunkSize
	}
	if xw.nidx == 0 {
		xw.nidx = DefaultIndexSize
	}
	xw.idx.Reset()
	return nil
}
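
// A sketch of amortizing allocations across many streams by reusing a single
// Writer (dsts is a hypothetical slice of io.Writers):
//
//	var xw *Writer
//	for _, dst := range dsts {
//		if xw == nil {
//			xw, _ = NewWriter(dst, nil)
//		} else {
//			xw.Reset(dst)
//		}
//		// ... Write and Close as usual
//	}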

// Write writes the compressed form of buf to the underlying io.Writer.
// This automatically breaks the input into multiple chunks, writes them out,
// and records the sizes of each chunk in the index table.
func (xw *Writer) Write(buf []byte) (int, error) {
	if xw.err != nil {
		return 0, xw.err
	}

	var n, cnt int
	for len(buf) > 0 && xw.err == nil {
		// Flush chunk if necessary.
		remain := xw.nchk - xw.zw.InputOffset
		if remain <= 0 {
			xw.err = xw.Flush(FlushFull)
			continue
		}
		if remain > int64(len(buf)) {
			remain = int64(len(buf))
		}

		// Compress data for current chunk.
		offset := xw.zw.OutputOffset
		n, xw.err = xw.zw.Write(buf[:remain])
		xw.OutputOffset += xw.zw.OutputOffset - offset
		buf = buf[n:]
		cnt += n
	}

	xw.InputOffset += int64(cnt)
	return cnt, xw.err
}
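
// For example (hypothetical sizes): with ChunkSize set to 1<<16 (64 KiB), a
// single Write of 150 KiB finalizes two full 64 KiB chunks in the index,
// while the trailing 22 KiB is compressed into a third chunk that is not
// recorded until the next chunk boundary, Flush(FlushFull), or Close.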

// Flush flushes the current write buffer to the underlying writer.
// Flushing is entirely optional and should be used sparingly.
func (xw *Writer) Flush(mode FlushMode) error {
	if xw.err != nil {
		return xw.err
	}

	switch mode {
	case FlushSync:
		offset := xw.zw.OutputOffset
		xw.err = xw.zw.Flush()
		xw.OutputOffset += xw.zw.OutputOffset - offset
		return xw.err
	case FlushFull:
		if xw.err = xw.Flush(FlushSync); xw.err != nil {
			return xw.err
		}
		xw.idx.AppendRecord(xw.zw.OutputOffset, xw.zw.InputOffset, deflateType)
		xw.zw.Reset(xw.wr)
		if int64(len(xw.idx.Records)) == xw.nidx {
			xw.err = xw.Flush(FlushIndex)
		}
		return xw.err
	case FlushIndex:
		if xw.zw.InputOffset+xw.zw.OutputOffset > 0 {
			if err := xw.Flush(FlushFull); err != nil {
				return err
			}
		}
		xw.err = xw.encodeIndex(&xw.idx)
		backSize := xw.idx.IndexSize
		xw.idx.Reset()
		xw.idx.BackSize = backSize
		return xw.err
	default:
		return errorf(errors.Invalid, "invalid flush mode: %d", mode)
	}
}
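
// A sketch of forcing a seek point mid-stream (assuming xw is an open Writer
// and rec holds hypothetical data):
//
//	if _, err := xw.Write(rec); err != nil {
//		return err
//	}
//	// End the current chunk so a Reader can later seek to this boundary.
//	if err := xw.Flush(FlushFull); err != nil {
//		return err
//	}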

// Close ends the XFLATE stream and flushes all buffered data.
// This method automatically writes an index if any chunks have been written
// since the last FlushIndex.
func (xw *Writer) Close() error {
	if xw.err == errClosed {
		return nil
	}
	if xw.err != nil {
		return xw.err
	}

	// Flush final index.
	if xw.zw.OutputOffset+xw.zw.InputOffset > 0 || len(xw.idx.Records) > 0 {
		xw.err = xw.Flush(FlushIndex)
		if xw.err != nil {
			return xw.err
		}
	}

	// Encode the footer.
	err := xw.encodeFooter(xw.idx.BackSize)
	if err != nil {
		xw.err = err
	} else {
		xw.err = errClosed
	}
	return err
}

// encodeIndex encodes the index into a meta encoded stream.
// The index.Records and index.BackSize fields must be populated.
// The index.IndexSize field will be populated upon successful write.
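//
// The wire layout, as implemented below: a sequence of uvarint (VLI) values
// holding BackSize, the record count, the last record's CompOffset and
// RawOffset, and then delta-encoded (CompOffset, RawOffset) pairs for every
// record, followed by a little-endian CRC-32 (IEEE) of all the VLI bytes.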
func (xw *Writer) encodeIndex(index *index) error {
	// Helper function to write VLIs.
	var crc uint32
	var errVLI error
	writeVLI := func(x int64) {
		b := xw.scratch[:binary.PutUvarint(xw.scratch[:], uint64(x))]
		crc = crc32.Update(crc, crc32.IEEETable, b)
		if _, err := xw.mw.Write(b); err != nil {
			errVLI = errWrap(err)
		}
	}

	// Write the index.
	xw.mw.Reset(xw.wr)
	defer func() { xw.OutputOffset += xw.mw.OutputOffset }()
	xw.mw.FinalMode = meta.FinalMeta
	writeVLI(index.BackSize)
	writeVLI(int64(len(index.Records)))
	writeVLI(index.LastRecord().CompOffset)
	writeVLI(index.LastRecord().RawOffset)
	var preRec record
	for _, rec := range index.Records {
		writeVLI(rec.CompOffset - preRec.CompOffset)
		writeVLI(rec.RawOffset - preRec.RawOffset)
		preRec = rec
	}
	if errVLI != nil {
		return errVLI // already wrapped by writeVLI
	}

	binary.LittleEndian.PutUint32(xw.scratch[:], crc)
	if _, err := xw.mw.Write(xw.scratch[:4]); err != nil {
		return errWrap(err)
	}
	if err := xw.mw.Close(); err != nil {
		return errWrap(err)
	}
	index.IndexSize = xw.mw.OutputOffset // Record the encoded size
	return nil
}

// encodeFooter writes the final footer, encoding the provided backSize into it.
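//
// The footer is the magic number followed by a uvarint-encoded backSize; it
// must fit within a single meta block (hence the NumBlocks check below).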
func (xw *Writer) encodeFooter(backSize int64) error {
	var n int
	n += copy(xw.scratch[n:], magic[:])
	n += binary.PutUvarint(xw.scratch[n:], uint64(backSize))

	xw.mw.Reset(xw.wr)
	defer func() { xw.OutputOffset += xw.mw.OutputOffset }()
	xw.mw.FinalMode = meta.FinalStream
	if _, err := xw.mw.Write(xw.scratch[:n]); err != nil {
		return errWrap(err)
	}
	if err := xw.mw.Close(); err != nil {
		return errWrap(err)
	}
	if xw.mw.NumBlocks != 1 {
		return errorf(errors.Internal, "footer was not a single block")
	}
	return nil
}