1// Copyright 2019 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// ----------------
16
17// Package cgolz4 wraps the C "lz4" library.
18//
19// It speaks the LZ4 frame format, not the LZ4 block format.
20package cgolz4
21
22// TODO: dictionaries. See https://github.com/lz4/lz4/issues/791
23
24/*
25#cgo pkg-config: liblz4
26#include "lz4.h"
27#include "lz4frame.h"
28
29#include <stdint.h>
30
31#if (LZ4_VERSION_MAJOR < 1) || (LZ4_VERSION_MINOR < 8)
32void LZ4F_resetDecompressionContext(LZ4F_decompressionContext_t d) {}
33uint32_t cgolz4_have_lz4f_reset_decompression_context() { return 0; }
34#else
35uint32_t cgolz4_have_lz4f_reset_decompression_context() { return 1; }
36#endif
37
// advances reports how far a single cgolz4_* compress or decompress call
// progressed. The wrappers below fill in every field on every call.
typedef struct {
	// ndst is the number of bytes produced into the destination buffer.
	uint32_t ndst;
	// nsrc is the number of bytes consumed from the source buffer.
	uint32_t nsrc;
	// eof is 1 once the frame is complete (a successful cgolz4_compress_end,
	// or LZ4F_decompress returning 0) and 0 otherwise.
	uint32_t eof;
} advances;
43
44LZ4F_compressionContext_t cgolz4_compress_new() {
45	LZ4F_compressionContext_t ret = NULL;
46	if (LZ4F_isError(LZ4F_createCompressionContext(&ret, LZ4F_VERSION))) {
47		return NULL;
48	}
49	return ret;
50}
51
52LZ4F_decompressionContext_t cgolz4_decompress_new() {
53	LZ4F_decompressionContext_t ret = NULL;
54	if (LZ4F_isError(LZ4F_createDecompressionContext(&ret, LZ4F_VERSION))) {
55		return NULL;
56	}
57	return ret;
58}
59
// cgolz4_compress_min_dst_len returns LZ4F_compressBound for srcSize source
// bytes under default (NULL) preferences, widened to 64 bits.
uint64_t cgolz4_compress_min_dst_len(uint32_t srcSize) {
	size_t bound = LZ4F_compressBound(srcSize, NULL);
	return bound;
}
63
64uint64_t cgolz4_compress_begin(LZ4F_compressionContext_t z,
65		advances* a,
66		uint8_t* dst_ptr,
67		uint32_t dst_len) {
68	size_t result = LZ4F_compressBegin(z, dst_ptr, dst_len, NULL);
69	if (LZ4F_isError(result)) {
70		a->ndst = 0;
71		a->nsrc = 0;
72		a->eof = 0;
73		return result;
74	}
75	a->ndst = result;
76	a->nsrc = 0;
77	a->eof = 0;
78	return 0;
79}
80
81uint64_t cgolz4_compress_update(LZ4F_compressionContext_t z,
82		advances* a,
83		uint8_t* dst_ptr,
84		uint32_t dst_len,
85		uint8_t* src_ptr,
86		uint32_t src_len) {
87	size_t result = LZ4F_compressUpdate(z, dst_ptr, dst_len, src_ptr, src_len, NULL);
88	if (LZ4F_isError(result)) {
89		a->ndst = 0;
90		a->nsrc = 0;
91		a->eof = 0;
92		return result;
93	}
94	a->ndst = result;
95	a->nsrc = src_len;
96	a->eof = 0;
97	return 0;
98}
99
100uint64_t cgolz4_compress_end(LZ4F_compressionContext_t z,
101		advances* a,
102		uint8_t* dst_ptr,
103		uint32_t dst_len) {
104	size_t result = LZ4F_compressEnd(z, dst_ptr, dst_len, NULL);
105	if (LZ4F_isError(result)) {
106		a->ndst = 0;
107		a->nsrc = 0;
108		a->eof = 0;
109		return result;
110	}
111	a->ndst = result;
112	a->nsrc = 0;
113	a->eof = 1;
114	return 0;
115}
116
117uint64_t cgolz4_decompress_update(LZ4F_decompressionContext_t z,
118		advances* a,
119		uint8_t* dst_ptr,
120		uint32_t dst_len,
121		uint8_t* src_ptr,
122		uint32_t src_len) {
123	size_t d = dst_len;
124	size_t s = src_len;
125	size_t result = LZ4F_decompress(z, dst_ptr, &d, src_ptr, &s, NULL);
126	a->ndst = d;
127	a->nsrc = s;
128	a->eof = (result == 0) ? 1 : 0;
129	return LZ4F_isError(result) ? result : 0;
130}
131*/
132import "C"
133
134import (
135	"errors"
136	"io"
137	"unsafe"
138
139	"github.com/google/wuffs/lib/compression"
140)
141
const (
	// cgoEnabled is true in this cgo-backed implementation.
	//
	// NOTE(review): presumably a non-cgo build of the package declares the
	// same constant as false; confirm against the rest of the package.
	cgoEnabled = true

	// maxLen caps the slice lengths passed across the C / Go boundary, which
	// avoids overflow concerns when converting between C and Go integer
	// types.
	maxLen = 1 << 30

	// blockMaxLen is the size that Writer.Write's []byte argument is sliced
	// into. LZ4 is fundamentally a block-based API, and compressing source
	// data of size N requires O(N) memory. For Writer to have a fixed size
	// buffer, we impose a cap on the source data size.
	//
	// It is also the size of Reader's buffer of compressed bytes.
	blockMaxLen = 1 << 16
)
152
// Sentinel errors returned by this package. Errors that originate in the lz4
// library itself are reported as an errCode instead.
var (
	// errNilReceiver is returned by methods called on a nil *Reader or
	// *Writer.
	errNilReceiver = errors.New("cgolz4: nil receiver")
	// errMissingResetCall is returned by Read or Write when Reset has not
	// been called (or the Reader or Writer has since been closed).
	errMissingResetCall = errors.New("cgolz4: missing Reset call")

	// errNilIOReader is returned by Reader.Reset for a nil io.Reader.
	errNilIOReader = errors.New("cgolz4: nil io.Reader")
	// errNilIOWriter is returned by Writer.Reset for a nil io.Writer.
	errNilIOWriter = errors.New("cgolz4: nil io.Writer")

	// errOutOfMemoryVersionMismatch is returned when the lz4 library could
	// not create a compression or decompression context.
	errOutOfMemoryVersionMismatch = errors.New("cgolz4: out of memory / version mismatch")
	// errWriteBufferIsTooSmall is returned when even an empty Writer buffer
	// is smaller than what the lz4 library needs for the next operation.
	errWriteBufferIsTooSmall = errors.New("cgolz4: write buffer is too small")
)
161
162type errCode uint64
163
164func (e errCode) Error() string {
165	if s := C.GoString(C.LZ4F_getErrorName(C.LZ4F_errorCode_t(e))); s != "" {
166		return "cgolz4: " + s
167	}
168	return "cgolz4: unknown error"
169}
170
// ReaderRecycler can lessen the new memory allocated when calling Reader.Reset
// on a bound Reader.
//
// It holds at most one idle lz4 decompression context in its z field. A bound
// Reader hands its context over when the Reader is closed (provided that the
// slot is empty) and takes it back on a later Read, instead of allocating a
// new one. The closed field is set by Close, after which bound Readers neither
// give contexts to nor take contexts from the recycler.
//
// It is not safe to use a ReaderRecycler and a Reader concurrently.
type ReaderRecycler struct {
	z      C.LZ4F_decompressionContext_t
	closed bool
}
179
// Bind lets r re-use the memory that is manually managed by c. Call c.Close to
// free that memory.
//
// Binding only records c on r: no memory changes hands until r is later read
// from or closed. A Reader has at most one recycler, so binding again replaces
// the previous one. r must not be nil.
func (c *ReaderRecycler) Bind(r *Reader) {
	r.recycler = c
}
185
186// Close implements io.Closer.
187func (c *ReaderRecycler) Close() error {
188	c.closed = true
189	if c.z != nil {
190		C.LZ4F_freeDecompressionContext(c.z)
191		c.z = nil
192	}
193	return nil
194}
195
// Reader decompresses from the lz4 format.
//
// The zero value is not usable until Reset is called.
type Reader struct {
	// buf holds compressed bytes read from r. buf[i:j] is the portion that has
	// not yet been consumed by the decompressor. A nil r means that Reset has
	// not been called (or that the Reader has been closed).
	buf  [blockMaxLen]byte
	i, j uint32
	r    io.Reader

	// readErr and lz4Err are sticky errors. readErr is the error from the most
	// recent r.Read call (with io.EOF replaced by io.ErrUnexpectedEOF) and is
	// reported once buf[i:j] is drained. lz4Err is either io.EOF, at the end
	// of the frame, or the errCode from a failed decompression step.
	readErr error
	lz4Err  error

	// recycler, if non-nil, is where Close offers the decompression context
	// and where Read looks for a context to re-use. It is set by
	// ReaderRecycler.Bind.
	recycler *ReaderRecycler

	// z is the lz4 decompression context, created (or recycled) lazily by
	// Read. a receives the progress made by each decompression step.
	z C.LZ4F_decompressionContext_t
	a C.advances
}
212
213// Reset implements compression.Reader.
214func (r *Reader) Reset(reader io.Reader, dictionary []byte) error {
215	if r == nil {
216		return errNilReceiver
217	}
218	if err := r.Close(); err != nil {
219		return err
220	}
221	if reader == nil {
222		return errNilIOReader
223	}
224	r.r = reader
225	return nil
226}
227
228// Close implements compression.Reader.
229func (r *Reader) Close() error {
230	if r == nil {
231		return errNilReceiver
232	}
233	if r.r == nil {
234		return nil
235	}
236	r.i = 0
237	r.j = 0
238	r.r = nil
239	r.readErr = nil
240	r.lz4Err = nil
241	if r.z != nil {
242		if (r.recycler != nil) && !r.recycler.closed && (r.recycler.z == nil) &&
243			(C.cgolz4_have_lz4f_reset_decompression_context() != 0) {
244			r.recycler.z, r.z = r.z, nil
245		} else {
246			C.LZ4F_freeDecompressionContext(r.z)
247			r.z = nil
248		}
249	}
250	return nil
251}
252
253// Read implements compression.Reader.
254func (r *Reader) Read(p []byte) (int, error) {
255	if r == nil {
256		return 0, errNilReceiver
257	}
258	if r.r == nil {
259		return 0, errMissingResetCall
260	}
261
262	if r.z == nil {
263		if (r.recycler != nil) && !r.recycler.closed && (r.recycler.z != nil) &&
264			(C.cgolz4_have_lz4f_reset_decompression_context() != 0) {
265			r.z, r.recycler.z = r.recycler.z, nil
266			C.LZ4F_resetDecompressionContext(r.z)
267		} else {
268			r.z = C.cgolz4_decompress_new()
269			if r.z == nil {
270				return 0, errOutOfMemoryVersionMismatch
271			}
272		}
273	}
274
275	if len(p) > maxLen {
276		p = p[:maxLen]
277	}
278
279	for numRead := 0; ; {
280		if r.lz4Err != nil {
281			return numRead, r.lz4Err
282		}
283		if len(p) == 0 {
284			return numRead, nil
285		}
286
287		if r.i >= r.j {
288			if r.readErr != nil {
289				return numRead, r.readErr
290			}
291
292			n, err := r.r.Read(r.buf[:])
293			if err == io.EOF {
294				err = io.ErrUnexpectedEOF
295			}
296			r.i, r.j, r.readErr = 0, uint32(n), err
297			continue
298		}
299
300		e := errCode(C.cgolz4_decompress_update(r.z, &r.a,
301			(*C.uint8_t)(unsafe.Pointer(&p[0])),
302			(C.uint32_t)(len(p)),
303			(*C.uint8_t)(unsafe.Pointer(&r.buf[r.i])),
304			(C.uint32_t)(r.j-r.i),
305		))
306
307		numRead += int(r.a.ndst)
308		p = p[int(r.a.ndst):]
309
310		r.i += uint32(r.a.nsrc)
311
312		if e == 0 {
313			if r.a.eof == 0 {
314				continue
315			}
316			r.lz4Err = io.EOF
317		} else {
318			r.lz4Err = e
319		}
320		return numRead, r.lz4Err
321	}
322}
323
// WriterRecycler can lessen the new memory allocated when calling Writer.Reset
// on a bound Writer.
//
// It holds at most one idle lz4 compression context in its z field. A bound
// Writer hands its context over when the Writer is closed or Reset (provided
// that the slot is empty) and takes it back on a later write, instead of
// allocating a new one. The closed field is set by Close, after which bound
// Writers neither give contexts to nor take contexts from the recycler.
//
// It is not safe to use a WriterRecycler and a Writer concurrently.
type WriterRecycler struct {
	z      C.LZ4F_compressionContext_t
	closed bool
}
332
// Bind lets w re-use the memory that is manually managed by c. Call c.Close to
// free that memory.
//
// Binding only records c on w: no memory changes hands until w is later
// written to or closed. A Writer has at most one recycler, so binding again
// replaces the previous one. w must not be nil.
func (c *WriterRecycler) Bind(w *Writer) {
	w.recycler = c
}
338
339// Close implements io.Closer.
340func (c *WriterRecycler) Close() error {
341	c.closed = true
342	if c.z != nil {
343		C.LZ4F_freeCompressionContext(c.z)
344		c.z = nil
345	}
346	return nil
347}
348
349func minDstLenForBlockMaxLen() uint64 {
350	return uint64(C.cgolz4_compress_min_dst_len(C.uint32_t(blockMaxLen)))
351}
352
// writerBufLen is the size of Writer's buffer of compressed bytes that have
// not yet been sent to the underlying io.Writer.
//
// NOTE(review): presumably chosen to be at least the lz4 library's bound for
// compressing a blockMaxLen block (see minDstLenForBlockMaxLen), as otherwise
// Writer.write fails with errWriteBufferIsTooSmall. Confirm against the lz4
// versions that are supported.
const writerBufLen = 2*blockMaxLen + 16
354
// Writer compresses to the lz4 format.
//
// Compressed bytes may be buffered and not sent to the underlying io.Writer
// until Close is called.
//
// The zero value is not usable until Reset is called.
type Writer struct {
	// buf[:j] holds compressed bytes that have not yet been sent to w. A nil w
	// means that Reset has not been called (or that the Writer has been
	// closed). begun records whether the frame header step has already been
	// attempted for the current stream.
	buf   [writerBufLen]byte
	j     uint32
	w     io.Writer
	begun bool

	// writeErr is the sticky error from a failed compression step or a failed
	// write to w. Once set, Write fails until Reset is called.
	writeErr error

	// recycler, if non-nil, is where close offers the compression context and
	// where write looks for a context to re-use. It is set by
	// WriterRecycler.Bind.
	recycler *WriterRecycler

	// z is the lz4 compression context, created (or recycled) lazily by write.
	// a receives the progress made by each compression step.
	z C.LZ4F_compressionContext_t
	a C.advances
}
374
375// Reset implements compression.Writer.
376func (w *Writer) Reset(writer io.Writer, dictionary []byte, level compression.Level) error {
377	if w == nil {
378		return errNilReceiver
379	}
380	w.close()
381	if writer == nil {
382		return errNilIOWriter
383	}
384	w.w = writer
385	w.begun = false
386	return nil
387}
388
389// Close implements compression.Writer.
390func (w *Writer) Close() error {
391	if w == nil {
392		return errNilReceiver
393	}
394	err := w.flush(true)
395	w.close()
396	return err
397}
398
399func (w *Writer) flush(final bool) error {
400	if w.w == nil {
401		return nil
402	}
403
404	if final {
405		if err := w.write(nil, true); err != nil {
406			return err
407		}
408	}
409
410	if w.j == 0 {
411		return nil
412	}
413	_, err := w.w.Write(w.buf[:w.j])
414	w.j = 0
415	return err
416}
417
418func (w *Writer) close() {
419	if w.w == nil {
420		return
421	}
422	w.j = 0
423	w.w = nil
424	w.writeErr = nil
425	if w.z != nil {
426		if (w.recycler != nil) && !w.recycler.closed && (w.recycler.z == nil) {
427			w.recycler.z, w.z = w.z, nil
428		} else {
429			C.LZ4F_freeCompressionContext(w.z)
430			w.z = nil
431		}
432	}
433}
434
435// Write implements compression.Writer.
436func (w *Writer) Write(p []byte) (int, error) {
437	if w == nil {
438		return 0, errNilReceiver
439	}
440	if w.w == nil {
441		return 0, errMissingResetCall
442	}
443	if w.writeErr != nil {
444		return 0, w.writeErr
445	}
446
447	originalLenP := len(p)
448	for {
449		remaining := []byte(nil)
450		if blockMaxLen > maxLen {
451			panic("unreachable")
452		} else if len(p) > blockMaxLen {
453			p, remaining = p[:blockMaxLen], p[blockMaxLen:]
454		}
455
456		if err := w.write(p, false); err != nil {
457			return 0, err
458		}
459
460		p, remaining = remaining, nil
461		if len(p) == 0 {
462			return originalLenP, nil
463		}
464	}
465}
466
// write compresses p, which must be at most blockMaxLen bytes long, into
// w.buf. If final is true, it ends the lz4 frame instead; in this file, final
// is only ever passed together with a nil p (see flush).
//
// Before each compression step, it makes sure that w.buf has as much free
// space as the lz4 library asks for, flushing the buffered bytes to the
// underlying io.Writer when it does not.
//
// Errors from compression steps and from flushing are recorded in w.writeErr
// before being returned. Failing to create a compression context is returned
// but not recorded.
func (w *Writer) write(p []byte, final bool) error {
	if n := len(p); (n > blockMaxLen) || (n > maxLen) {
		panic("unreachable")
	}

	// Lazily obtain a compression context, preferring a recycled one.
	if w.z == nil {
		if (w.recycler != nil) && !w.recycler.closed && (w.recycler.z != nil) {
			w.z, w.recycler.z = w.recycler.z, nil
		} else {
			w.z = C.cgolz4_compress_new()
			if w.z == nil {
				return errOutOfMemoryVersionMismatch
			}
		}
	}

	// Emit the frame header once per stream. w.begun is set before the call,
	// so a failed attempt is not repeated.
	if !w.begun {
		w.begun = true
		e := errCode(C.cgolz4_compress_begin(w.z, &w.a,
			(*C.uint8_t)(unsafe.Pointer(&w.buf[w.j])),
			(C.uint32_t)(uint32(len(w.buf))-w.j),
		))

		w.j += uint32(w.a.ndst)
		// cgolz4_compress_begin always reports nsrc as 0, so this leaves p
		// unchanged.
		p = p[uint32(w.a.nsrc):]

		if e != 0 {
			w.writeErr = e
			return w.writeErr
		}
	}

	for (len(p) > 0) || final {
		// Make room for the worst case output of the next step. If the buffer
		// is already empty and still too small, flushing cannot help.
		minDstLen := uint64(C.cgolz4_compress_min_dst_len(C.uint32_t(len(p))))
		for (uint64(len(w.buf)) - uint64(w.j)) < minDstLen {
			if w.j == 0 {
				w.writeErr = errWriteBufferIsTooSmall
				return w.writeErr
			}
			if err := w.flush(false); err != nil {
				w.writeErr = err
				return w.writeErr
			}
		}

		e := errCode(0)
		if final {
			e = errCode(C.cgolz4_compress_end(w.z, &w.a,
				(*C.uint8_t)(unsafe.Pointer(&w.buf[w.j])),
				(C.uint32_t)(uint32(len(w.buf))-w.j),
			))
			// A successful end sets eof, which clears final and so lets the
			// loop finish.
			final = w.a.eof == 0
		} else {
			e = errCode(C.cgolz4_compress_update(w.z, &w.a,
				(*C.uint8_t)(unsafe.Pointer(&w.buf[w.j])),
				(C.uint32_t)(uint32(len(w.buf))-w.j),
				(*C.uint8_t)(unsafe.Pointer(&p[0])),
				(C.uint32_t)(len(p)),
			))
		}

		// Account for the progress made. A successful update consumes all of
		// p; a failed step reports no progress.
		w.j += uint32(w.a.ndst)
		p = p[uint32(w.a.nsrc):]

		if e != 0 {
			w.writeErr = e
			return w.writeErr
		}
	}
	return nil
}
538