1e95abc47Szrj /*-
2e95abc47Szrj  * Copyright (c) 2009-2011 Sean Purcell
3e95abc47Szrj  * All rights reserved.
4e95abc47Szrj  *
5e95abc47Szrj  * Redistribution and use in source and binary forms, with or without
6e95abc47Szrj  * modification, are permitted provided that the following conditions
7e95abc47Szrj  * are met:
8e95abc47Szrj  * 1. Redistributions of source code must retain the above copyright
9e95abc47Szrj  *    notice, this list of conditions and the following disclaimer.
10e95abc47Szrj  * 2. Redistributions in binary form must reproduce the above copyright
11e95abc47Szrj  *    notice, this list of conditions and the following disclaimer in the
12e95abc47Szrj  *    documentation and/or other materials provided with the distribution.
13e95abc47Szrj  *
14e95abc47Szrj  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15e95abc47Szrj  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16e95abc47Szrj  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17e95abc47Szrj  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18e95abc47Szrj  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19e95abc47Szrj  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20e95abc47Szrj  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21e95abc47Szrj  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22e95abc47Szrj  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23e95abc47Szrj  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24e95abc47Szrj  */
25e95abc47Szrj 
26e95abc47Szrj #include "archive_platform.h"
27e95abc47Szrj 
28e95abc47Szrj __FBSDID("$FreeBSD$");
29e95abc47Szrj 
30e95abc47Szrj #ifdef HAVE_ERRNO_H
31e95abc47Szrj #include <errno.h>
32e95abc47Szrj #endif
33e95abc47Szrj 
34e95abc47Szrj #ifdef HAVE_ERRNO_H
35e95abc47Szrj #include <errno.h>
36e95abc47Szrj #endif
37e95abc47Szrj #include <stdio.h>
38e95abc47Szrj #ifdef HAVE_STDLIB_H
39e95abc47Szrj #include <stdlib.h>
40e95abc47Szrj #endif
41e95abc47Szrj #ifdef HAVE_STRING_H
42e95abc47Szrj #include <string.h>
43e95abc47Szrj #endif
44e95abc47Szrj #ifdef HAVE_UNISTD_H
45e95abc47Szrj #include <unistd.h>
46e95abc47Szrj #endif
47e95abc47Szrj #if HAVE_ZSTD_H
48e95abc47Szrj #include <zstd.h>
49e95abc47Szrj #endif
50e95abc47Szrj 
51e95abc47Szrj #include "archive.h"
52e95abc47Szrj #include "archive_endian.h"
53e95abc47Szrj #include "archive_private.h"
54e95abc47Szrj #include "archive_read_private.h"
55e95abc47Szrj 
56e95abc47Szrj #if HAVE_ZSTD_H && HAVE_LIBZSTD
57e95abc47Szrj 
58e95abc47Szrj struct private_data {
59e95abc47Szrj 	ZSTD_DStream	*dstream;
60e95abc47Szrj 	unsigned char	*out_block;
61e95abc47Szrj 	size_t		 out_block_size;
62e95abc47Szrj 	int64_t		 total_out;
63e95abc47Szrj 	char		 in_frame; /* True = in the middle of a zstd frame. */
64e95abc47Szrj 	char		 eof; /* True = found end of compressed data. */
65e95abc47Szrj };
66e95abc47Szrj 
/* Zstd filter callbacks (installed through zstd_reader_vtable). */
static ssize_t	zstd_filter_read(struct archive_read_filter *self,
		    const void **p);
static int	zstd_filter_close(struct archive_read_filter *self);
70e95abc47Szrj #endif
71e95abc47Szrj 
/*
 * Recognizing zstd-compressed input does not require the zstd library,
 * and recognizing it lets us report a more helpful error when we cannot
 * decompress it.  The bidder is therefore compiled unconditionally,
 * whether or not libzstd is available.
 */
static int	zstd_bidder_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	zstd_bidder_init(struct archive_read_filter *self);
81e95abc47Szrj 
82*50f8aa9cSAntonio Huete Jimenez static const struct archive_read_filter_bidder_vtable
83*50f8aa9cSAntonio Huete Jimenez zstd_bidder_vtable = {
84*50f8aa9cSAntonio Huete Jimenez 	.bid = zstd_bidder_bid,
85*50f8aa9cSAntonio Huete Jimenez 	.init = zstd_bidder_init,
86*50f8aa9cSAntonio Huete Jimenez };
87*50f8aa9cSAntonio Huete Jimenez 
88e95abc47Szrj int
archive_read_support_filter_zstd(struct archive * _a)89e95abc47Szrj archive_read_support_filter_zstd(struct archive *_a)
90e95abc47Szrj {
91e95abc47Szrj 	struct archive_read *a = (struct archive_read *)_a;
92e95abc47Szrj 
93*50f8aa9cSAntonio Huete Jimenez 	if (__archive_read_register_bidder(a, NULL, "zstd",
94*50f8aa9cSAntonio Huete Jimenez 				&zstd_bidder_vtable) != ARCHIVE_OK)
95e95abc47Szrj 		return (ARCHIVE_FATAL);
96e95abc47Szrj 
97e95abc47Szrj #if HAVE_ZSTD_H && HAVE_LIBZSTD
98e95abc47Szrj 	return (ARCHIVE_OK);
99e95abc47Szrj #else
100e95abc47Szrj 	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
101e95abc47Szrj 	    "Using external zstd program for zstd decompression");
102e95abc47Szrj 	return (ARCHIVE_WARN);
103e95abc47Szrj #endif
104e95abc47Szrj }
105e95abc47Szrj 
/*
 * Bid on the stream: decide from its leading bytes whether it looks like
 * zstd data.
 *
 * Requests four bytes of look-ahead from the filter, decodes them with
 * archive_le32dec() and compares the result against the regular zstd
 * frame magic and against the skippable-frame magic (with the low four
 * bits masked off).
 *
 * Returns 32 on a match; 0 when the look-ahead is unavailable or the
 * prefix is not recognized.
 */
static int
zstd_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	/* Zstd frame magic values */
	static const unsigned frame_magic = 0xFD2FB528U;
	static const unsigned skippable_magic = 0x184D2A50U;
	static const unsigned skippable_mask = 0xFFFFFFF0U;
	const unsigned char *head;
	ssize_t avail;
	unsigned magic;

	(void) self; /* UNUSED */

	head = __archive_read_filter_ahead(filter, 4, &avail);
	if (head == NULL)
		return (0);

	magic = archive_le32dec(head);
	if (magic == frame_magic ||
	    (magic & skippable_mask) == skippable_magic)
		return (32);

	return (0);
}
136e95abc47Szrj 
137e95abc47Szrj #if !(HAVE_ZSTD_H && HAVE_LIBZSTD)
138e95abc47Szrj 
139e95abc47Szrj /*
140e95abc47Szrj  * If we don't have the library on this system, we can't do the
141e95abc47Szrj  * decompression directly.  We can, however, try to run "zstd -d"
142e95abc47Szrj  * in case that's available.
143e95abc47Szrj  */
144e95abc47Szrj static int
zstd_bidder_init(struct archive_read_filter * self)145e95abc47Szrj zstd_bidder_init(struct archive_read_filter *self)
146e95abc47Szrj {
147e95abc47Szrj 	int r;
148e95abc47Szrj 
149e95abc47Szrj 	r = __archive_read_program(self, "zstd -d -qq");
150e95abc47Szrj 	/* Note: We set the format here even if __archive_read_program()
151e95abc47Szrj 	 * above fails.  We do, after all, know what the format is
152e95abc47Szrj 	 * even if we weren't able to read it. */
153e95abc47Szrj 	self->code = ARCHIVE_FILTER_ZSTD;
154e95abc47Szrj 	self->name = "zstd";
155e95abc47Szrj 	return (r);
156e95abc47Szrj }
157e95abc47Szrj 
158e95abc47Szrj #else
159e95abc47Szrj 
160*50f8aa9cSAntonio Huete Jimenez static const struct archive_read_filter_vtable
161*50f8aa9cSAntonio Huete Jimenez zstd_reader_vtable = {
162*50f8aa9cSAntonio Huete Jimenez 	.read = zstd_filter_read,
163*50f8aa9cSAntonio Huete Jimenez 	.close = zstd_filter_close,
164*50f8aa9cSAntonio Huete Jimenez };
165*50f8aa9cSAntonio Huete Jimenez 
166e95abc47Szrj /*
167e95abc47Szrj  * Initialize the filter object
168e95abc47Szrj  */
169e95abc47Szrj static int
zstd_bidder_init(struct archive_read_filter * self)170e95abc47Szrj zstd_bidder_init(struct archive_read_filter *self)
171e95abc47Szrj {
172e95abc47Szrj 	struct private_data *state;
173e95abc47Szrj 	const size_t out_block_size = ZSTD_DStreamOutSize();
174e95abc47Szrj 	void *out_block;
175e95abc47Szrj 	ZSTD_DStream *dstream;
176e95abc47Szrj 
177e95abc47Szrj 	self->code = ARCHIVE_FILTER_ZSTD;
178e95abc47Szrj 	self->name = "zstd";
179e95abc47Szrj 
180e95abc47Szrj 	state = (struct private_data *)calloc(sizeof(*state), 1);
181e95abc47Szrj 	out_block = (unsigned char *)malloc(out_block_size);
182e95abc47Szrj 	dstream = ZSTD_createDStream();
183e95abc47Szrj 
184e95abc47Szrj 	if (state == NULL || out_block == NULL || dstream == NULL) {
185e95abc47Szrj 		free(out_block);
186e95abc47Szrj 		free(state);
187e95abc47Szrj 		ZSTD_freeDStream(dstream); /* supports free on NULL */
188e95abc47Szrj 		archive_set_error(&self->archive->archive, ENOMEM,
189e95abc47Szrj 		    "Can't allocate data for zstd decompression");
190e95abc47Szrj 		return (ARCHIVE_FATAL);
191e95abc47Szrj 	}
192e95abc47Szrj 
193e95abc47Szrj 	self->data = state;
194e95abc47Szrj 
195e95abc47Szrj 	state->out_block_size = out_block_size;
196e95abc47Szrj 	state->out_block = out_block;
197e95abc47Szrj 	state->dstream = dstream;
198*50f8aa9cSAntonio Huete Jimenez 	self->vtable = &zstd_reader_vtable;
199e95abc47Szrj 
200e95abc47Szrj 	state->eof = 0;
201e95abc47Szrj 	state->in_frame = 0;
202e95abc47Szrj 
203e95abc47Szrj 	return (ARCHIVE_OK);
204e95abc47Szrj }
205e95abc47Szrj 
206e95abc47Szrj static ssize_t
zstd_filter_read(struct archive_read_filter * self,const void ** p)207e95abc47Szrj zstd_filter_read(struct archive_read_filter *self, const void **p)
208e95abc47Szrj {
209e95abc47Szrj 	struct private_data *state;
210e95abc47Szrj 	size_t decompressed;
211e95abc47Szrj 	ssize_t avail_in;
212e95abc47Szrj 	ZSTD_outBuffer out;
213e95abc47Szrj 	ZSTD_inBuffer in;
214e95abc47Szrj 
215e95abc47Szrj 	state = (struct private_data *)self->data;
216e95abc47Szrj 
217e95abc47Szrj 	out = (ZSTD_outBuffer) { state->out_block, state->out_block_size, 0 };
218e95abc47Szrj 
219e95abc47Szrj 	/* Try to fill the output buffer. */
220e95abc47Szrj 	while (out.pos < out.size && !state->eof) {
221e95abc47Szrj 		if (!state->in_frame) {
222e95abc47Szrj 			const size_t ret = ZSTD_initDStream(state->dstream);
223e95abc47Szrj 			if (ZSTD_isError(ret)) {
224e95abc47Szrj 				archive_set_error(&self->archive->archive,
225e95abc47Szrj 				    ARCHIVE_ERRNO_MISC,
226e95abc47Szrj 				    "Error initializing zstd decompressor: %s",
227e95abc47Szrj 				    ZSTD_getErrorName(ret));
228e95abc47Szrj 				return (ARCHIVE_FATAL);
229e95abc47Szrj 			}
230e95abc47Szrj 		}
231e95abc47Szrj 		in.src = __archive_read_filter_ahead(self->upstream, 1,
232e95abc47Szrj 		    &avail_in);
233e95abc47Szrj 		if (avail_in < 0) {
234e95abc47Szrj 			return avail_in;
235e95abc47Szrj 		}
236e95abc47Szrj 		if (in.src == NULL && avail_in == 0) {
237e95abc47Szrj 			if (!state->in_frame) {
238e95abc47Szrj 				/* end of stream */
239e95abc47Szrj 				state->eof = 1;
240e95abc47Szrj 				break;
241e95abc47Szrj 			} else {
242e95abc47Szrj 				archive_set_error(&self->archive->archive,
243e95abc47Szrj 				    ARCHIVE_ERRNO_MISC,
244e95abc47Szrj 				    "Truncated zstd input");
245e95abc47Szrj 				return (ARCHIVE_FATAL);
246e95abc47Szrj 			}
247e95abc47Szrj 		}
248e95abc47Szrj 		in.size = avail_in;
249e95abc47Szrj 		in.pos = 0;
250e95abc47Szrj 
251e95abc47Szrj 		{
252e95abc47Szrj 			const size_t ret =
253e95abc47Szrj 			    ZSTD_decompressStream(state->dstream, &out, &in);
254e95abc47Szrj 
255e95abc47Szrj 			if (ZSTD_isError(ret)) {
256e95abc47Szrj 				archive_set_error(&self->archive->archive,
257e95abc47Szrj 				    ARCHIVE_ERRNO_MISC,
258e95abc47Szrj 				    "Zstd decompression failed: %s",
259e95abc47Szrj 				    ZSTD_getErrorName(ret));
260e95abc47Szrj 				return (ARCHIVE_FATAL);
261e95abc47Szrj 			}
262e95abc47Szrj 
263e95abc47Szrj 			/* Decompressor made some progress */
264e95abc47Szrj 			__archive_read_filter_consume(self->upstream, in.pos);
265e95abc47Szrj 
266e95abc47Szrj 			/* ret guaranteed to be > 0 if frame isn't done yet */
267e95abc47Szrj 			state->in_frame = (ret != 0);
268e95abc47Szrj 		}
269e95abc47Szrj 	}
270e95abc47Szrj 
271e95abc47Szrj 	decompressed = out.pos;
272e95abc47Szrj 	state->total_out += decompressed;
273e95abc47Szrj 	if (decompressed == 0)
274e95abc47Szrj 		*p = NULL;
275e95abc47Szrj 	else
276e95abc47Szrj 		*p = state->out_block;
277e95abc47Szrj 	return (decompressed);
278e95abc47Szrj }
279e95abc47Szrj 
280e95abc47Szrj /*
281e95abc47Szrj  * Clean up the decompressor.
282e95abc47Szrj  */
283e95abc47Szrj static int
zstd_filter_close(struct archive_read_filter * self)284e95abc47Szrj zstd_filter_close(struct archive_read_filter *self)
285e95abc47Szrj {
286e95abc47Szrj 	struct private_data *state;
287e95abc47Szrj 
288e95abc47Szrj 	state = (struct private_data *)self->data;
289e95abc47Szrj 
290e95abc47Szrj 	ZSTD_freeDStream(state->dstream);
291e95abc47Szrj 	free(state->out_block);
292e95abc47Szrj 	free(state);
293e95abc47Szrj 
294e95abc47Szrj 	return (ARCHIVE_OK);
295e95abc47Szrj }
296e95abc47Szrj 
#endif /* HAVE_ZSTD_H && HAVE_LIBZSTD */
298