1 /*-
2  * Copyright (c) 2017 Sean Purcell
3  * Copyright (c) 2023-2024 Klara, Inc.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "archive_platform.h"
28 
29 #ifdef HAVE_ERRNO_H
30 #include <errno.h>
31 #endif
32 #ifdef HAVE_LIMITS_H
33 #include <limits.h>
34 #endif
35 #ifdef HAVE_STDINT_H
36 #include <stdint.h>
37 #endif
38 #ifdef HAVE_STDLIB_H
39 #include <stdlib.h>
40 #endif
41 #ifdef HAVE_STRING_H
42 #include <string.h>
43 #endif
44 #ifdef HAVE_UNISTD_H
45 #include <unistd.h>
46 #endif
47 #ifdef HAVE_ZSTD_H
48 #include <zstd.h>
49 #endif
50 
51 #include "archive.h"
52 #include "archive_private.h"
53 #include "archive_string.h"
54 #include "archive_write_private.h"
55 
/*
 * Without zstd.h and ZSTD_compressStream() this file still compiles; the
 * filter then pipes data through the external "zstd" program instead.
 */
57 
/*
 * Per-filter private state.
 *
 * The first three members are user-selectable options and exist in both
 * build flavours.  The remainder depends on whether the filter drives
 * libzstd directly or hands the data to an external program.
 */
struct private_data {
	int		 compression_level;	/* "compression-level" option */
	int		 threads;		/* "threads" option */
	int		 long_distance;		/* "long" option */
#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
	/*
	 * Position of drive_compressor() in the life of the current
	 * frame.  "running" is first so a zeroed structure starts there.
	 */
	enum {
		running,	/* feeding input to the compressor */
		finishing,	/* ending the current frame */
		resetting,	/* frame ended, session about to be reset */
	} state;
	int		 frame_per_file;	/* "frame-per-file" option */
	size_t		 min_frame_in;		/* "min-frame-in" option */
	size_t		 max_frame_in;		/* "max-frame-in" option */
	size_t		 min_frame_out;		/* "min-frame-out" option */
	size_t		 max_frame_out;		/* "max-frame-out" option */
	size_t		 cur_frame;		/* bumped on every reset */
	size_t		 cur_frame_in;		/* input bytes in this frame */
	size_t		 cur_frame_out;		/* output bytes in this frame */
	size_t		 total_in;		/* input bytes over all frames */
	ZSTD_CStream	*cstream;		/* libzstd streaming context */
	ZSTD_outBuffer	 out;			/* staging buffer for output */
#else
	/* State of the external "zstd" program. */
	struct archive_write_program_data *pdata;
#endif
};
83 
/*
 * Compression-level limits used when the library cannot be asked for
 * them (default range values from zstdcli.c v1.4.0).
 */
#define CLEVEL_MIN	(-99)
#define CLEVEL_STD_MIN	0	/* prior to 1.3.4 and more recent without using --fast */
#define CLEVEL_DEFAULT	3
#define CLEVEL_STD_MAX	19	/* without using --ultra */
#define CLEVEL_MAX	22

/* NOTE(review): presumably the window log of a bare "--long"; not used in the code visible here. */
#define LONG_STD	27

/*
 * Library versions that gate features, in the numeric form compared
 * against ZSTD_VERSION_NUMBER and ZSTD_versionNumber().
 */
#define MINVER_NEGCLEVEL	10304	/* negative compression levels */
#define MINVER_MINCLEVEL	10306	/* ZSTD_minCLevel() */
#define MINVER_LONG		10302	/* window-log ("long") parameter */
96 
/* Forward declarations of the filter callbacks defined below. */
static int archive_compressor_zstd_options(struct archive_write_filter *f,
		    const char *key, const char *value);
static int archive_compressor_zstd_open(struct archive_write_filter *f);
static int archive_compressor_zstd_write(struct archive_write_filter *f,
		    const void *buff, size_t length);
static int archive_compressor_zstd_flush(struct archive_write_filter *f);
static int archive_compressor_zstd_close(struct archive_write_filter *f);
static int archive_compressor_zstd_free(struct archive_write_filter *f);
#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
/* Shared worker behind write, flush and close in the libzstd build. */
static int drive_compressor(struct archive_write_filter *f,
		    struct private_data *data, int flush, const void *src,
		    size_t length);
#endif
109 
110 
111 /*
112  * Add a zstd compression filter to this write handle.
113  */
114 int
archive_write_add_filter_zstd(struct archive * _a)115 archive_write_add_filter_zstd(struct archive *_a)
116 {
117 	struct archive_write *a = (struct archive_write *)_a;
118 	struct archive_write_filter *f = __archive_write_allocate_filter(_a);
119 	struct private_data *data;
120 	archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
121 	    ARCHIVE_STATE_NEW, "archive_write_add_filter_zstd");
122 
123 	data = calloc(1, sizeof(*data));
124 	if (data == NULL) {
125 		archive_set_error(&a->archive, ENOMEM, "Out of memory");
126 		return (ARCHIVE_FATAL);
127 	}
128 	f->data = data;
129 	f->open = &archive_compressor_zstd_open;
130 	f->options = &archive_compressor_zstd_options;
131 	f->flush = &archive_compressor_zstd_flush;
132 	f->close = &archive_compressor_zstd_close;
133 	f->free = &archive_compressor_zstd_free;
134 	f->code = ARCHIVE_FILTER_ZSTD;
135 	f->name = "zstd";
136 	data->compression_level = CLEVEL_DEFAULT;
137 	data->threads = 0;
138 	data->long_distance = 0;
139 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
140 	data->frame_per_file = 0;
141 	data->min_frame_in = 0;
142 	data->max_frame_in = SIZE_MAX;
143 	data->min_frame_out = 0;
144 	data->max_frame_out = SIZE_MAX;
145 	data->cur_frame_in = 0;
146 	data->cur_frame_out = 0;
147 	data->cstream = ZSTD_createCStream();
148 	if (data->cstream == NULL) {
149 		free(data);
150 		archive_set_error(&a->archive, ENOMEM,
151 		    "Failed to allocate zstd compressor object");
152 		return (ARCHIVE_FATAL);
153 	}
154 
155 	return (ARCHIVE_OK);
156 #else
157 	data->pdata = __archive_write_program_allocate("zstd");
158 	if (data->pdata == NULL) {
159 		free(data);
160 		archive_set_error(&a->archive, ENOMEM, "Out of memory");
161 		return (ARCHIVE_FATAL);
162 	}
163 	archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
164 	    "Using external zstd program");
165 	return (ARCHIVE_WARN);
166 #endif
167 }
168 
169 static int
archive_compressor_zstd_free(struct archive_write_filter * f)170 archive_compressor_zstd_free(struct archive_write_filter *f)
171 {
172 	struct private_data *data = (struct private_data *)f->data;
173 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
174 	ZSTD_freeCStream(data->cstream);
175 	free(data->out.dst);
176 #else
177 	__archive_write_program_free(data->pdata);
178 #endif
179 	free(data);
180 	f->data = NULL;
181 	return (ARCHIVE_OK);
182 }
183 
184 static int
string_to_number(const char * string,intmax_t * numberp)185 string_to_number(const char *string, intmax_t *numberp)
186 {
187 	char *end;
188 
189 	if (string == NULL || *string == '\0')
190 		return (ARCHIVE_WARN);
191 	*numberp = strtoimax(string, &end, 10);
192 	if (end == string || *end != '\0' || errno == EOVERFLOW) {
193 		*numberp = 0;
194 		return (ARCHIVE_WARN);
195 	}
196 	return (ARCHIVE_OK);
197 }
198 
#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
/*
 * Parse STRING as an unsigned byte count.
 *
 * The number may be followed by a binary multiplier K, M or G (either
 * case) and/or a trailing B (either case); nothing else may follow.
 *
 * On success the scaled value is stored in *NUMBERP and ARCHIVE_OK is
 * returned.  ARCHIVE_WARN is returned, and *NUMBERP is left untouched,
 * when STRING is NULL, empty, negative, malformed, or when the scaled
 * value does not fit in size_t.
 */
static int
string_to_size(const char *string, size_t *numberp)
{
	uintmax_t number;
	const char *p;
	char *end;
	unsigned int shift = 0;

	if (string == NULL || *string == '\0' || *string == '-')
		return (ARCHIVE_WARN);
	/* Clear errno so that only this conversion can set ERANGE. */
	errno = 0;
	number = strtoumax(string, &end, 10);
	if (end == string || errno == ERANGE)
		return (ARCHIVE_WARN);
	/*
	 * strtoumax() skips leading white space and quietly negates a
	 * value written with a minus sign, so look for one anywhere in
	 * the text it consumed.
	 */
	for (p = string; p < end; p++) {
		if (*p == '-')
			return (ARCHIVE_WARN);
	}

	switch (*end) {
	case 'K': case 'k':
		shift = 10;
		end++;
		break;
	case 'M': case 'm':
		shift = 20;
		end++;
		break;
	case 'G': case 'g':
		shift = 30;
		end++;
		break;
	default:
		break;
	}
	if (*end == 'B' || *end == 'b')
		end++;
	if (*end != '\0')
		return (ARCHIVE_WARN);

	/* Refuse values that would overflow size_t once scaled. */
	if (number > (uintmax_t)SIZE_MAX >> shift)
		return (ARCHIVE_WARN);
	*numberp = (size_t)(number << shift);
	return (ARCHIVE_OK);
}
#endif
235 
236 /*
237  * Set write options.
238  */
239 static int
archive_compressor_zstd_options(struct archive_write_filter * f,const char * key,const char * value)240 archive_compressor_zstd_options(struct archive_write_filter *f, const char *key,
241     const char *value)
242 {
243 	struct private_data *data = (struct private_data *)f->data;
244 
245 	if (strcmp(key, "compression-level") == 0) {
246 		intmax_t level;
247 		if (string_to_number(value, &level) != ARCHIVE_OK) {
248 			return (ARCHIVE_WARN);
249 		}
250 		/* If we don't have the library, hard-code the max level */
251 		int minimum = CLEVEL_MIN;
252 		int maximum = CLEVEL_MAX;
253 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
254 		maximum = ZSTD_maxCLevel();
255 #if ZSTD_VERSION_NUMBER >= MINVER_MINCLEVEL
256 		if (ZSTD_versionNumber() >= MINVER_MINCLEVEL) {
257 			minimum = ZSTD_minCLevel();
258 		}
259 		else
260 #endif
261 		if (ZSTD_versionNumber() < MINVER_NEGCLEVEL) {
262 			minimum = CLEVEL_STD_MIN;
263 		}
264 #endif
265 		if (level < minimum || level > maximum) {
266 			return (ARCHIVE_WARN);
267 		}
268 		data->compression_level = (int)level;
269 		return (ARCHIVE_OK);
270 	} else if (strcmp(key, "threads") == 0) {
271 		intmax_t threads;
272 		if (string_to_number(value, &threads) != ARCHIVE_OK) {
273 			return (ARCHIVE_WARN);
274 		}
275 
276 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
277 		if (threads == 0) {
278 			threads = sysconf(_SC_NPROCESSORS_ONLN);
279 		}
280 #elif !defined(__CYGWIN__) && defined(_WIN32_WINNT) && \
281     _WIN32_WINNT >= 0x0601 /* _WIN32_WINNT_WIN7 */
282 		if (threads == 0) {
283 			DWORD winCores = GetActiveProcessorCount(
284 			    ALL_PROCESSOR_GROUPS);
285 			threads = (intmax_t)winCores;
286 		}
287 #endif
288 		if (threads < 0 || threads > INT_MAX) {
289 			return (ARCHIVE_WARN);
290 		}
291 		data->threads = (int)threads;
292 		return (ARCHIVE_OK);
293 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
294 	} else if (strcmp(key, "frame-per-file") == 0) {
295 		data->frame_per_file = 1;
296 		return (ARCHIVE_OK);
297 	} else if (strcmp(key, "min-frame-in") == 0) {
298 		if (string_to_size(value, &data->min_frame_in) != ARCHIVE_OK) {
299 			return (ARCHIVE_WARN);
300 		}
301 		return (ARCHIVE_OK);
302 	} else if (strcmp(key, "min-frame-out") == 0 ||
303 	    strcmp(key, "min-frame-size") == 0) {
304 		if (string_to_size(value, &data->min_frame_out) != ARCHIVE_OK) {
305 			return (ARCHIVE_WARN);
306 		}
307 		return (ARCHIVE_OK);
308 	} else if (strcmp(key, "max-frame-in") == 0 ||
309 	    strcmp(key, "max-frame-size") == 0) {
310 		if (string_to_size(value, &data->max_frame_in) != ARCHIVE_OK ||
311 		    data->max_frame_in < 1024) {
312 			return (ARCHIVE_WARN);
313 		}
314 		return (ARCHIVE_OK);
315 	} else if (strcmp(key, "max-frame-out") == 0) {
316 		if (string_to_size(value, &data->max_frame_out) != ARCHIVE_OK ||
317 		    data->max_frame_out < 1024) {
318 			return (ARCHIVE_WARN);
319 		}
320 		return (ARCHIVE_OK);
321 #endif
322 	}
323 	else if (strcmp(key, "long") == 0) {
324 		intmax_t long_distance;
325 		if (string_to_number(value, &long_distance) != ARCHIVE_OK) {
326 			return (ARCHIVE_WARN);
327 		}
328 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream && ZSTD_VERSION_NUMBER >= MINVER_LONG
329 		ZSTD_bounds bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
330 		if (ZSTD_isError(bounds.error)) {
331 			int max_distance = ((int)(sizeof(size_t) == 4 ? 30 : 31));
332 			if (((int)long_distance) < 10 || (int)long_distance > max_distance)
333 				return (ARCHIVE_WARN);
334 		} else {
335 			if ((int)long_distance < bounds.lowerBound || (int)long_distance > bounds.upperBound)
336 				return (ARCHIVE_WARN);
337 		}
338 #else
339 		int max_distance = ((int)(sizeof(size_t) == 4 ? 30 : 31));
340 		if (((int)long_distance) < 10 || (int)long_distance > max_distance)
341 		    return (ARCHIVE_WARN);
342 #endif
343 		data->long_distance = (int)long_distance;
344 		return (ARCHIVE_OK);
345 	}
346 
347 	/* Note: The "warn" return is just to inform the options
348 	 * supervisor that we didn't handle it.  It will generate
349 	 * a suitable error if no one used this option. */
350 	return (ARCHIVE_WARN);
351 }
352 
353 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
354 /*
355  * Setup callback.
356  */
357 static int
archive_compressor_zstd_open(struct archive_write_filter * f)358 archive_compressor_zstd_open(struct archive_write_filter *f)
359 {
360 	struct private_data *data = (struct private_data *)f->data;
361 
362 	if (data->out.dst == NULL) {
363 		size_t bs = ZSTD_CStreamOutSize(), bpb;
364 		if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
365 			/* Buffer size should be a multiple number of
366 			 * the of bytes per block for performance. */
367 			bpb = archive_write_get_bytes_per_block(f->archive);
368 			if (bpb > bs)
369 				bs = bpb;
370 			else if (bpb != 0)
371 				bs -= bs % bpb;
372 		}
373 		data->out.size = bs;
374 		data->out.pos = 0;
375 		data->out.dst
376 		    = (unsigned char *)malloc(data->out.size);
377 		if (data->out.dst == NULL) {
378 			archive_set_error(f->archive, ENOMEM,
379 			    "Can't allocate data for compression buffer");
380 			return (ARCHIVE_FATAL);
381 		}
382 	}
383 
384 	f->write = archive_compressor_zstd_write;
385 
386 	if (ZSTD_isError(ZSTD_initCStream(data->cstream,
387 	    data->compression_level))) {
388 		archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
389 		    "Internal error initializing zstd compressor object");
390 		return (ARCHIVE_FATAL);
391 	}
392 
393 	ZSTD_CCtx_setParameter(data->cstream, ZSTD_c_nbWorkers, data->threads);
394 
395 #if ZSTD_VERSION_NUMBER >= MINVER_LONG
396 	ZSTD_CCtx_setParameter(data->cstream, ZSTD_c_windowLog, data->long_distance);
397 #endif
398 
399 	return (ARCHIVE_OK);
400 }
401 
402 /*
403  * Write data to the compressed stream.
404  */
405 static int
archive_compressor_zstd_write(struct archive_write_filter * f,const void * buff,size_t length)406 archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
407     size_t length)
408 {
409 	struct private_data *data = (struct private_data *)f->data;
410 
411 	return (drive_compressor(f, data, 0, buff, length));
412 }
413 
414 /*
415  * Flush the compressed stream.
416  */
417 static int
archive_compressor_zstd_flush(struct archive_write_filter * f)418 archive_compressor_zstd_flush(struct archive_write_filter *f)
419 {
420 	struct private_data *data = (struct private_data *)f->data;
421 
422 	if (data->frame_per_file && data->state == running) {
423 		if (data->cur_frame_in > data->min_frame_in &&
424 		    data->cur_frame_out > data->min_frame_out) {
425 			data->state = finishing;
426 		}
427 	}
428 	return (drive_compressor(f, data, 1, NULL, 0));
429 }
430 
431 /*
432  * Finish the compression...
433  */
434 static int
archive_compressor_zstd_close(struct archive_write_filter * f)435 archive_compressor_zstd_close(struct archive_write_filter *f)
436 {
437 	struct private_data *data = (struct private_data *)f->data;
438 
439 	if (data->state == running)
440 		data->state = finishing;
441 	return (drive_compressor(f, data, 1, NULL, 0));
442 }
443 
444 /*
445  * Utility function to push input data through compressor,
446  * writing full output blocks as necessary.
447  */
448 static int
drive_compressor(struct archive_write_filter * f,struct private_data * data,int flush,const void * src,size_t length)449 drive_compressor(struct archive_write_filter *f,
450     struct private_data *data, int flush, const void *src, size_t length)
451 {
452 	ZSTD_inBuffer in = { .src = src, .size = length, .pos = 0 };
453 	size_t ipos, opos, zstdret = 0;
454 	int ret;
455 
456 	for (;;) {
457 		ipos = in.pos;
458 		opos = data->out.pos;
459 		switch (data->state) {
460 		case running:
461 			if (in.pos == in.size)
462 				return (ARCHIVE_OK);
463 			zstdret = ZSTD_compressStream(data->cstream,
464 			    &data->out, &in);
465 			if (ZSTD_isError(zstdret))
466 				goto zstd_fatal;
467 			break;
468 		case finishing:
469 			zstdret = ZSTD_endStream(data->cstream, &data->out);
470 			if (ZSTD_isError(zstdret))
471 				goto zstd_fatal;
472 			if (zstdret == 0)
473 				data->state = resetting;
474 			break;
475 		case resetting:
476 			ZSTD_CCtx_reset(data->cstream, ZSTD_reset_session_only);
477 			data->cur_frame++;
478 			data->cur_frame_in = 0;
479 			data->cur_frame_out = 0;
480 			data->state = running;
481 			break;
482 		}
483 		data->total_in += in.pos - ipos;
484 		data->cur_frame_in += in.pos - ipos;
485 		data->cur_frame_out += data->out.pos - opos;
486 		if (data->state == running) {
487 			if (data->cur_frame_in >= data->max_frame_in ||
488 			    data->cur_frame_out >= data->max_frame_out) {
489 				data->state = finishing;
490 			}
491 		}
492 		if (data->out.pos == data->out.size ||
493 		    (flush && data->out.pos > 0)) {
494 			ret = __archive_write_filter(f->next_filter,
495 			    data->out.dst, data->out.pos);
496 			if (ret != ARCHIVE_OK)
497 				goto fatal;
498 			data->out.pos = 0;
499 		}
500 	}
501 zstd_fatal:
502 	archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
503 	    "Zstd compression failed: %s",
504 	    ZSTD_getErrorName(zstdret));
505 fatal:
506 	return (ARCHIVE_FATAL);
507 }
508 
509 #else /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */
510 
511 static int
archive_compressor_zstd_open(struct archive_write_filter * f)512 archive_compressor_zstd_open(struct archive_write_filter *f)
513 {
514 	struct private_data *data = (struct private_data *)f->data;
515 	struct archive_string as;
516 	int r;
517 
518 	archive_string_init(&as);
519 	/* --no-check matches library default */
520 	archive_strcpy(&as, "zstd --no-check");
521 
522 	if (data->compression_level < CLEVEL_STD_MIN) {
523 		archive_string_sprintf(&as, " --fast=%d", -data->compression_level);
524 	} else {
525 		archive_string_sprintf(&as, " -%d", data->compression_level);
526 	}
527 
528 	if (data->compression_level > CLEVEL_STD_MAX) {
529 		archive_strcat(&as, " --ultra");
530 	}
531 
532 	if (data->threads != 0) {
533 		archive_string_sprintf(&as, " --threads=%d", data->threads);
534 	}
535 
536 	if (data->long_distance != 0) {
537 		archive_string_sprintf(&as, " --long=%d", data->long_distance);
538 	}
539 
540 	f->write = archive_compressor_zstd_write;
541 	r = __archive_write_program_open(f, data->pdata, as.s);
542 	archive_string_free(&as);
543 	return (r);
544 }
545 
546 static int
archive_compressor_zstd_write(struct archive_write_filter * f,const void * buff,size_t length)547 archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
548     size_t length)
549 {
550 	struct private_data *data = (struct private_data *)f->data;
551 
552 	return __archive_write_program_write(f, data->pdata, buff, length);
553 }
554 
555 static int
archive_compressor_zstd_flush(struct archive_write_filter * f)556 archive_compressor_zstd_flush(struct archive_write_filter *f)
557 {
558 	(void)f; /* UNUSED */
559 
560 	return (ARCHIVE_OK);
561 }
562 
563 static int
archive_compressor_zstd_close(struct archive_write_filter * f)564 archive_compressor_zstd_close(struct archive_write_filter *f)
565 {
566 	struct private_data *data = (struct private_data *)f->data;
567 
568 	return __archive_write_program_close(f, data->pdata);
569 }
570 
571 #endif /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */
572