1 /*-
2  * Copyright (c) 2009,2010 Michihiro NAKAJIMA
3  * Copyright (c) 2003-2010 Tim Kientzle
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "archive_platform.h"
28 
29 __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_xz.c 201108 2009-12-28 03:28:21Z kientzle $");
30 
31 #ifdef HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 #include <time.h>
41 #ifdef HAVE_LZMA_H
42 #include <lzma.h>
43 #endif
44 
45 #include "archive.h"
46 #include "archive_endian.h"
47 #include "archive_private.h"
48 #include "archive_write_private.h"
49 
50 #if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Legacy setter, compiled only while ARCHIVE_VERSION_NUMBER is below
 * 4000000.  Runs __archive_write_filters_free() on the handle and then
 * hands back whatever archive_write_add_filter_lzip() reports.
 */
int
archive_write_set_compression_lzip(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_lzip(a);
	return (status);
}
57 
/*
 * Legacy setter, compiled only while ARCHIVE_VERSION_NUMBER is below
 * 4000000.  Runs __archive_write_filters_free() on the handle and then
 * hands back whatever archive_write_add_filter_lzma() reports.
 */
int
archive_write_set_compression_lzma(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_lzma(a);
	return (status);
}
64 
/*
 * Legacy setter, compiled only while ARCHIVE_VERSION_NUMBER is below
 * 4000000.  Runs __archive_write_filters_free() on the handle and then
 * hands back whatever archive_write_add_filter_xz() reports.
 */
int
archive_write_set_compression_xz(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_xz(a);
	return (status);
}
71 
72 #endif
73 
74 #ifndef HAVE_LZMA_H
75 int
76 archive_write_add_filter_xz(struct archive *a)
77 {
78 	archive_set_error(a, ARCHIVE_ERRNO_MISC,
79 	    "xz compression not supported on this platform");
80 	return (ARCHIVE_FATAL);
81 }
82 
83 int
84 archive_write_add_filter_lzma(struct archive *a)
85 {
86 	archive_set_error(a, ARCHIVE_ERRNO_MISC,
87 	    "lzma compression not supported on this platform");
88 	return (ARCHIVE_FATAL);
89 }
90 
91 int
92 archive_write_add_filter_lzip(struct archive *a)
93 {
94 	archive_set_error(a, ARCHIVE_ERRNO_MISC,
95 	    "lzma compression not supported on this platform");
96 	return (ARCHIVE_FATAL);
97 }
98 #else
99 /* Don't compile this if we don't have liblzma. */
100 
101 struct private_data {
102 	int		 compression_level;
103 	lzma_stream	 stream;
104 	lzma_filter	 lzmafilters[2];
105 	lzma_options_lzma lzma_opt;
106 	int64_t		 total_in;
107 	unsigned char	*compressed;
108 	size_t		 compressed_buffer_size;
109 	int64_t		 total_out;
110 	/* the CRC32 value of uncompressed data for lzip */
111 	uint32_t	 crc32;
112 };
113 
/* Filter callbacks installed by common_setup() / the open callback. */
static int	archive_compressor_xz_open(struct archive_write_filter *f);
static int	archive_compressor_xz_options(struct archive_write_filter *f,
		    const char *key, const char *value);
static int	archive_compressor_xz_write(struct archive_write_filter *f,
		    const void *buff, size_t length);
static int	archive_compressor_xz_close(struct archive_write_filter *f);
static int	archive_compressor_xz_free(struct archive_write_filter *f);

/* Shared helper that runs lzma_code() and flushes full output buffers. */
static int	drive_compressor(struct archive_write_filter *f,
		    struct private_data *data, int finishing);
123 
124 struct option_value {
125 	uint32_t dict_size;
126 	uint32_t nice_len;
127 	lzma_match_finder mf;
128 };
129 static const struct option_value option_values[] = {
130 	{ 1 << 16, 32, LZMA_MF_HC3},
131 	{ 1 << 20, 32, LZMA_MF_HC3},
132 	{ 3 << 19, 32, LZMA_MF_HC4},
133 	{ 1 << 21, 32, LZMA_MF_BT4},
134 	{ 3 << 20, 32, LZMA_MF_BT4},
135 	{ 1 << 22, 32, LZMA_MF_BT4},
136 	{ 1 << 23, 64, LZMA_MF_BT4},
137 	{ 1 << 24, 64, LZMA_MF_BT4},
138 	{ 3 << 23, 64, LZMA_MF_BT4},
139 	{ 1 << 25, 64, LZMA_MF_BT4}
140 };
141 
142 static int
143 common_setup(struct archive_write_filter *f)
144 {
145 	struct private_data *data;
146 	struct archive_write *a = (struct archive_write *)f->archive;
147 	data = calloc(1, sizeof(*data));
148 	if (data == NULL) {
149 		archive_set_error(&a->archive, ENOMEM, "Out of memory");
150 		return (ARCHIVE_FATAL);
151 	}
152 	f->data = data;
153 	data->compression_level = LZMA_PRESET_DEFAULT;
154 	f->open = &archive_compressor_xz_open;
155 	f->close = archive_compressor_xz_close;
156 	f->free = archive_compressor_xz_free;
157 	f->options = &archive_compressor_xz_options;
158 	return (ARCHIVE_OK);
159 }
160 
161 /*
162  * Add an xz compression filter to this write handle.
163  */
164 int
165 archive_write_add_filter_xz(struct archive *_a)
166 {
167 	struct archive_write_filter *f;
168 	int r;
169 
170 	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
171 	    ARCHIVE_STATE_NEW, "archive_write_add_filter_xz");
172 	f = __archive_write_allocate_filter(_a);
173 	r = common_setup(f);
174 	if (r == ARCHIVE_OK) {
175 		f->code = ARCHIVE_COMPRESSION_XZ;
176 		f->name = "xz";
177 	}
178 	return (r);
179 }
180 
181 /* LZMA is handled identically, we just need a different compression
182  * code set.  (The liblzma setup looks at the code to determine
183  * the one place that XZ and LZMA require different handling.) */
184 int
185 archive_write_add_filter_lzma(struct archive *_a)
186 {
187 	struct archive_write_filter *f;
188 	int r;
189 
190 	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
191 	    ARCHIVE_STATE_NEW, "archive_write_add_filter_lzma");
192 	f = __archive_write_allocate_filter(_a);
193 	r = common_setup(f);
194 	if (r == ARCHIVE_OK) {
195 		f->code = ARCHIVE_COMPRESSION_LZMA;
196 		f->name = "lzma";
197 	}
198 	return (r);
199 }
200 
201 int
202 archive_write_add_filter_lzip(struct archive *_a)
203 {
204 	struct archive_write_filter *f;
205 	int r;
206 
207 	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
208 	    ARCHIVE_STATE_NEW, "archive_write_add_filter_lzip");
209 	f = __archive_write_allocate_filter(_a);
210 	r = common_setup(f);
211 	if (r == ARCHIVE_OK) {
212 		f->code = ARCHIVE_COMPRESSION_LZIP;
213 		f->name = "lzip";
214 	}
215 	return (r);
216 }
217 
218 static int
219 archive_compressor_xz_init_stream(struct archive_write_filter *f,
220     struct private_data *data)
221 {
222 	static const lzma_stream lzma_stream_init_data = LZMA_STREAM_INIT;
223 	int ret;
224 
225 	data->stream = lzma_stream_init_data;
226 	data->stream.next_out = data->compressed;
227 	data->stream.avail_out = data->compressed_buffer_size;
228 	if (f->code == ARCHIVE_COMPRESSION_XZ)
229 		ret = lzma_stream_encoder(&(data->stream),
230 		    data->lzmafilters, LZMA_CHECK_CRC64);
231 	else if (f->code == ARCHIVE_COMPRESSION_LZMA)
232 		ret = lzma_alone_encoder(&(data->stream), &data->lzma_opt);
233 	else {	/* ARCHIVE_COMPRESSION_LZIP */
234 		int dict_size = data->lzma_opt.dict_size;
235 		int ds, log2dic, wedges;
236 
237 		/* Calculate a coded dictionary size */
238 		if (dict_size < (1 << 12) || dict_size > (1 << 27)) {
239 			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
240 			    "Unacceptable dictionary dize for lzip: %d",
241 			    dict_size);
242 			return (ARCHIVE_FATAL);
243 		}
244 		for (log2dic = 27; log2dic >= 12; log2dic--) {
245 			if (dict_size & (1 << log2dic))
246 				break;
247 		}
248 		if (dict_size > (1 << log2dic)) {
249 			log2dic++;
250 			wedges =
251 			    ((1 << log2dic) - dict_size) / (1 << (log2dic - 4));
252 		} else
253 			wedges = 0;
254 		ds = ((wedges << 5) & 0xe0) | (log2dic & 0x1f);
255 
256 		data->crc32 = 0;
257 		/* Make a header */
258 		data->compressed[0] = 0x4C;
259 		data->compressed[1] = 0x5A;
260 		data->compressed[2] = 0x49;
261 		data->compressed[3] = 0x50;
262 		data->compressed[4] = 1;/* Version */
263 		data->compressed[5] = (unsigned char)ds;
264 		data->stream.next_out += 6;
265 		data->stream.avail_out -= 6;
266 
267 		ret = lzma_raw_encoder(&(data->stream), data->lzmafilters);
268 	}
269 	if (ret == LZMA_OK)
270 		return (ARCHIVE_OK);
271 
272 	switch (ret) {
273 	case LZMA_MEM_ERROR:
274 		archive_set_error(f->archive, ENOMEM,
275 		    "Internal error initializing compression library: "
276 		    "Cannot allocate memory");
277 		break;
278 	default:
279 		archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
280 		    "Internal error initializing compression library: "
281 		    "It's a bug in liblzma");
282 		break;
283 	}
284 	return (ARCHIVE_FATAL);
285 }
286 
287 /*
288  * Setup callback.
289  */
290 static int
291 archive_compressor_xz_open(struct archive_write_filter *f)
292 {
293 	struct private_data *data = f->data;
294 	int ret;
295 
296 	ret = __archive_write_open_filter(f->next_filter);
297 	if (ret != ARCHIVE_OK)
298 		return (ret);
299 
300 	if (data->compressed == NULL) {
301 		data->compressed_buffer_size = 65536;
302 		data->compressed
303 		    = (unsigned char *)malloc(data->compressed_buffer_size);
304 		if (data->compressed == NULL) {
305 			archive_set_error(f->archive, ENOMEM,
306 			    "Can't allocate data for compression buffer");
307 			return (ARCHIVE_FATAL);
308 		}
309 	}
310 
311 	f->write = archive_compressor_xz_write;
312 
313 	/* Initialize compression library. */
314 	if (f->code == ARCHIVE_COMPRESSION_LZIP) {
315 		const struct option_value *val =
316 		    &option_values[data->compression_level];
317 
318 		data->lzma_opt.dict_size = val->dict_size;
319 		data->lzma_opt.preset_dict = NULL;
320 		data->lzma_opt.preset_dict_size = 0;
321 		data->lzma_opt.lc = LZMA_LC_DEFAULT;
322 		data->lzma_opt.lp = LZMA_LP_DEFAULT;
323 		data->lzma_opt.pb = LZMA_PB_DEFAULT;
324 		data->lzma_opt.mode =
325 		    data->compression_level<= 2? LZMA_MODE_FAST:LZMA_MODE_NORMAL;
326 		data->lzma_opt.nice_len = val->nice_len;
327 		data->lzma_opt.mf = val->mf;
328 		data->lzma_opt.depth = 0;
329 		data->lzmafilters[0].id = LZMA_FILTER_LZMA1;
330 		data->lzmafilters[0].options = &data->lzma_opt;
331 		data->lzmafilters[1].id = LZMA_VLI_UNKNOWN;/* Terminate */
332 	} else {
333 		if (lzma_lzma_preset(&data->lzma_opt, data->compression_level)) {
334 			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
335 			    "Internal error initializing compression library");
336 		}
337 		data->lzmafilters[0].id = LZMA_FILTER_LZMA2;
338 		data->lzmafilters[0].options = &data->lzma_opt;
339 		data->lzmafilters[1].id = LZMA_VLI_UNKNOWN;/* Terminate */
340 	}
341 	ret = archive_compressor_xz_init_stream(f, data);
342 	if (ret == LZMA_OK) {
343 		f->data = data;
344 		return (0);
345 	}
346 	return (ARCHIVE_FATAL);
347 }
348 
349 /*
350  * Set write options.
351  */
352 static int
353 archive_compressor_xz_options(struct archive_write_filter *f,
354     const char *key, const char *value)
355 {
356 	struct private_data *data = (struct private_data *)f->data;
357 
358 	if (strcmp(key, "compression-level") == 0) {
359 		if (value == NULL || !(value[0] >= '0' && value[0] <= '9') ||
360 		    value[1] != '\0')
361 			return (ARCHIVE_WARN);
362 		data->compression_level = value[0] - '0';
363 		if (data->compression_level > 6)
364 			data->compression_level = 6;
365 		return (ARCHIVE_OK);
366 	}
367 
368 	return (ARCHIVE_WARN);
369 }
370 
371 /*
372  * Write data to the compressed stream.
373  */
374 static int
375 archive_compressor_xz_write(struct archive_write_filter *f,
376     const void *buff, size_t length)
377 {
378 	struct private_data *data = (struct private_data *)f->data;
379 	int ret;
380 
381 	/* Update statistics */
382 	data->total_in += length;
383 	if (f->code == ARCHIVE_COMPRESSION_LZIP)
384 		data->crc32 = lzma_crc32(buff, length, data->crc32);
385 
386 	/* Compress input data to output buffer */
387 	data->stream.next_in = buff;
388 	data->stream.avail_in = length;
389 	if ((ret = drive_compressor(f, data, 0)) != ARCHIVE_OK)
390 		return (ret);
391 
392 	return (ARCHIVE_OK);
393 }
394 
395 
396 /*
397  * Finish the compression...
398  */
399 static int
400 archive_compressor_xz_close(struct archive_write_filter *f)
401 {
402 	struct private_data *data = (struct private_data *)f->data;
403 	int ret, r1;
404 
405 	ret = drive_compressor(f, data, 1);
406 	if (ret == ARCHIVE_OK) {
407 		data->total_out +=
408 		    data->compressed_buffer_size - data->stream.avail_out;
409 		ret = __archive_write_filter(f->next_filter,
410 		    data->compressed,
411 		    data->compressed_buffer_size - data->stream.avail_out);
412 		if (f->code == ARCHIVE_COMPRESSION_LZIP && ret == ARCHIVE_OK) {
413 			archive_le32enc(data->compressed, data->crc32);
414 			archive_le64enc(data->compressed+4, data->total_in);
415 			archive_le64enc(data->compressed+12, data->total_out + 20);
416 			ret = __archive_write_filter(f->next_filter,
417 			    data->compressed, 20);
418 		}
419 	}
420 	lzma_end(&(data->stream));
421 	r1 = __archive_write_close_filter(f->next_filter);
422 	return (r1 < ret ? r1 : ret);
423 }
424 
425 static int
426 archive_compressor_xz_free(struct archive_write_filter *f)
427 {
428 	struct private_data *data = (struct private_data *)f->data;
429 	free(data->compressed);
430 	free(data);
431 	f->data = NULL;
432 	return (ARCHIVE_OK);
433 }
434 
435 /*
436  * Utility function to push input data through compressor,
437  * writing full output blocks as necessary.
438  *
439  * Note that this handles both the regular write case (finishing ==
440  * false) and the end-of-archive case (finishing == true).
441  */
442 static int
443 drive_compressor(struct archive_write_filter *f,
444     struct private_data *data, int finishing)
445 {
446 	int ret;
447 
448 	for (;;) {
449 		if (data->stream.avail_out == 0) {
450 			data->total_out += data->compressed_buffer_size;
451 			ret = __archive_write_filter(f->next_filter,
452 			    data->compressed,
453 			    data->compressed_buffer_size);
454 			if (ret != ARCHIVE_OK)
455 				return (ARCHIVE_FATAL);
456 			data->stream.next_out = data->compressed;
457 			data->stream.avail_out = data->compressed_buffer_size;
458 		}
459 
460 		/* If there's nothing to do, we're done. */
461 		if (!finishing && data->stream.avail_in == 0)
462 			return (ARCHIVE_OK);
463 
464 		ret = lzma_code(&(data->stream),
465 		    finishing ? LZMA_FINISH : LZMA_RUN );
466 
467 		switch (ret) {
468 		case LZMA_OK:
469 			/* In non-finishing case, check if compressor
470 			 * consumed everything */
471 			if (!finishing && data->stream.avail_in == 0)
472 				return (ARCHIVE_OK);
473 			/* In finishing case, this return always means
474 			 * there's more work */
475 			break;
476 		case LZMA_STREAM_END:
477 			/* This return can only occur in finishing case. */
478 			if (finishing)
479 				return (ARCHIVE_OK);
480 			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
481 			    "lzma compression data error");
482 			return (ARCHIVE_FATAL);
483 		case LZMA_MEMLIMIT_ERROR:
484 			archive_set_error(f->archive, ENOMEM,
485 			    "lzma compression error: "
486 			    "%ju MiB would have been needed",
487 			    (uintmax_t)((lzma_memusage(&(data->stream))
488 				    + 1024 * 1024 -1)
489 				/ (1024 * 1024)));
490 			return (ARCHIVE_FATAL);
491 		default:
492 			/* Any other return value indicates an error. */
493 			archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
494 			    "lzma compression failed:"
495 			    " lzma_code() call returned status %d",
496 			    ret);
497 			return (ARCHIVE_FATAL);
498 		}
499 	}
500 }
501 
502 #endif /* HAVE_LZMA_H */
503