1 // Written in the D programming language.
2
3 /**
4 * Compress/decompress data using the $(HTTP www._zlib.net, _zlib library).
5 *
6 * Examples:
7 *
8 * If you have a small buffer you can use $(LREF compress) and
9 * $(LREF uncompress) directly.
10 *
11 * -------
12 * import std.zlib;
13 *
14 * auto src =
15 * "the quick brown fox jumps over the lazy dog\r
16 * the quick brown fox jumps over the lazy dog\r";
17 *
18 * ubyte[] dst;
19 * ubyte[] result;
20 *
21 * dst = compress(src);
22 * result = cast(ubyte[]) uncompress(dst);
23 * assert(result == src);
24 * -------
25 *
26 * When the data to be compressed doesn't fit in one buffer, use
27 * $(LREF Compress) and $(LREF UnCompress).
28 *
29 * -------
30 * import std.zlib;
31 * import std.stdio;
32 * import std.conv : to;
33 * import std.algorithm.iteration : map;
34 *
35 * UnCompress decmp = new UnCompress;
36 * foreach (chunk; stdin.byChunk(4096).map!(x => decmp.uncompress(x)))
37 * {
38 * chunk.to!string.write;
39 * }
40
41 * -------
42 *
43 * References:
44 * $(HTTP en.wikipedia.org/wiki/Zlib, Wikipedia)
45 *
46 * Copyright: Copyright Digital Mars 2000 - 2011.
47 * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
48 * Authors: $(HTTP digitalmars.com, Walter Bright)
49 * Source: $(PHOBOSSRC std/_zlib.d)
50 */
51 /* Copyright Digital Mars 2000 - 2011.
52 * Distributed under the Boost Software License, Version 1.0.
53 * (See accompanying file LICENSE_1_0.txt or copy at
54 * http://www.boost.org/LICENSE_1_0.txt)
55 */
56 module std.zlib;
57
58 //debug=zlib; // uncomment to turn on debugging printf's
59
60 import etc.c.zlib;
61
// Values for 'mode'.
// Z_NO_FLUSH is what this module passes to deflate()/inflate() for ordinary
// streaming calls; the other three are the modes accepted by Compress.flush().

enum Z_NO_FLUSH   = 0;
enum Z_SYNC_FLUSH = 2;
enum Z_FULL_FLUSH = 3;
enum Z_FINISH     = 4;
71
/*************************************
 * Errors throw a ZlibException.
 *
 * The exception message is a short description of the zlib status code
 * passed to the constructor.
 */

class ZlibException : Exception
{
    // Translate a zlib status code into the message text used by the
    // exception. Codes that are not listed map to "unknown error".
    private static string describe(int errnum)
    {
        switch (errnum)
        {
            case Z_STREAM_END:
                return "stream end";
            case Z_NEED_DICT:
                return "need dict";
            case Z_ERRNO:
                return "errno";
            case Z_STREAM_ERROR:
                return "stream error";
            case Z_DATA_ERROR:
                return "data error";
            case Z_MEM_ERROR:
                return "mem error";
            case Z_BUF_ERROR:
                return "buf error";
            case Z_VERSION_ERROR:
                return "version error";
            default:
                return "unknown error";
        }
    }

    /**
     * Params:
     *      errnum = status code returned by a zlib call
     */
    this(int errnum)
    {
        super(describe(errnum));
    }
}
96
/**
 * $(P Compute the Adler-32 checksum of a buffer's worth of data.)
 *
 * Params:
 *     adler = the starting checksum for the computation. Use 1
 *             for a new checksum. Use the output of this function
 *             for a cumulative checksum.
 *     buf = buffer containing input data
 *
 * Returns:
 *     A $(D uint) checksum for the provided input data and starting checksum
 *
 * See_Also:
 *     $(LINK http://en.wikipedia.org/wiki/Adler-32)
 */

uint adler32(uint adler, const(void)[] buf)
{
    // The C routine takes a 32-bit length, so the buffer is fed to it in
    // pieces of at most this many bytes.
    enum size_t pieceLimit = 0xFFFF0000;

    auto remaining = cast(ubyte[]) buf;
    while (remaining.length != 0)
    {
        immutable size_t take =
            remaining.length < pieceLimit ? remaining.length : pieceLimit;
        adler = etc.c.zlib.adler32(adler, remaining.ptr, cast(uint) take);
        remaining = remaining[take .. $];
    }
    return adler;
}
122
///
@system unittest
{
    // Checksum of the bytes 1 .. 10, starting from a zero checksum.
    static ubyte[] data = [1,2,3,4,5,6,7,8,9,10];

    immutable uint checksum = adler32(0u, data);
    assert(checksum == 0xdc0037);
}
131
@system unittest
{
    // A string input, starting from the "new checksum" seed of 1.
    static string data = "test";

    immutable uint checksum = adler32(1, data);
    assert(checksum == 0x045d01c1);
}
139
/**
 * $(P Compute the CRC32 checksum of a buffer's worth of data.)
 *
 * Params:
 *     crc = the starting checksum for the computation. Use 0
 *             for a new checksum. Use the output of this function
 *             for a cumulative checksum.
 *     buf = buffer containing input data
 *
 * Returns:
 *     A $(D uint) checksum for the provided input data and starting checksum
 *
 * See_Also:
 *     $(LINK http://en.wikipedia.org/wiki/Cyclic_redundancy_check)
 */

uint crc32(uint crc, const(void)[] buf)
{
    // The C routine takes a 32-bit length, so the buffer is fed to it in
    // pieces of at most this many bytes.
    enum size_t pieceLimit = 0xFFFF0000;

    auto remaining = cast(ubyte[]) buf;
    while (remaining.length != 0)
    {
        immutable size_t take =
            remaining.length < pieceLimit ? remaining.length : pieceLimit;
        crc = etc.c.zlib.crc32(crc, remaining.ptr, cast(uint) take);
        remaining = remaining[take .. $];
    }
    return crc;
}
165
@system unittest
{
    // CRC32 of the bytes 1 .. 10, starting from a zero checksum.
    static ubyte[] data = [1,2,3,4,5,6,7,8,9,10];

    debug(zlib) printf("D.zlib.crc32.unittest\n");
    uint crc = crc32(0u, cast(void[]) data);
    debug(zlib) printf("crc = %x\n", crc);
    assert(crc == 0x2520577b);
}
177
/**
 * $(P Compress data)
 *
 * Params:
 *     srcbuf = buffer containing the data to compress
 *     level = compression level. Legal values are -1 .. 9, with -1 indicating
 *             the default level (6), 0 indicating no compression, 1 being the
 *             least compression and 9 being the most.
 *
 * Returns:
 *     the compressed data
 *
 * Throws:
 *     $(LREF ZlibException) if the underlying zlib call reports an error.
 */

ubyte[] compress(const(void)[] srcbuf, int level)
in
{
    assert(level >= -1 && level <= 9);
}
body
{
    import core.memory : GC;

    // Output allowance: the input size, plus one byte per started KiB of
    // input, plus 12 bytes. (Presumably chosen as an upper bound on how much
    // deflate can expand its input -- the formula is kept exactly as it was.)
    immutable size_t inputLen = srcbuf.length;
    size_t packedLen = inputLen + ((inputLen + 1023) / 1024) + 12;
    ubyte[] packed = new ubyte[packedLen];

    // compress2 overwrites packedLen with the number of bytes produced.
    immutable int status = etc.c.zlib.compress2(packed.ptr, &packedLen,
            cast(ubyte *) srcbuf.ptr, inputLen, level);
    if (status != 0)
    {
        // Hand the scratch buffer back before reporting the failure.
        GC.free(packed.ptr);
        throw new ZlibException(status);
    }

    packed.length = packedLen;
    return packed;
}
211
/*********************************************
 * ditto
 */

ubyte[] compress(const(void)[] srcbuf)
{
    // Delegate to the two-argument overload with zlib's default level.
    immutable int defaultLevel = Z_DEFAULT_COMPRESSION;
    return compress(srcbuf, defaultLevel);
}
220
/*********************************************
 * Decompresses the data in srcbuf[].
 * Params:
 *  srcbuf  = buffer containing the compressed data.
 *  destlen = size of the uncompressed data.
 *            It need not be accurate, but the decompression will be faster
 *            if the exact size is supplied.
 *  winbits = the base two logarithm of the maximum window size.
 * Returns: the decompressed data.
 * Throws: $(LREF ZlibException) if zlib reports an error while setting up,
 *         running or tearing down the inflate stream.
 */

void[] uncompress(const(void)[] srcbuf, size_t destlen = 0u, int winbits = 15)
{
    import std.conv : to;

    // No size hint: start with a guess derived from the input length.
    if (!destlen)
        destlen = srcbuf.length * 2 + 1;

    etc.c.zlib.z_stream zs;
    zs.next_in = cast(typeof(zs.next_in)) srcbuf.ptr;
    zs.avail_in = to!uint(srcbuf.length);
    int err = etc.c.zlib.inflateInit2(&zs, winbits);
    if (err)
    {
        throw new ZlibException(err);
    }

    // From here on the stream owns zlib-side state. Make sure it is released
    // on every exit path, including exceptions thrown while growing the
    // buffer, unless the success path below has already ended it.
    bool ended = false;
    scope (exit)
    {
        if (!ended)
            etc.c.zlib.inflateEnd(&zs);
    }

    ubyte[] destbuf;
    size_t filled = 0;      // number of valid output bytes in destbuf

    while (true)
    {
        destbuf.length = destlen;

        // zlib counts available output in a uint; offer at most that much
        // of the free space per call instead of failing on large buffers.
        immutable size_t room = destlen - filled;
        immutable uint offered = room > uint.max ? uint.max : cast(uint) room;
        zs.next_out = cast(typeof(zs.next_out)) (destbuf.ptr + filled);
        zs.avail_out = offered;

        err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH);
        filled += offered - zs.avail_out;

        if (err == Z_OK)
        {
            // Grow only when the buffer is actually full. If inflate stopped
            // with room to spare, calling it again continues at the right
            // offset (and reports an error if no progress is possible).
            if (filled == destlen)
                destlen = destbuf.length * 2;
            continue;
        }

        if (err != Z_STREAM_END)
            throw new ZlibException(err);   // scope(exit) releases the stream

        destbuf.length = filled;
        ended = true;
        err = etc.c.zlib.inflateEnd(&zs);
        if (err != Z_OK)
            throw new ZlibException(err);
        return destbuf;
    }
    assert(0);
}
281
@system unittest
{
    // Round-trip a short piece of text through compress() / uncompress().
    string text =
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
";

    ubyte[] packed = compress(text);
    ubyte[] restored = cast(ubyte[]) uncompress(packed);
    assert(restored == text);
}
298
@system unittest
{
    // A million identical bytes. The size assertion below makes sure the
    // compressed form is small enough that uncompress()'s default output
    // guess (twice the input plus one) is too small and has to grow.
    auto original = new ubyte[1000000];
    original[] = 0x80;

    ubyte[] packed = compress(original);
    assert(packed.length*2 + 1 < original.length);

    ubyte[] restored = cast(ubyte[]) uncompress(packed);
    assert(restored == original);
}
311
312 /+
313 void arrayPrint(ubyte[] array)
314 {
315 //printf("array %p,%d\n", cast(void*) array, array.length);
316 for (size_t i = 0; i < array.length; i++)
317 {
318 printf("%02x ", array[i]);
319 if (((i + 1) & 15) == 0)
320 printf("\n");
321 }
322 printf("\n\n");
323 }
324 +/
325
/// the header format the compressed stream is wrapped in
enum HeaderFormat
{
    /// a standard zlib header
    deflate,

    /// a gzip file format header
    gzip,

    /// used when decompressing. Try to automatically detect the stream format by looking at the data
    determineFromData
}
332
/*********************************************
 * Used when the data to be compressed is not all in one buffer.
 *
 * Feed the input piece by piece to $(D compress), then call $(D flush) to
 * obtain whatever is still pending. The pieces of output, concatenated in
 * order, form the compressed stream.
 */

class Compress
{
    import std.conv : to;

  private:
    z_stream zs;
    int level = Z_DEFAULT_COMPRESSION;
    int inited;             // nonzero while zs holds a live deflate stream
    immutable bool gzip;    // true when a gzip header was requested

    // Release the deflate stream if one is active, then throw a
    // ZlibException for err. Never returns normally.
    void error(int err)
    {
        if (inited)
        {
            deflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

  public:

    /**
     * Constructor.
     *
     * Params:
     *    level = compression level. Legal values are -1 .. 9, with -1
     *            indicating the default level (6), 0 indicating no
     *            compression, 1 being the least compression and 9 being
     *            the most. This is the same range the free function
     *            $(LREF compress) accepts.
     *    header = sets the compression type to one of the options available
     *             in $(LREF HeaderFormat). Defaults to HeaderFormat.deflate.
     *
     * See_Also:
     *    $(LREF compress), $(LREF HeaderFormat)
     */
    this(int level, HeaderFormat header = HeaderFormat.deflate)
    in
    {
        assert(-1 <= level && level <= 9);
    }
    body
    {
        this.level = level;
        this.gzip = header == HeaderFormat.gzip;
    }

    /// ditto
    this(HeaderFormat header = HeaderFormat.deflate)
    {
        this.gzip = header == HeaderFormat.gzip;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            deflateEnd(&zs);
        }
    }

    /**
     * Compress the data in buf and return the compressed data.
     * Params:
     *    buf = data to compress
     *
     * Returns:
     *    the compressed data. The buffers returned from successive calls to this should be concatenated together.
     *    Returns null when buf is empty.
     *
     * Throws:
     *    $(LREF ZlibException) if zlib reports an error.
     */
    const(void)[] compress(const(void)[] buf)
    {
        import core.memory : GC;
        int err;
        ubyte[] destbuf;

        if (buf.length == 0)
            return null;

        // The deflate stream is created lazily on first use.
        if (!inited)
        {
            // Adding 16 to the window bits selects the gzip wrapper.
            err = deflateInit2(&zs, level, Z_DEFLATED, 15 + (gzip ? 16 : 0), 8, Z_DEFAULT_STRATEGY);
            if (err)
                error(err);
            inited = 1;
        }

        // Output space: one byte per input byte, counting both the new data
        // and any input a previous call left unconsumed.
        destbuf = new ubyte[zs.avail_in + buf.length];
        zs.next_out = destbuf.ptr;
        zs.avail_out = to!uint(destbuf.length);

        // Input left over from the previous call goes in front of the new data.
        if (zs.avail_in)
            buf = zs.next_in[0 .. zs.avail_in] ~ cast(ubyte[]) buf;

        zs.next_in = cast(typeof(zs.next_in)) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        err = deflate(&zs, Z_NO_FLUSH);
        if (err != Z_STREAM_END && err != Z_OK)
        {
            GC.free(destbuf.ptr);
            error(err);
        }

        // Trim the buffer to the part deflate actually wrote.
        destbuf.length = destbuf.length - zs.avail_out;
        return destbuf;
    }

    /***
     * Compress and return any remaining data.
     * The returned data should be appended to that returned by compress().
     * Params:
     *  mode = one of the following:
     *          $(DL
                    $(DT Z_SYNC_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on.)
                    $(DT Z_FULL_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on,
                        and the decompressor needs to be restartable at this
                        point.)
                    $(DT Z_FINISH)
                    $(DD (default) Used when finished compressing the data. )
                )
     * Returns:
     *  the flushed data, or null if nothing has been compressed since the
     *  stream was last finished.
     * Throws:
     *  $(LREF ZlibException) if zlib reports an error.
     */
    void[] flush(int mode = Z_FINISH)
    in
    {
        assert(mode == Z_FINISH || mode == Z_SYNC_FLUSH || mode == Z_FULL_FLUSH);
    }
    body
    {
        import core.memory : GC;
        ubyte[] destbuf;
        ubyte[512] tmpbuf = void;   // scratch output, drained into destbuf
        int err;

        if (!inited)
            return null;

        zs.next_out = tmpbuf.ptr;
        zs.avail_out = tmpbuf.length;

        while ( (err = deflate(&zs, mode)) != Z_STREAM_END)
        {
            if (err == Z_OK)
            {
                // A non-finishing flush is complete once deflate stops with
                // output space to spare.
                if (zs.avail_out != 0 && mode != Z_FINISH)
                    break;
                else if (zs.avail_out == 0)
                {
                    // Scratch buffer is full: save it and go round again.
                    destbuf ~= tmpbuf;
                    zs.next_out = tmpbuf.ptr;
                    zs.avail_out = tmpbuf.length;
                    continue;
                }
                // Z_FINISH returned Z_OK without filling the output:
                // report it as a buffer error.
                err = Z_BUF_ERROR;
            }
            GC.free(destbuf.ptr);
            error(err);
        }
        destbuf ~= tmpbuf[0 .. (tmpbuf.length - zs.avail_out)];

        if (mode == Z_FINISH)
        {
            err = deflateEnd(&zs);
            inited = 0;
            if (err)
                error(err);
        }
        return destbuf;
    }
}
513
/******
 * Used when the data to be decompressed is not all in one buffer.
 *
 * Feed the compressed input piece by piece to $(D uncompress), then call
 * $(D flush) once to obtain any remaining output. The pieces of output,
 * concatenated in order, form the decompressed data.
 */

class UnCompress
{
    import std.conv : to;

  private:
    z_stream zs;
    int inited;             // nonzero while zs holds a live inflate stream
    int done;               // set by flush(); the object is unusable afterwards
    size_t destbufsize;     // output allowance per call; 0 = derive from input

    HeaderFormat format;

    // Release the inflate stream if one is active, then throw a
    // ZlibException for err. Never returns normally.
    void error(int err)
    {
        if (inited)
        {
            inflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

    // zlib counts buffer space in a uint; cap a size to what it can express.
    static uint clampToUint(size_t n)
    {
        return n > uint.max ? uint.max : cast(uint) n;
    }

  public:

    /**
     * Construct. destbufsize is the same as the destlen parameter of
     * $(LREF uncompress).
     *
     * Note that this overload does not set the header format, so it stays
     * at its initial value, $(D HeaderFormat.deflate).
     */
    this(uint destbufsize)
    {
        this.destbufsize = destbufsize;
    }

    /** ditto */
    this(HeaderFormat format = HeaderFormat.determineFromData)
    {
        this.format = format;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            inflateEnd(&zs);
        }
        done = 1;
    }

    /**
     * Decompress the data in buf and return the decompressed data.
     * The buffers returned from successive calls to this should be concatenated
     * together.
     *
     * Returns null when buf is empty.
     *
     * Throws: $(LREF ZlibException) if zlib reports an error.
     */
    const(void)[] uncompress(const(void)[] buf)
    in
    {
        assert(!done);
    }
    body
    {
        import core.memory : GC;
        int err;
        ubyte[] destbuf;

        if (buf.length == 0)
            return null;

        // The inflate stream is created lazily on first use.
        if (!inited)
        {
            // Adding 16 to the window bits selects gzip decoding; adding 32
            // asks zlib to detect the header type from the data.
            int windowBits = 15;
            if (format == HeaderFormat.gzip)
                windowBits += 16;
            else if (format == HeaderFormat.determineFromData)
                windowBits += 32;

            err = inflateInit2(&zs, windowBits);
            if (err)
                error(err);
            inited = 1;
        }

        // Do the size arithmetic in size_t so that doubling a length of
        // 2 GiB or more cannot wrap around in 32 bits, then cap the
        // allocation to what zlib's uint counter can describe.
        if (!destbufsize)
            destbufsize = cast(size_t) to!uint(buf.length) * 2;
        destbuf = new ubyte[clampToUint(cast(size_t) zs.avail_in * 2 + destbufsize)];
        zs.next_out = destbuf.ptr;
        zs.avail_out = to!uint(destbuf.length);

        // Input left over from the previous call goes in front of the new data.
        if (zs.avail_in)
            buf = zs.next_in[0 .. zs.avail_in] ~ cast(ubyte[]) buf;

        zs.next_in = cast(ubyte*) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        err = inflate(&zs, Z_NO_FLUSH);
        if (err != Z_STREAM_END && err != Z_OK)
        {
            GC.free(destbuf.ptr);
            error(err);
        }

        // Trim the buffer to the part inflate actually wrote.
        destbuf.length = destbuf.length - zs.avail_out;
        return destbuf;
    }

    /**
     * Decompress and return any remaining data.
     * The returned data should be appended to that returned by uncompress().
     * The UnCompress object cannot be used further.
     *
     * Returns null if nothing was ever passed to uncompress().
     *
     * Throws: $(LREF ZlibException) if zlib reports an error, or if the
     *         end of the compressed stream has not been reached.
     */
    void[] flush()
    in
    {
        assert(!done);
    }
    out
    {
        assert(done);
    }
    body
    {
        import core.memory : GC;
        ubyte[] extra;      // completely filled buffers from earlier rounds
        ubyte[] destbuf;
        int err;

        done = 1;
        if (!inited)
            return null;

        // Keep inflating into fresh buffers for as long as each one is
        // filled to the brim with more output still to come.
        while (true)
        {
            destbuf = new ubyte[clampToUint(cast(size_t) zs.avail_in * 2 + 100)];
            zs.next_out = destbuf.ptr;
            zs.avail_out = to!uint(destbuf.length);

            err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH);
            if (err == Z_OK && zs.avail_out == 0)
            {
                extra ~= destbuf;
                continue;
            }
            break;
        }

        if (err != Z_STREAM_END)
        {
            GC.free(destbuf.ptr);
            // Z_OK here means inflate stopped short of the end of the
            // stream; report that as a buffer error.
            if (err == Z_OK)
                err = Z_BUF_ERROR;
            error(err);
        }

        // Keep only the bytes inflate wrote into the last buffer.
        destbuf = destbuf.ptr[0 .. zs.next_out - destbuf.ptr];
        err = etc.c.zlib.inflateEnd(&zs);
        inited = 0;
        if (err)
            error(err);
        if (extra.length)
            destbuf = extra ~ destbuf;
        return destbuf;
    }
}
674
675 /* ========================== unittest ========================= */
676
677 import std.random;
678 import std.stdio;
679
@system unittest // by Dave
{
    debug(zlib) writeln("std.zlib.unittest");

    // Compress src, decompress the result, and check that the original
    // bytes come back. Always returns true; failures trip the asserts.
    bool CompressThenUncompress (void[] src)
    {
        ubyte[] packed = std.zlib.compress(src);
        double ratio = (packed.length / cast(double) src.length);
        debug(zlib) writef("src.length: %1$d, dst: %2$d, Ratio = %3$f", src.length, packed.length, ratio);

        ubyte[] restored = cast(ubyte[]) std.zlib.uncompress(packed);
        assert(src.length == restored.length);
        assert(src == restored);

        return true;
    }


    // smallish buffers
    foreach (int idx; 0 .. 25)
    {
        char[] buf = new char[uniform(0, 100)];

        // Alternate between more & less compressible
        immutable int spread = idx % 2 ? 91 : 2;
        foreach (ref char c; buf)
            c = cast(char) (' ' + (uniform(0, spread)));

        if (!CompressThenUncompress(buf))
            return;
        debug(zlib) writeln("; Success.");
    }

    // larger buffers
    foreach (int idx; 0 .. 25)
    {
        char[] buf = new char[uniform(0, 1000/*0000*/)];

        // Alternate between more & less compressible
        immutable int spread = idx % 2 ? 91 : 10;
        foreach (ref char c; buf)
            c = cast(char) (' ' + (uniform(0, spread)));

        if (!CompressThenUncompress(buf))
            return;
        debug(zlib) writefln("; Success.");
    }

    debug(zlib) writefln("PASSED std.zlib.unittest");
}
738
739
@system unittest // by Artem Rebrov
{
    // Round-trip a short string through the streaming classes.
    auto packer = new Compress;
    auto unpacker = new UnCompress;

    const(void)[] input = "tesatdffadf";

    // The compressed stream is the compress() output followed by flush().
    const(void)[] packed = packer.compress(input);
    packed ~= packer.flush();

    const(void)[] output = unpacker.uncompress(packed);

    assert( output[] == input[] );
}
756
@system unittest
{
    // bugzilla 15457: a call to etc.c.zlib.gzclose must compile.
    enum bool gzcloseCompiles = __traits(compiles, etc.c.zlib.gzclose(null));
    static assert(gzcloseCompiles);
}
761