1 // Written in the D programming language.
2 
3 /**
4  * Compress/decompress data using the $(HTTP www._zlib.net, _zlib library).
5  *
6  * Examples:
7  *
8  * If you have a small buffer you can use $(LREF compress) and
9  * $(LREF uncompress) directly.
10  *
11  * -------
12  * import std.zlib;
13  *
14  * auto src =
15  * "the quick brown fox jumps over the lazy dog\r
16  *  the quick brown fox jumps over the lazy dog\r";
17  *
18  * ubyte[] dst;
19  * ubyte[] result;
20  *
21  * dst = compress(src);
22  * result = cast(ubyte[]) uncompress(dst);
23  * assert(result == src);
24  * -------
25  *
26  * When the data to be compressed doesn't fit in one buffer, use
27  * $(LREF Compress) and $(LREF UnCompress).
28  *
29  * -------
30  * import std.zlib;
31  * import std.stdio;
32  * import std.conv : to;
33  * import std.algorithm.iteration : map;
34  *
35  * UnCompress decmp = new UnCompress;
36  * foreach (chunk; stdin.byChunk(4096).map!(x => decmp.uncompress(x)))
37  * {
38  *     chunk.to!string.write;
39  * }
 *
41  * -------
42  *
43  * References:
44  *  $(HTTP en.wikipedia.org/wiki/Zlib, Wikipedia)
45  *
46  * Copyright: Copyright Digital Mars 2000 - 2011.
47  * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
48  * Authors:   $(HTTP digitalmars.com, Walter Bright)
49  * Source:    $(PHOBOSSRC std/_zlib.d)
50  */
51 /*          Copyright Digital Mars 2000 - 2011.
52  * Distributed under the Boost Software License, Version 1.0.
53  *    (See accompanying file LICENSE_1_0.txt or copy at
54  *          http://www.boost.org/LICENSE_1_0.txt)
55  */
56 module std.zlib;
57 
58 //debug=zlib;       // uncomment to turn on debugging printf's
59 
60 import etc.c.zlib;
61 
// Values for 'mode' (the flush argument handed to zlib's deflate/inflate)

enum : int
{
    // used by this module for ordinary, non-flushing deflate/inflate calls
    Z_NO_FLUSH      = 0,

    // accepted by Compress.flush()
    Z_SYNC_FLUSH    = 2,

    // accepted by Compress.flush()
    Z_FULL_FLUSH    = 3,

    // accepted by Compress.flush(); its default
    Z_FINISH        = 4,
}
71 
/*************************************
 * Errors throw a ZlibException.
 * The exception message is a short description of the zlib status code
 * the exception was constructed from.
 */

class ZlibException : Exception
{
    // errnum is the status code returned by a zlib call.
    this(int errnum)
    {
        super(describe(errnum));
    }

    // Translates a zlib status code into the message text; codes that are
    // not listed map to "unknown error".
    private static string describe(int errnum)
    {
        switch (errnum)
        {
            case Z_STREAM_END:      return "stream end";
            case Z_NEED_DICT:       return "need dict";
            case Z_ERRNO:           return "errno";
            case Z_STREAM_ERROR:    return "stream error";
            case Z_DATA_ERROR:      return "data error";
            case Z_MEM_ERROR:       return "mem error";
            case Z_BUF_ERROR:       return "buf error";
            case Z_VERSION_ERROR:   return "version error";
            default:                return "unknown error";
        }
    }
}
96 
/**
 * $(P Compute the Adler-32 checksum of a buffer's worth of data.)
 *
 * Params:
 *     adler = the starting checksum for the computation. Use 1
 *             for a new checksum. Use the output of this function
 *             for a cumulative checksum.
 *     buf = buffer containing input data
 *
 * Returns:
 *     A $(D uint) checksum for the provided input data and starting checksum.
 *     An empty buffer yields $(D adler) unchanged.
 *
 * See_Also:
 *     $(LINK http://en.wikipedia.org/wiki/Adler-32)
 */

uint adler32(uint adler, const(void)[] buf)
{
    // The C routine is given a uint length, so the buffer is handed over
    // in pieces of at most this many bytes.
    enum size_t maxPiece = 0xFFFF0000;

    auto rest = cast(ubyte[]) buf;
    while (rest.length != 0)
    {
        immutable size_t n = rest.length < maxPiece ? rest.length : maxPiece;
        adler = etc.c.zlib.adler32(adler, rest.ptr, cast(uint) n);
        rest = rest[n .. $];
    }
    return adler;
}
122 
///
@system unittest
{
    static ubyte[] data = [1,2,3,4,5,6,7,8,9,10];

    // A starting value of 0 is accepted too; this pins the historical result.
    uint adler = adler32(0u, data);
    assert(adler == 0xdc0037);

    // A new checksum starts at 1, as described for the adler parameter.
    assert(adler32(1u, data) == 0xe60038);

    // Feeding the previous result back in gives a cumulative checksum
    // equal to checksumming the whole buffer at once.
    uint running = adler32(1u, data[0 .. 4]);
    running = adler32(running, data[4 .. $]);
    assert(running == 0xe60038);
}
131 
@system unittest
{
    // Character data, seeded with 1 as for a new checksum.
    static string text = "test";

    immutable uint sum = adler32(1, text);
    assert(sum == 0x045d01c1);
}
139 
/**
 * $(P Compute the CRC32 checksum of a buffer's worth of data.)
 *
 * Params:
 *     crc = the starting checksum for the computation. Use 0
 *             for a new checksum. Use the output of this function
 *             for a cumulative checksum.
 *     buf = buffer containing input data
 *
 * Returns:
 *     A $(D uint) checksum for the provided input data and starting checksum.
 *     An empty buffer yields $(D crc) unchanged.
 *
 * See_Also:
 *     $(LINK http://en.wikipedia.org/wiki/Cyclic_redundancy_check)
 */

uint crc32(uint crc, const(void)[] buf)
{
    // The C routine is given a uint length, so the buffer is handed over
    // in pieces of at most this many bytes.
    enum size_t maxPiece = 0xFFFF0000;

    auto rest = cast(ubyte[]) buf;
    while (rest.length != 0)
    {
        immutable size_t n = rest.length < maxPiece ? rest.length : maxPiece;
        crc = etc.c.zlib.crc32(crc, rest.ptr, cast(uint) n);
        rest = rest[n .. $];
    }
    return crc;
}
165 
@system unittest
{
    // Known CRC32 of the bytes 1 .. 10, starting from a fresh checksum (0).
    static ubyte[] bytes = [1,2,3,4,5,6,7,8,9,10];

    debug(zlib) printf("D.zlib.crc32.unittest\n");
    immutable uint sum = crc32(0u, cast(void[]) bytes);
    debug(zlib) printf("crc = %x\n", sum);
    assert(sum == 0x2520577b);
}
177 
/**
 * $(P Compress data)
 *
 * Params:
 *     srcbuf = buffer containing the data to compress
 *     level = compression level. Legal values are -1 .. 9, with -1 indicating
 *             the default level (6), 0 indicating no compression, 1 being the
 *             least compression and 9 being the most.
 *
 * Returns:
 *     the compressed data
 *
 * Throws:
 *     $(LREF ZlibException) if zlib reports a failure.
 */

ubyte[] compress(const(void)[] srcbuf, int level)
in
{
    assert(level >= -1 && level <= 9);
}
body
{
    import core.memory : GC;

    // Output capacity: the input size, plus one byte per started KiB, plus 12.
    auto outlen = srcbuf.length + ((srcbuf.length + 1023) / 1024) + 12;
    auto output = new ubyte[outlen];

    // compress2 overwrites outlen with the number of bytes it produced.
    immutable status = etc.c.zlib.compress2(output.ptr, &outlen,
            cast(ubyte *) srcbuf.ptr, srcbuf.length, level);

    if (status == 0)
    {
        output.length = outlen;
        return output;
    }

    // Failure: hand the scratch buffer back before reporting the error.
    GC.free(output.ptr);
    throw new ZlibException(status);
}
211 
/*********************************************
 * ditto
 */

ubyte[] compress(const(void)[] srcbuf)
{
    // Same as the two-argument overload, with zlib's default level.
    enum defaultLevel = Z_DEFAULT_COMPRESSION;
    return compress(srcbuf, defaultLevel);
}
220 
/*********************************************
 * Decompresses the data in srcbuf[].
 * Params:
 *  srcbuf  = buffer containing the compressed data.
 *  destlen = size of the uncompressed data.
 *            It need not be accurate, but the decompression will be faster
 *            if the exact size is supplied. When 0, twice the input size
 *            plus one is used as the initial guess.
 *  winbits = the base two logarithm of the maximum window size.
 * Returns: the decompressed data.
 * Throws: $(LREF ZlibException) if zlib reports a failure.
 */

void[] uncompress(const(void)[] srcbuf, size_t destlen = 0u, int winbits = 15)
{
    import std.conv : to;

    if (!destlen)
        destlen = srcbuf.length * 2 + 1;

    etc.c.zlib.z_stream zs;
    zs.next_in = cast(typeof(zs.next_in)) srcbuf.ptr;
    zs.avail_in = to!uint(srcbuf.length);

    int err = etc.c.zlib.inflateInit2(&zs, winbits);
    if (err)
    {
        throw new ZlibException(err);
    }

    // From here on the inflate state must be released on every way out,
    // including exceptions thrown while the output buffer is being grown.
    bool ended = false;
    scope (exit)
    {
        if (!ended)
            etc.c.zlib.inflateEnd(&zs);
    }

    ubyte[] destbuf;
    size_t filled = 0;      // bytes of destbuf holding decompressed data

    while (true)
    {
        destbuf.length = destlen;

        // avail_out is a uint: offer at most uint.max bytes per call instead
        // of failing when the free space is larger than that.
        immutable size_t room = destbuf.length - filled;
        immutable uint window = room > uint.max ? uint.max : cast(uint) room;
        zs.next_out = cast(typeof(zs.next_out)) (destbuf.ptr + filled);
        zs.avail_out = window;

        err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH);
        filled += window - zs.avail_out;

        switch (err)
        {
            case Z_OK:
                // Grow only once the buffer is really full. If inflate made
                // no progress, the next call reports an error and the loop
                // ends through the default case.
                if (filled == destbuf.length)
                    destlen = destbuf.length * 2;
                continue;

            case Z_STREAM_END:
                ended = true;
                err = etc.c.zlib.inflateEnd(&zs);
                if (err != Z_OK)
                    throw new ZlibException(err);
                return destbuf[0 .. filled];

            default:
                throw new ZlibException(err);
        }
    }
    assert(0);
}
281 
@system unittest
{
    // Round trip of a short text through the one-shot API.
    auto text =
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
";

    ubyte[] packed = compress(text);
    ubyte[] unpacked = cast(ubyte[]) uncompress(packed);
    assert(unpacked == text);
}
298 
@system unittest
{
    // Highly compressible input: the output is more than twice the size of
    // the compressed data, so uncompress() has to grow its buffer.
    ubyte[] original = new ubyte[1000000];
    original[] = 0x80;

    ubyte[] packed = compress(original);
    assert(packed.length*2 + 1 < original.length);

    ubyte[] unpacked = cast(ubyte[]) uncompress(packed);
    assert(unpacked == original);
}
311 
312 /+
313 void arrayPrint(ubyte[] array)
314 {
315     //printf("array %p,%d\n", cast(void*) array, array.length);
316     for (size_t i = 0; i < array.length; i++)
317     {
318         printf("%02x ", array[i]);
319         if (((i + 1) & 15) == 0)
320             printf("\n");
321     }
322     printf("\n\n");
323 }
324 +/
325 
/// the header format the compressed stream is wrapped in
enum HeaderFormat
{
    /// a standard zlib header
    deflate,

    /// a gzip file format header
    gzip,

    /// used when decompressing. Try to automatically detect the stream format by looking at the data
    determineFromData,
}
332 
/*********************************************
 * Used when the data to be compressed is not all in one buffer.
 */

class Compress
{
    import std.conv : to;

  private:
    z_stream zs;
    int level = Z_DEFAULT_COMPRESSION;
    int inited;             // nonzero while zs holds a live deflate state
    immutable bool gzip;    // true: gzip wrapper, false: zlib wrapper

    // Releases the deflate state (if any) and throws ZlibException(err).
    void error(int err)
    {
        if (inited)
        {   deflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

  public:

    /**
     * Constructor.
     *
     * Params:
     *    level = compression level. Legal values are -1 .. 9, with -1 indicating
     *            the default level (6), 0 indicating no compression, 1 being the
     *            least compression and 9 being the most.
     *    header = sets the compression type to one of the options available
     *             in $(LREF HeaderFormat). Defaults to HeaderFormat.deflate.
     *
     * See_Also:
     *    $(LREF compress), $(LREF HeaderFormat)
     */
    this(int level, HeaderFormat header = HeaderFormat.deflate)
    in
    {
        // Same range as the free function compress().
        assert(-1 <= level && level <= 9);
    }
    body
    {
        this.level = level;
        this.gzip = header == HeaderFormat.gzip;
    }

    /// ditto
    this(HeaderFormat header = HeaderFormat.deflate)
    {
        this.gzip = header == HeaderFormat.gzip;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            deflateEnd(&zs);
        }
    }

    /**
     * Compress the data in buf and return the compressed data.
     * Params:
     *    buf = data to compress
     *
     * Returns:
     *    the compressed data. The buffers returned from successive calls to this should be concatenated together.
     *    May be empty when zlib has not produced any output yet.
     *    All of buf has been consumed when this returns, so the caller
     *    is free to reuse or modify buf afterwards.
     *
     * Throws:
     *    $(LREF ZlibException) if zlib reports a failure.
     */
    const(void)[] compress(const(void)[] buf)
    {
        import core.memory : GC;

        if (buf.length == 0)
            return null;

        if (!inited)
        {
            immutable rc = deflateInit2(&zs, level, Z_DEFLATED, 15 + (gzip ? 16 : 0), 8, Z_DEFAULT_STRATEGY);
            if (rc)
                error(rc);
            inited = 1;
        }

        zs.next_in = cast(typeof(zs.next_in)) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        // Never keep a pointer into the caller's buffer past this call.
        scope (exit)
        {
            zs.next_in = null;
            zs.avail_in = 0;
        }

        auto destbuf = new ubyte[buf.length];
        size_t filled = 0;      // bytes of destbuf holding compressed data

        while (true)
        {
            // avail_out is a uint: offer at most uint.max bytes per call.
            immutable size_t room = destbuf.length - filled;
            immutable uint window = room > uint.max ? uint.max : cast(uint) room;
            zs.next_out = destbuf.ptr + filled;
            zs.avail_out = window;

            immutable err = deflate(&zs, Z_NO_FLUSH);
            if (err != Z_STREAM_END && err != Z_OK)
            {
                GC.free(destbuf.ptr);
                error(err);
            }
            filled += window - zs.avail_out;

            if (zs.avail_in == 0)
                break;

            // Input is left over; when that is because the output buffer
            // ran full (e.g. the stream header alone can exceed a tiny
            // chunk), enlarge it and let deflate carry on.
            if (filled == destbuf.length)
                destbuf.length = destbuf.length * 2 + 64;
        }
        return destbuf[0 .. filled];
    }

    /***
     * Compress and return any remaining data.
     * The returned data should be appended to that returned by compress().
     * Params:
     *  mode = one of the following:
     *          $(DL
                    $(DT Z_SYNC_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on.)
                    $(DT Z_FULL_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on,
                        and the decompressor needs to be restartable at this
                        point.)
                    $(DT Z_FINISH)
                    $(DD (default) Used when finished compressing the data. )
                )
     * Returns:
     *  the flushed data, or null if nothing has been compressed since
     *  construction or since the last Z_FINISH flush.
     * Throws:
     *  $(LREF ZlibException) if zlib reports a failure.
     */
    void[] flush(int mode = Z_FINISH)
    in
    {
        assert(mode == Z_FINISH || mode == Z_SYNC_FLUSH || mode == Z_FULL_FLUSH);
    }
    body
    {
        import core.memory : GC;
        ubyte[] destbuf;
        ubyte[512] tmpbuf = void;   // staging area, appended to destbuf as it fills
        int err;

        if (!inited)
            return null;

        zs.next_out = tmpbuf.ptr;
        zs.avail_out = tmpbuf.length;

        while ( (err = deflate(&zs, mode)) != Z_STREAM_END)
        {
            if (err == Z_OK)
            {
                // A non-final flush is complete once deflate returns
                // with output space to spare.
                if (zs.avail_out != 0 && mode != Z_FINISH)
                    break;
                else if (zs.avail_out == 0)
                {
                    // Staging area full: bank it and keep going.
                    destbuf ~= tmpbuf;
                    zs.next_out = tmpbuf.ptr;
                    zs.avail_out = tmpbuf.length;
                    continue;
                }
                // Z_FINISH returned Z_OK although space was left over.
                err = Z_BUF_ERROR;
            }
            GC.free(destbuf.ptr);
            error(err);
        }
        destbuf ~= tmpbuf[0 .. (tmpbuf.length - zs.avail_out)];

        if (mode == Z_FINISH)
        {
            // The stream is complete; release the deflate state. A later
            // compress() call initializes a new one.
            err = deflateEnd(&zs);
            inited = 0;
            if (err)
                error(err);
        }
        return destbuf;
    }
}
513 
/******
 * Used when the data to be decompressed is not all in one buffer.
 */

class UnCompress
{
    import std.conv : to;

  private:
    z_stream zs;
    int inited;             // nonzero while zs holds a live inflate state
    int done;               // set by flush() and the destructor
    size_t destbufsize;     // initial and incremental output buffer size; 0 = derive from input

    HeaderFormat format;

    // Releases the inflate state (if any) and throws ZlibException(err).
    void error(int err)
    {
        if (inited)
        {   inflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

  public:

    /**
     * Construct. destbufsize is the size of the output buffer allocated by
     * each call to uncompress(); it need not be accurate. When it is 0, twice
     * the length of the first input chunk is used.
     * This overload leaves the header format at HeaderFormat.deflate.
     */
    this(uint destbufsize)
    {
        this.destbufsize = destbufsize;
    }

    /**
     * Construct for the given header format; by default the format is
     * detected from the data.
     */
    this(HeaderFormat format = HeaderFormat.determineFromData)
    {
        this.format = format;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            inflateEnd(&zs);
        }
        done = 1;
    }

    /**
     * Decompress the data in buf and return the decompressed data.
     * The buffers returned from successive calls to this should be concatenated
     * together.
     *
     * All of buf has been consumed (or the end of the compressed stream has
     * been reached) when this returns, so the caller is free to reuse or
     * modify buf afterwards.
     *
     * Throws:
     *    $(LREF ZlibException) if zlib reports a failure.
     */
    const(void)[] uncompress(const(void)[] buf)
    in
    {
        assert(!done);
    }
    body
    {
        import core.memory : GC;

        if (buf.length == 0)
            return null;

        if (!inited)
        {
            int windowBits = 15;
            if (format == HeaderFormat.gzip)
                windowBits += 16;
            else if (format == HeaderFormat.determineFromData)
                windowBits += 32;

            immutable rc = inflateInit2(&zs, windowBits);
            if (rc)
                error(rc);
            inited = 1;
        }

        zs.next_in = cast(typeof(zs.next_in)) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        // Never keep a pointer into the caller's buffer past this call.
        scope (exit)
        {
            zs.next_in = null;
            zs.avail_in = 0;
        }

        // Computed in size_t so that a large chunk cannot wrap around.
        if (!destbufsize)
            destbufsize = buf.length * 2;

        auto destbuf = new ubyte[destbufsize];
        size_t filled = 0;      // bytes of destbuf holding decompressed data

        while (true)
        {
            // avail_out is a uint: offer at most uint.max bytes per call.
            immutable size_t room = destbuf.length - filled;
            immutable uint window = room > uint.max ? uint.max : cast(uint) room;
            zs.next_out = destbuf.ptr + filled;
            zs.avail_out = window;

            immutable err = inflate(&zs, Z_NO_FLUSH);
            if (err != Z_STREAM_END && err != Z_OK)
            {
                GC.free(destbuf.ptr);
                error(err);
            }
            filled += window - zs.avail_out;

            if (err == Z_STREAM_END || zs.avail_in == 0)
                break;

            // Input is left over because the output buffer ran full:
            // enlarge it and let inflate carry on.
            if (filled == destbuf.length)
                destbuf.length = destbuf.length + destbufsize;
        }
        return destbuf[0 .. filled];
    }

    /**
     * Decompress and return any remaining data.
     * The returned data should be appended to that returned by uncompress().
     * The UnCompress object cannot be used further.
     *
     * Throws:
     *    $(LREF ZlibException) if zlib reports a failure, or if the end of
     *    the compressed stream has not been reached.
     */
    void[] flush()
    in
    {
        assert(!done);
    }
    out
    {
        assert(done);
    }
    body
    {
        import core.memory : GC;
        ubyte[] extra;      // output collected from earlier rounds
        ubyte[] destbuf;
        int err;

        done = 1;
        if (!inited)
            return null;

      L1:
        destbuf = new ubyte[zs.avail_in * 2 + 100];
        zs.next_out = destbuf.ptr;
        zs.avail_out = to!uint(destbuf.length);

        err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH);
        if (err == Z_OK && zs.avail_out == 0)
        {
            // Buffer full and more may follow: bank it and go around again.
            extra ~= destbuf;
            goto L1;
        }
        if (err != Z_STREAM_END)
        {
            GC.free(destbuf.ptr);
            // Z_OK here means inflate stopped before the end of the stream.
            if (err == Z_OK)
                err = Z_BUF_ERROR;
            error(err);
        }
        destbuf = destbuf.ptr[0 .. zs.next_out - destbuf.ptr];
        err = etc.c.zlib.inflateEnd(&zs);
        inited = 0;
        if (err)
            error(err);
        if (extra.length)
            destbuf = extra ~ destbuf;
        return destbuf;
    }
}
674 
675 /* ========================== unittest ========================= */
676 
677 import std.random;
678 import std.stdio;
679 
@system unittest // by Dave
{
    debug(zlib) writeln("std.zlib.unittest");

    // Sends src through compress() and uncompress() and asserts that the
    // result matches the input.
    bool CompressThenUncompress (void[] src)
    {
        ubyte[] packed = std.zlib.compress(src);
        double ratio = (packed.length / cast(double) src.length);
        debug(zlib) writef("src.length: %1$d, dst: %2$d, Ratio = %3$f", src.length, packed.length, ratio);
        ubyte[] unpacked = cast(ubyte[]) std.zlib.uncompress(packed);
        assert(src.length == unpacked.length);
        assert(src == unpacked);

        return true;
    }

    // smallish buffers
    foreach (int idx; 0 .. 25)
    {
        char[] buf = new char[uniform(0, 100)];

        // Alternate between more & less compressible
        int spread = idx % 2 ? 91 : 2;
        foreach (ref char c; buf)
            c = cast(char) (' ' + (uniform(0, spread)));

        if (!CompressThenUncompress(buf))
            return;
        debug(zlib) writeln("; Success.");
    }

    // larger buffers
    foreach (int idx; 0 .. 25)
    {
        char[] buf = new char[uniform(0, 1000/*0000*/)];

        // Alternate between more & less compressible
        int spread = idx % 2 ? 91 : 10;
        foreach (ref char c; buf)
            c = cast(char) (' ' + (uniform(0, spread)));

        if (!CompressThenUncompress(buf))
            return;
        debug(zlib) writefln("; Success.");
    }

    debug(zlib) writefln("PASSED std.zlib.unittest");
}
738 
739 
@system unittest // by Artem Rebrov
{
    // Round trip through the streaming classes.
    auto packer = new Compress;
    auto unpacker = new UnCompress;

    const(void)[] input = "tesatdffadf";

    // compress() output followed by flush() output forms the whole stream
    const(void)[] stream = packer.compress(input);
    stream ~= packer.flush();

    const(void)[] output = unpacker.uncompress(stream);
    assert( output[] == input[] );
}
756 
@system unittest
{
    // bugzilla 15457: the gzclose binding must be callable
    static assert(is(typeof(etc.c.zlib.gzclose(null))));
}
761