1 // Written in the D programming language.
3 /**
4  * Compress/decompress data using the $(HTTP www._zlib.net, _zlib library).
5  *
6  * Examples:
7  *
8  * If you have a small buffer you can use $(LREF compress) and
9  * $(LREF uncompress) directly.
10  *
11  * -------
12  * import std.zlib;
13  *
14  * auto src =
15  * "the quick brown fox jumps over the lazy dog\r
16  *  the quick brown fox jumps over the lazy dog\r";
17  *
18  * ubyte[] dst;
19  * ubyte[] result;
20  *
21  * dst = compress(src);
22  * result = cast(ubyte[]) uncompress(dst);
23  * assert(result == src);
24  * -------
25  *
26  * When the data to be compressed doesn't fit in one buffer, use
27  * $(LREF Compress) and $(LREF UnCompress).
28  *
29  * -------
30  * import std.zlib;
31  * import std.stdio;
32  * import std.conv : to;
33  * import std.algorithm.iteration : map;
34  *
35  * UnCompress decmp = new UnCompress;
36  * foreach (chunk; stdin.byChunk(4096).map!(x => decmp.uncompress(x)))
37  * {
38  *     chunk.to!string.write;
39  * }
41  * -------
42  *
43  * References:
44  *  $(HTTP en.wikipedia.org/wiki/Zlib, Wikipedia)
45  *
46  * Copyright: Copyright Digital Mars 2000 - 2011.
47  * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
48  * Authors:   $(HTTP digitalmars.com, Walter Bright)
49  * Source:    $(PHOBOSSRC std/_zlib.d)
50  */
51 /*          Copyright Digital Mars 2000 - 2011.
52  * Distributed under the Boost Software License, Version 1.0.
53  *    (See accompanying file LICENSE_1_0.txt or copy at
54  *          http://www.boost.org/LICENSE_1_0.txt)
55  */
56 module std.zlib;
58 //debug=zlib;       // uncomment to turn on debugging printf's
60 import etc.c.zlib;
// Flush modes. These are the values accepted by the 'mode' argument of
// Compress.flush, and Z_NO_FLUSH is what the streaming calls in this module
// pass to zlib's deflate()/inflate().
enum : int
{
    Z_NO_FLUSH   = 0, // ordinary streaming call, no flush requested
    Z_SYNC_FLUSH = 2, // flush to a byte boundary, more data will follow
    Z_FULL_FLUSH = 3, // as Z_SYNC_FLUSH, and the decompressor can restart here
    Z_FINISH     = 4, // no more input, finish the stream
}
/*************************************
 * Exception thrown by this module when a zlib call reports a failure.
 *
 * The exception message is a short fixed description of the zlib status
 * code passed to the constructor.
 */

class ZlibException : Exception
{
    /// Maps a zlib status code to the message text used for the exception.
    private static string describe(int errnum)
    {
        switch (errnum)
        {
            case Z_STREAM_END:    return "stream end";
            case Z_NEED_DICT:     return "need dict";
            case Z_ERRNO:         return "errno";
            case Z_STREAM_ERROR:  return "stream error";
            case Z_DATA_ERROR:    return "data error";
            case Z_MEM_ERROR:     return "mem error";
            case Z_BUF_ERROR:     return "buf error";
            case Z_VERSION_ERROR: return "version error";
            default:              return "unknown error";
        }
    }

    /**
     * Params:
     *     errnum = the status code returned by zlib; codes without a
     *              dedicated message are reported as "unknown error"
     */
    this(int errnum)
    {
        super(describe(errnum));
    }
}
/**
 * $(P Compute the Adler-32 checksum of a buffer's worth of data.)
 *
 * Params:
 *     adler = the starting checksum for the computation. Use 1
 *             for a new checksum. Use the output of this function
 *             for a cumulative checksum.
 *     buf = buffer containing input data
 *
 * Returns:
 *     A $(D uint) checksum for the provided input data and starting checksum.
 *     An empty buffer returns $(D adler) unchanged.
 *
 * See_Also:
 *     $(LINK http://en.wikipedia.org/wiki/Adler-32)
 */

uint adler32(uint adler, const(void)[] buf)
{
    // The C routine takes a uint length, so the buffer is fed to it in
    // slices of at most this many bytes.
    enum size_t sliceMax = 0xFFFF0000;

    auto remaining = cast(ubyte[]) buf;
    while (remaining.length != 0)
    {
        const take = remaining.length < sliceMax ? remaining.length : sliceMax;
        adler = etc.c.zlib.adler32(adler, remaining.ptr, cast(uint) take);
        remaining = remaining[take .. $];
    }
    return adler;
}
///
@system unittest
{
    static ubyte[] sample = [1,2,3,4,5,6,7,8,9,10];

    // Checksum of ten small bytes, starting from a seed of 0.
    assert(adler32(0u, sample) == 0xdc0037);
}
@system unittest
{
    static string text = "test";

    // Checksum of a string, starting from the seed 1.
    const uint sum = adler32(1, text);
    assert(sum == 0x045d01c1);
}
/**
 * $(P Compute the CRC32 checksum of a buffer's worth of data.)
 *
 * Params:
 *     crc = the starting checksum for the computation. Use 0
 *             for a new checksum. Use the output of this function
 *             for a cumulative checksum.
 *     buf = buffer containing input data
 *
 * Returns:
 *     A $(D uint) checksum for the provided input data and starting checksum.
 *     An empty buffer returns $(D crc) unchanged.
 *
 * See_Also:
 *     $(LINK http://en.wikipedia.org/wiki/Cyclic_redundancy_check)
 */

uint crc32(uint crc, const(void)[] buf)
{
    // The C routine takes a uint length, so the buffer is fed to it in
    // slices of at most this many bytes.
    enum size_t sliceMax = 0xFFFF0000;

    auto remaining = cast(ubyte[]) buf;
    while (remaining.length != 0)
    {
        const take = remaining.length < sliceMax ? remaining.length : sliceMax;
        crc = etc.c.zlib.crc32(crc, remaining.ptr, cast(uint) take);
        remaining = remaining[take .. $];
    }
    return crc;
}
@system unittest
{
    debug(zlib) printf("D.zlib.crc32.unittest\n");

    static ubyte[] sample = [1,2,3,4,5,6,7,8,9,10];

    // Checksum of ten small bytes, starting from a seed of 0.
    const uint checksum = crc32(0u, cast(void[]) sample);
    debug(zlib) printf("crc = %x\n", checksum);
    assert(checksum == 0x2520577b);
}
/**
 * $(P Compress data)
 *
 * Params:
 *     srcbuf = buffer containing the data to compress
 *     level = compression level. Legal values are -1 .. 9, with -1 indicating
 *             the default level (6), 0 indicating no compression, 1 being the
 *             least compression and 9 being the most.
 *
 * Returns:
 *     the compressed data
 *
 * Throws:
 *     $(LREF ZlibException) if zlib reports an error.
 */

ubyte[] compress(const(void)[] srcbuf, int level)
in
{
    assert(level >= -1 && level <= 9);
}
body
{
    import core.memory : GC;

    // Output capacity: the input size plus one byte per started KiB plus
    // 12 bytes. compress2 overwrites packedLen with the size actually used.
    auto packedLen = srcbuf.length + ((srcbuf.length + 1023) / 1024) + 12;
    auto packed = new ubyte[packedLen];

    const status = etc.c.zlib.compress2(packed.ptr, &packedLen,
            cast(ubyte *) srcbuf.ptr, srcbuf.length, level);
    if (status != 0)
    {
        // Release the scratch buffer eagerly before reporting the failure.
        GC.free(packed.ptr);
        throw new ZlibException(status);
    }

    packed.length = packedLen;
    return packed;
}
/// ditto

ubyte[] compress(const(void)[] srcbuf)
{
    // No level given: let zlib pick its default.
    enum int defaultLevel = Z_DEFAULT_COMPRESSION;
    return compress(srcbuf, defaultLevel);
}
/*********************************************
 * Decompresses the data in srcbuf[].
 * Params:
 *  srcbuf  = buffer containing the compressed data.
 *  destlen = size of the uncompressed data.
 *            It need not be accurate, but the decompression will be faster
 *            if the exact size is supplied. When 0, an initial size of
 *            twice the input length plus one is used.
 *  winbits = the base two logarithm of the maximum window size.
 * Returns: the decompressed data.
 * Throws:
 *  $(LREF ZlibException) if zlib reports an error, including input that
 *  ends before the compressed stream is complete.
 */

void[] uncompress(const(void)[] srcbuf, size_t destlen = 0u, int winbits = 15)
{
    import std.conv : to;

    if (!destlen)
        destlen = srcbuf.length * 2 + 1;

    etc.c.zlib.z_stream zs;
    zs.next_in = cast(typeof(zs.next_in)) srcbuf.ptr;
    zs.avail_in = to!uint(srcbuf.length);
    int err = etc.c.zlib.inflateInit2(&zs, winbits);
    if (err)
        throw new ZlibException(err);

    // From here on the stream owns zlib-side state. Release it on every
    // exit path, including exceptions raised while growing the buffer.
    bool streamOpen = true;
    scope (exit)
    {
        if (streamOpen)
            etc.c.zlib.inflateEnd(&zs);
    }

    ubyte[] destbuf;
    size_t produced = 0;    // number of valid output bytes in destbuf

    while (true)
    {
        destbuf.length = destlen;

        // zlib counts available output in a uint, so offer at most
        // uint.max bytes per call even if the buffer has more room.
        const room = destbuf.length - produced;
        const uint chunk = room > uint.max ? uint.max : cast(uint) room;
        zs.next_out = cast(typeof(zs.next_out)) (destbuf.ptr + produced);
        zs.avail_out = chunk;

        err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH);
        // Advance by what was actually written, not by what was offered.
        produced += chunk - zs.avail_out;

        if (err == Z_STREAM_END)
            break;
        if (err != Z_OK)
            throw new ZlibException(err);

        // Grow only once the buffer is really full. If zlib stopped for
        // lack of input instead, the next call reports the error.
        if (produced == destbuf.length)
            destlen = destbuf.length * 2;
    }

    destbuf.length = produced;

    streamOpen = false;
    err = etc.c.zlib.inflateEnd(&zs);
    if (err != Z_OK)
        throw new ZlibException(err);
    return destbuf;
}
@system unittest
{
    // Round trip of a short, repetitive text through compress/uncompress.
    auto text =
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
";

    ubyte[] packed = compress(text);
    ubyte[] restored = cast(ubyte[]) uncompress(packed);
    assert(restored == text);
}
@system unittest
{
    // One million identical bytes: the compressed form must be small enough
    // that uncompress's default initial buffer (2 * length + 1) has to grow.
    ubyte[] original = new ubyte[1000000];
    original[] = 0x80;

    ubyte[] packed = compress(original);
    assert(packed.length*2 + 1 < original.length);

    ubyte[] restored = cast(ubyte[]) uncompress(packed);
    assert(restored == original);
}
312 /+
313 void arrayPrint(ubyte[] array)
314 {
315     //printf("array %p,%d\n", cast(void*) array, array.length);
316     for (size_t i = 0; i < array.length; i++)
317     {
318         printf("%02x ", array[i]);
319         if (((i + 1) & 15) == 0)
320             printf("\n");
321     }
322     printf("\n\n");
323 }
324 +/
/// the header format the compressed stream is wrapped in
enum HeaderFormat
{
    /// a standard zlib header
    deflate,

    /// a gzip file format header
    gzip,

    /// used when decompressing. Try to automatically detect the stream format by looking at the data
    determineFromData
}
/*********************************************
 * Used when the data to be compressed is not all in one buffer.
 *
 * Feed the input piece by piece to $(D compress), then call $(D flush) to
 * obtain the remaining output. The compressed stream is the concatenation
 * of all returned buffers, in order.
 */

class Compress
{
    import std.conv : to;

  private:
    z_stream zs;
    int level = Z_DEFAULT_COMPRESSION;
    int inited;             // nonzero while zs holds an initialized deflate stream
    immutable bool gzip;    // true: gzip wrapper, false: zlib wrapper

    // Tears down the deflate stream (if any) and throws a ZlibException.
    void error(int err)
    {
        if (inited)
        {
            deflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

  public:

    /**
     * Constructor.
     *
     * Params:
     *    level = compression level. Legal values are -1 .. 9, with -1
     *            indicating the default level, 0 indicating no compression,
     *            1 being the least compression and 9 being the most.
     *            This is the same range the free function
     *            $(LREF compress) accepts.
     *    header = sets the compression type to one of the options available
     *             in $(LREF HeaderFormat). Defaults to HeaderFormat.deflate.
     *
     * See_Also:
     *    $(LREF compress), $(LREF HeaderFormat)
     */
    this(int level, HeaderFormat header = HeaderFormat.deflate)
    in
    {
        assert(-1 <= level && level <= 9);
    }
    body
    {
        this.level = level;
        this.gzip = header == HeaderFormat.gzip;
    }

    /// ditto
    this(HeaderFormat header = HeaderFormat.deflate)
    {
        this.gzip = header == HeaderFormat.gzip;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            deflateEnd(&zs);
        }
    }

    /**
     * Compress the data in buf and return the compressed data.
     *
     * All of buf is handed to zlib before this function returns, so the
     * caller may reuse or release buf afterwards.
     *
     * Params:
     *    buf = data to compress
     *
     * Returns:
     *    the compressed data. The buffers returned from successive calls to this should be concatenated together.
     *    Returns null if buf is empty. The result may be empty even for
     *    non-empty input, because zlib can hold output back until a later
     *    call or until $(D flush).
     *
     * Throws:
     *    $(LREF ZlibException) if zlib reports an error.
     */
    const(void)[] compress(const(void)[] buf)
    {
        import core.memory : GC;

        if (buf.length == 0)
            return null;

        if (!inited)
        {
            // windowBits 15 selects the zlib wrapper, adding 16 selects gzip.
            const initErr = deflateInit2(&zs, level, Z_DEFLATED, 15 + (gzip ? 16 : 0), 8, Z_DEFAULT_STRATEGY);
            if (initErr)
                error(initErr);
            inited = 1;
        }

        zs.next_in = cast(typeof(zs.next_in)) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        auto destbuf = new ubyte[buf.length];
        size_t used = 0;    // number of valid output bytes in destbuf

        // Keep calling deflate until every input byte has been consumed.
        // Stopping earlier would leave zs.next_in pointing into the
        // caller's buffer with input still pending, and a later call would
        // read that memory after the caller may have reused it.
        do
        {
            if (used == destbuf.length)
                destbuf.length = destbuf.length * 2;

            // zlib counts available output in a uint.
            const room = destbuf.length - used;
            const uint chunk = room > uint.max ? uint.max : cast(uint) room;
            zs.next_out = destbuf.ptr + used;
            zs.avail_out = chunk;

            const err = deflate(&zs, Z_NO_FLUSH);
            if (err != Z_STREAM_END && err != Z_OK)
            {
                GC.free(destbuf.ptr);
                error(err);
            }
            used += chunk - zs.avail_out;
        }
        while (zs.avail_in != 0);

        destbuf.length = used;
        return destbuf;
    }

    /***
     * Compress and return any remaining data.
     * The returned data should be appended to that returned by compress().
     * Params:
     *  mode = one of the following:
     *          $(DL
                    $(DT Z_SYNC_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on.)
                    $(DT Z_FULL_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on,
                        and the decompressor needs to be restartable at this
                        point.)
                    $(DT Z_FINISH)
                    $(DD (default) Used when finished compressing the data. )
                )
     * Returns:
     *  the remaining compressed data, or null if no stream is active.
     * Throws:
     *  $(LREF ZlibException) if zlib reports an error.
     */
    void[] flush(int mode = Z_FINISH)
    in
    {
        assert(mode == Z_FINISH || mode == Z_SYNC_FLUSH || mode == Z_FULL_FLUSH);
    }
    body
    {
        import core.memory : GC;
        ubyte[] destbuf;
        ubyte[512] tmpbuf = void;   // scratch space, drained into destbuf whenever it fills
        int err;

        if (!inited)
            return null;

        zs.next_out = tmpbuf.ptr;
        zs.avail_out = tmpbuf.length;

        while ((err = deflate(&zs, mode)) != Z_STREAM_END)
        {
            if (err == Z_OK)
            {
                // Sync/full flush is complete once zlib leaves room over.
                if (zs.avail_out != 0 && mode != Z_FINISH)
                    break;

                // Scratch buffer full: save it and go around again.
                if (zs.avail_out == 0)
                {
                    destbuf ~= tmpbuf;
                    zs.next_out = tmpbuf.ptr;
                    zs.avail_out = tmpbuf.length;
                    continue;
                }

                // Z_FINISH returned Z_OK although output room was left:
                // report it as a buffer error.
                err = Z_BUF_ERROR;
            }
            GC.free(destbuf.ptr);
            error(err);
        }
        destbuf ~= tmpbuf[0 .. (tmpbuf.length - zs.avail_out)];

        if (mode == Z_FINISH)
        {
            err = deflateEnd(&zs);
            inited = 0;
            if (err)
                error(err);
        }
        return destbuf;
    }
}
/******
 * Used when the data to be decompressed is not all in one buffer.
 *
 * Feed the compressed input piece by piece to $(D uncompress). The
 * decompressed data is the concatenation of all returned buffers, in order,
 * followed by whatever $(D flush) returns.
 */

class UnCompress
{
    import std.conv : to;

  private:
    z_stream zs;
    int inited;             // nonzero while zs holds an initialized inflate stream
    int done;               // set once flush() or the destructor has run
    size_t destbufsize;     // initial output buffer size, 0 = derive from the first input

    HeaderFormat format;

    // Tears down the inflate stream (if any) and throws a ZlibException.
    void error(int err)
    {
        if (inited)
        {
            inflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

  public:

    /**
     * Construct. destbufsize is the initial size of the output buffer
     * allocated by each call to uncompress(). If it is 0, twice the length
     * of the first input buffer is used. This constructor leaves the header
     * format at HeaderFormat's initial value (deflate).
     */
    this(uint destbufsize)
    {
        this.destbufsize = destbufsize;
    }

    /** ditto */
    this(HeaderFormat format = HeaderFormat.determineFromData)
    {
        this.format = format;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            inflateEnd(&zs);
        }
        done = 1;
    }

    /**
     * Decompress the data in buf and return the decompressed data.
     * The buffers returned from successive calls to this should be concatenated
     * together.
     *
     * All of buf is processed before this function returns (unless the
     * compressed stream ends inside it), so the caller may reuse or release
     * buf afterwards.
     *
     * Returns:
     *    the decompressed data, or null if buf is empty.
     * Throws:
     *    $(LREF ZlibException) if zlib reports an error.
     */
    const(void)[] uncompress(const(void)[] buf)
    in
    {
        assert(!done);
    }
    body
    {
        import core.memory : GC;

        if (buf.length == 0)
            return null;

        if (!inited)
        {
            // 15 = zlib wrapper, +16 = gzip wrapper, +32 = detect from data.
            int windowBits = 15;
            if (format == HeaderFormat.gzip)
                windowBits += 16;
            else if (format == HeaderFormat.determineFromData)
                windowBits += 32;

            const initErr = inflateInit2(&zs, windowBits);
            if (initErr)
                error(initErr);
            inited = 1;
        }

        zs.next_in = cast(typeof(zs.next_in)) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        if (!destbufsize)
            destbufsize = buf.length * 2;

        auto destbuf = new ubyte[destbufsize];
        size_t filled = 0;  // number of valid output bytes in destbuf

        // Keep calling inflate until the input is used up and zlib has no
        // more output to deliver for it. Stopping after one call would
        // leave zs.next_in pointing into the caller's buffer with input
        // still pending, and a later call would read that memory after the
        // caller may have reused it.
        while (true)
        {
            if (filled == destbuf.length)
                destbuf.length = destbuf.length * 2;

            // zlib counts available output in a uint.
            const room = destbuf.length - filled;
            const uint chunk = room > uint.max ? uint.max : cast(uint) room;
            zs.next_out = destbuf.ptr + filled;
            zs.avail_out = chunk;

            const err = inflate(&zs, Z_NO_FLUSH);
            filled += chunk - zs.avail_out;

            if (err == Z_STREAM_END)
                break;

            // Only reachable on a retry after the output filled up exactly
            // when the input ran out: nothing further can be produced until
            // more input arrives, which is not a failure.
            if (err == Z_BUF_ERROR && zs.avail_in == 0)
                break;

            if (err != Z_OK)
            {
                GC.free(destbuf.ptr);
                error(err);
            }

            // Input consumed and output room left over: zlib is waiting
            // for the next piece of input.
            if (zs.avail_in == 0 && zs.avail_out != 0)
                break;
        }

        destbuf.length = filled;
        return destbuf;
    }

    /**
     * Decompress and return any remaining data.
     * The returned data should be appended to that returned by uncompress().
     * The UnCompress object cannot be used further.
     *
     * Throws:
     *    $(LREF ZlibException) if zlib reports an error, or if the
     *    compressed stream has not reached its end.
     */
    void[] flush()
    in
    {
        assert(!done);
    }
    out
    {
        assert(done);
    }
    body
    {
        import core.memory : GC;
        ubyte[] extra;      // output collected from buffers that filled up completely
        ubyte[] destbuf;
        int err;

        done = 1;
        if (!inited)
            return null;

        while (true)
        {
            destbuf = new ubyte[zs.avail_in * 2 + 100];
            zs.next_out = destbuf.ptr;
            zs.avail_out = to!uint(destbuf.length);

            err = etc.c.zlib.inflate(&zs, Z_NO_FLUSH);
            if (err == Z_OK && zs.avail_out == 0)
            {
                // Buffer filled up: keep it and try again with a new one.
                extra ~= destbuf;
                continue;
            }
            break;
        }

        if (err != Z_STREAM_END)
        {
            GC.free(destbuf.ptr);
            // Z_OK here means the stream is still unfinished.
            if (err == Z_OK)
                err = Z_BUF_ERROR;
            error(err);
        }

        destbuf = destbuf.ptr[0 .. zs.next_out - destbuf.ptr];
        err = etc.c.zlib.inflateEnd(&zs);
        inited = 0;
        if (err)
            error(err);
        if (extra.length)
            destbuf = extra ~ destbuf;
        return destbuf;
    }
}
675 /* ========================== unittest ========================= */
677 import std.random;
678 import std.stdio;
@system unittest // by Dave
{
    debug(zlib) writeln("std.zlib.unittest");

    // Compresses src, decompresses the result and checks that the round
    // trip reproduces the input. Always returns true (failures assert).
    bool CompressThenUncompress (void[] src)
    {
        ubyte[] packed = std.zlib.compress(src);
        double ratio = (packed.length / cast(double) src.length);
        debug(zlib) writef("src.length: %1$d, dst: %2$d, Ratio = %3$f", src.length, packed.length, ratio);

        ubyte[] restored = cast(ubyte[]) std.zlib.uncompress(packed);
        assert(src.length == restored.length);
        assert(src == restored);

        return true;
    }

    // smallish buffers
    foreach (int idx; 0 .. 25)
    {
        char[] buf = new char[uniform(0, 100)];

        // Alternate between more & less compressible
        foreach (ref char c; buf)
            c = cast(char) (' ' + (uniform(0, idx % 2 ? 91 : 2)));

        if (!CompressThenUncompress(buf))
            return;
        debug(zlib) writeln("; Success.");
    }

    // larger buffers
    foreach (int idx; 0 .. 25)
    {
        char[] buf = new char[uniform(0, 1000/*0000*/)];

        // Alternate between more & less compressible
        foreach (ref char c; buf)
            c = cast(char) (' ' + (uniform(0, idx % 2 ? 91 : 10)));

        if (!CompressThenUncompress(buf))
            return;
        debug(zlib) writefln("; Success.");
    }

    debug(zlib) writefln("PASSED std.zlib.unittest");
}
@system unittest // by Artem Rebrov
{
    // Round trip of a short string through the streaming classes.
    auto packer = new Compress;
    auto unpacker = new UnCompress;

    const(void)[] input = "tesatdffadf";

    const(void)[] packed = packer.compress(input);
    packed ~= packer.flush();

    const(void)[] output = unpacker.uncompress(packed);
    assert( output[] == input[] );
}
@system unittest
{
    // bugzilla 15457: a call to gzclose through etc.c.zlib must compile.
    enum bool gzcloseCompiles = __traits(compiles, etc.c.zlib.gzclose(null));
    static assert(gzcloseCompiles);
}