xref: /minix/common/dist/zlib/examples/fitblk.c (revision ebfedea0)
1 /*	$NetBSD: fitblk.c,v 1.1.1.1 2006/01/14 20:11:08 christos Exp $	*/
2 
3 /* fitblk.c: example of fitting compressed output to a specified size
4    Not copyrighted -- provided to the public domain
5    Version 1.1  25 November 2004  Mark Adler */
6 
7 /* Version history:
8    1.0  24 Nov 2004  First version
9    1.1  25 Nov 2004  Change deflateInit2() to deflateInit()
10                      Use fixed-size, stack-allocated raw buffers
11                      Simplify code moving compression to subroutines
12                      Use assert() for internal errors
13                      Add detailed description of approach
14  */
15 
16 /* Approach to just fitting a requested compressed size:
17 
18    fitblk performs three compression passes on a portion of the input
19    data in order to determine how much of that input will compress to
20    nearly the requested output block size.  The first pass generates
21    enough deflate blocks to produce output to fill the requested
22    output size plus a specified excess amount (see the EXCESS define
23    below).  The last deflate block may go quite a bit past that, but
24    is discarded.  The second pass decompresses and recompresses just
25    the compressed data that fit in the requested plus excess sized
26    buffer.  The deflate process is terminated after that amount of
27    input, which is less than the amount consumed on the first pass.
28    The last deflate block of the result will be of a comparable size
29    to the final product, so that the header for that deflate block and
30    the compression ratio for that block will be about the same as in
31    the final product.  The third compression pass decompresses the
32    result of the second step, but only the compressed data up to the
33    requested size minus an amount to allow the compressed stream to
34    complete (see the MARGIN define below).  That will result in a
35    final compressed stream whose length is less than or equal to the
36    requested size.  Assuming sufficient input and a requested size
37    greater than a few hundred bytes, the shortfall will typically be
38    less than ten bytes.
39 
40    If the input is short enough that the first compression completes
41    before filling the requested output size, then that compressed
41    stream is returned with no recompression.
43 
44    EXCESS is chosen to be just greater than the shortfall seen in a
45    two pass approach similar to the above.  That shortfall is due to
46    the last deflate block compressing more efficiently with a smaller
47    header on the second pass.  EXCESS is set to be large enough so
48    that there is enough uncompressed data for the second pass to fill
49    out the requested size, and small enough so that the final deflate
50    block of the second pass will be close in size to the final deflate
51    block of the third and final pass.  MARGIN is chosen to be just
52    large enough to assure that the final compression has enough room
53    to complete in all cases.
54  */
55 
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <assert.h>
59 #include "zlib.h"
60 
61 #define local static
62 
63 /* print nastygram and leave */
64 local void quit(char *why)
65 {
66     fprintf(stderr, "fitblk abort: %s\n", why);
67     exit(1);
68 }
69 
70 #define RAWLEN 4096    /* intermediate uncompressed buffer size */
71 
72 /* compress from file to def until provided buffer is full or end of
73    input reached; return last deflate() return value, or Z_ERRNO if
74    there was read error on the file */
75 local int partcompress(FILE *in, z_streamp def)
76 {
77     int ret, flush;
78     unsigned char raw[RAWLEN];
79 
80     flush = Z_NO_FLUSH;
81     do {
82         def->avail_in = fread(raw, 1, RAWLEN, in);
83         if (ferror(in))
84             return Z_ERRNO;
85         def->next_in = raw;
86         if (feof(in))
87             flush = Z_FINISH;
88         ret = deflate(def, flush);
89         assert(ret != Z_STREAM_ERROR);
90     } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
91     return ret;
92 }
93 
94 /* recompress from inf's input to def's output; the input for inf and
95    the output for def are set in those structures before calling;
96    return last deflate() return value, or Z_MEM_ERROR if inflate()
97    was not able to allocate enough memory when it needed to */
98 local int recompress(z_streamp inf, z_streamp def)
99 {
100     int ret, flush;
101     unsigned char raw[RAWLEN];
102 
103     flush = Z_NO_FLUSH;
104     do {
105         /* decompress */
106         inf->avail_out = RAWLEN;
107         inf->next_out = raw;
108         ret = inflate(inf, Z_NO_FLUSH);
109         assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
110                ret != Z_NEED_DICT);
111         if (ret == Z_MEM_ERROR)
112             return ret;
113 
114         /* compress what was decompresed until done or no room */
115         def->avail_in = RAWLEN - inf->avail_out;
116         def->next_in = raw;
117         if (inf->avail_out != 0)
118             flush = Z_FINISH;
119         ret = deflate(def, flush);
120         assert(ret != Z_STREAM_ERROR);
121     } while (ret != Z_STREAM_END && def->avail_out != 0);
122     return ret;
123 }
124 
125 #define EXCESS 256      /* empirically determined stream overage */
126 #define MARGIN 8        /* amount to back off for completion */
127 
128 /* compress from stdin to fixed-size block on stdout */
129 int main(int argc, char **argv)
130 {
131     int ret;                /* return code */
132     unsigned size;          /* requested fixed output block size */
133     unsigned have;          /* bytes written by deflate() call */
134     unsigned char *blk;     /* intermediate and final stream */
135     unsigned char *tmp;     /* close to desired size stream */
136     z_stream def, inf;      /* zlib deflate and inflate states */
137 
138     /* get requested output size */
139     if (argc != 2)
140         quit("need one argument: size of output block");
141     ret = strtol(argv[1], argv + 1, 10);
142     if (argv[1][0] != 0)
143         quit("argument must be a number");
144     if (ret < 8)            /* 8 is minimum zlib stream size */
145         quit("need positive size of 8 or greater");
146     size = (unsigned)ret;
147 
148     /* allocate memory for buffers and compression engine */
149     blk = malloc(size + EXCESS);
150     def.zalloc = Z_NULL;
151     def.zfree = Z_NULL;
152     def.opaque = Z_NULL;
153     ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
154     if (ret != Z_OK || blk == NULL)
155         quit("out of memory");
156 
157     /* compress from stdin until output full, or no more input */
158     def.avail_out = size + EXCESS;
159     def.next_out = blk;
160     ret = partcompress(stdin, &def);
161     if (ret == Z_ERRNO)
162         quit("error reading input");
163 
164     /* if it all fit, then size was undersubscribed -- done! */
165     if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
166         /* write block to stdout */
167         have = size + EXCESS - def.avail_out;
168         if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
169             quit("error writing output");
170 
171         /* clean up and print results to stderr */
172         ret = deflateEnd(&def);
173         assert(ret != Z_STREAM_ERROR);
174         free(blk);
175         fprintf(stderr,
176                 "%u bytes unused out of %u requested (all input)\n",
177                 size - have, size);
178         return 0;
179     }
180 
181     /* it didn't all fit -- set up for recompression */
182     inf.zalloc = Z_NULL;
183     inf.zfree = Z_NULL;
184     inf.opaque = Z_NULL;
185     inf.avail_in = 0;
186     inf.next_in = Z_NULL;
187     ret = inflateInit(&inf);
188     tmp = malloc(size + EXCESS);
189     if (ret != Z_OK || tmp == NULL)
190         quit("out of memory");
191     ret = deflateReset(&def);
192     assert(ret != Z_STREAM_ERROR);
193 
194     /* do first recompression close to the right amount */
195     inf.avail_in = size + EXCESS;
196     inf.next_in = blk;
197     def.avail_out = size + EXCESS;
198     def.next_out = tmp;
199     ret = recompress(&inf, &def);
200     if (ret == Z_MEM_ERROR)
201         quit("out of memory");
202 
203     /* set up for next reocmpression */
204     ret = inflateReset(&inf);
205     assert(ret != Z_STREAM_ERROR);
206     ret = deflateReset(&def);
207     assert(ret != Z_STREAM_ERROR);
208 
209     /* do second and final recompression (third compression) */
210     inf.avail_in = size - MARGIN;   /* assure stream will complete */
211     inf.next_in = tmp;
212     def.avail_out = size;
213     def.next_out = blk;
214     ret = recompress(&inf, &def);
215     if (ret == Z_MEM_ERROR)
216         quit("out of memory");
217     assert(ret == Z_STREAM_END);    /* otherwise MARGIN too small */
218 
219     /* done -- write block to stdout */
220     have = size - def.avail_out;
221     if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
222         quit("error writing output");
223 
224     /* clean up and print results to stderr */
225     free(tmp);
226     ret = inflateEnd(&inf);
227     assert(ret != Z_STREAM_ERROR);
228     ret = deflateEnd(&def);
229     assert(ret != Z_STREAM_ERROR);
230     free(blk);
231     fprintf(stderr,
232             "%u bytes unused out of %u requested (%lu input)\n",
233             size - have, size, def.total_in);
234     return 0;
235 }
236